/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_descriptor_types.h"
#include "nvk_shader.h"

#include "vk_pipeline.h"

#include "nir_builder.h"
#include "nir_deref.h"

#include "clc397.h"
#include "clc597.h"

struct lower_desc_cbuf {
   struct nvk_cbuf key;

   uint32_t use_count;

   uint64_t start;
   uint64_t end;
};

DERIVE_HASH_TABLE(nvk_cbuf);

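/* qsort comparator for lower_desc_cbuf: most-used entries sort first, with
 * ties broken by the key fields so the resulting order is deterministic.
 */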
static int
compar_cbufs(const void *_a, const void *_b)
{
   const struct lower_desc_cbuf *a = _a;
   const struct lower_desc_cbuf *b = _b;

#define COMPAR(field, pos) \
   if (a->field < b->field) return -(pos); \
   if (a->field > b->field) return (pos);

   /* Sort by most used first */
   COMPAR(use_count, -1)

   /* Keep the list stable by then sorting by key fields. */
   COMPAR(key.type, 1)
   COMPAR(key.desc_set, 1)
   COMPAR(key.dynamic_idx, 1)
   COMPAR(key.desc_offset, 1)

#undef COMPAR

   return 0;
}

struct lower_descriptors_ctx {
   const struct nv_device_info *dev_info;
   const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];

   bool use_bindless_cbuf;
   bool use_edb_buffer_views;
   bool clamp_desc_array_bounds;
   bool indirect_bind;
   nir_address_format ubo_addr_format;
   nir_address_format ssbo_addr_format;

   struct hash_table *cbufs;
   struct nvk_cbuf_map *cbuf_map;
};

static bool
descriptor_type_is_ubo(VkDescriptorType desc_type)
{
   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
      return true;

   default:
      return false;
   }
}

static bool
descriptor_type_is_ssbo(VkDescriptorType desc_type)
{
   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return true;

   default:
      return false;
   }
}

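/* Record a use of the cbuf identified by @key covering the given start/end
 * byte range, either bumping the use count and growing the range of an
 * existing entry or inserting a new one into ctx->cbufs.
 */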
static void
record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
                struct lower_descriptors_ctx *ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
   if (entry != NULL) {
      struct lower_desc_cbuf *cbuf = entry->data;
      cbuf->use_count++;
      cbuf->start = MIN2(cbuf->start, start);
      cbuf->end = MAX2(cbuf->end, end);
   } else {
      struct lower_desc_cbuf *cbuf =
         ralloc(ctx->cbufs, struct lower_desc_cbuf);
      *cbuf = (struct lower_desc_cbuf) {
         .key = *key,
         .use_count = 1,
         .start = start,
         .end = end,
      };
      _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
   }
}

static const struct nvk_descriptor_set_binding_layout *
get_binding_layout(uint32_t set, uint32_t binding,
                   const struct lower_descriptors_ctx *ctx)
{
   assert(set < NVK_MAX_SETS);
   assert(ctx->set_layouts[set] != NULL);

   const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];

   assert(binding < set_layout->binding_count);
   return &set_layout->binding[binding];
}

static void
record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
                           struct lower_descriptors_ctx *ctx)
{
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   const struct nvk_cbuf key = {
      .type = NVK_CBUF_TYPE_DESC_SET,
      .desc_set = set,
   };

   uint64_t start, end;
   if (index == NULL) {
      /* When we don't have an index, assume 0 */
      start = binding_layout->offset;
      end = start + binding_layout->stride;
   } else if (nir_src_is_const(*index)) {
      start = binding_layout->offset +
              nir_src_as_uint(*index) * binding_layout->stride;
      end = start + binding_layout->stride;
   } else {
      start = binding_layout->offset;
      end = start + binding_layout->array_size * binding_layout->stride;
   }

   record_cbuf_use(&key, start, end, ctx);
}

static void
record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
                                struct lower_descriptors_ctx *ctx)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   /* These we'll handle later */
   if (descriptor_type_is_ubo(nir_intrinsic_desc_type(intrin)))
      return;

   record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
                              nir_intrinsic_binding(intrin),
                              &intrin->src[0], ctx);
}

static void
record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
                                 struct lower_descriptors_ctx *ctx)
{
   nir_src *index_src = NULL;
   if (deref->deref_type == nir_deref_type_array) {
      index_src = &deref->arr.index;
      deref = nir_deref_instr_parent(deref);
   }

   assert(deref->deref_type == nir_deref_type_var);
   nir_variable *var = deref->var;

   record_descriptor_cbuf_use(var->data.descriptor_set,
                              var->data.binding,
                              index_src, ctx);
}

static void
record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
                               struct lower_descriptors_ctx *ctx)
{
   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   const int sampler_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);

   if (texture_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }

   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }
}

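/* Walk a UBO deref chain back to its vulkan_resource_index intrinsic and
 * translate it into an nvk_cbuf key.  On success, also returns the constant
 * byte offset into the cbuf along with the start/end range touched by the
 * access.  Returns NVK_CBUF_TYPE_INVALID if the access can't be mapped to a
 * cbuf; *resource_index_out is still filled out whenever the resource index
 * intrinsic could be found.
 */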
static struct nvk_cbuf
ubo_deref_to_cbuf(nir_deref_instr *deref,
                  nir_intrinsic_instr **resource_index_out,
                  uint64_t *offset_out,
                  uint64_t *start_out, uint64_t *end_out,
                  const struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   /* In case we early return */
   *offset_out = 0;
   *start_out = 0;
   *end_out = UINT64_MAX;
   *resource_index_out = NULL;

   const struct nvk_cbuf invalid = {
      .type = NVK_CBUF_TYPE_INVALID,
   };

   uint64_t offset = 0;
   uint64_t range = glsl_get_explicit_size(deref->type, false);
   bool offset_valid = true;
   while (deref->deref_type != nir_deref_type_cast) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_var:
         unreachable("Buffers don't use variables in Vulkan");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         uint32_t stride = nir_deref_instr_array_stride(deref);
         if (deref->deref_type == nir_deref_type_array &&
             nir_src_is_const(deref->arr.index)) {
            offset += nir_src_as_uint(deref->arr.index) * stride;
         } else {
            range = glsl_get_length(parent->type) * stride;
         }
         break;
      }

      case nir_deref_type_ptr_as_array:
         /* All bets are off.  We shouldn't see these most of the time
          * anyway, even with variable pointers.
          */
         offset_valid = false;
         unreachable("Variable pointers aren't allowed on UBOs");
         break;

      case nir_deref_type_struct: {
         offset += glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
         break;
      }

      default:
         unreachable("Unknown deref type");
      }

      deref = parent;
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   if (load_desc == NULL ||
       load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return invalid;

   nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
   if (res_index == NULL ||
       res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
      return invalid;

   /* We try to early return as little as possible prior to this point so we
    * can return the resource index intrinsic in as many cases as possible.
    * After this point, though, early returns are fair game.
    */
   *resource_index_out = res_index;

   if (!offset_valid || !nir_src_is_const(res_index->src[0]))
      return invalid;

   uint32_t set = nir_intrinsic_desc_set(res_index);
   uint32_t binding = nir_intrinsic_binding(res_index);
   uint32_t index = nir_src_as_uint(res_index->src[0]);

   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;
      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_UBO_DESC,
         .desc_set = set,
         .desc_offset = binding_layout->offset +
                        index * binding_layout->stride,
      };
   }

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;

      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
         .desc_set = set,
         .dynamic_idx = binding_layout->dynamic_buffer_index + index,
      };
   }

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      *offset_out = binding_layout->offset;
      *start_out = binding_layout->offset + offset;
      *end_out = *start_out + range;

      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DESC_SET,
         .desc_set = set,
      };
   }

   default:
      return invalid;
   }
}

static void
record_load_ubo_cbuf_uses(nir_deref_instr *deref,
                          struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   UNUSED uint64_t offset;
   uint64_t start, end;
   nir_intrinsic_instr *res_index;
   struct nvk_cbuf cbuf =
      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);

   if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
      record_cbuf_use(&cbuf, start, end, ctx);
   } else if (res_index != NULL) {
      record_vulkan_resource_cbuf_use(res_index, ctx);
   }
}

static bool
record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
{
   struct lower_descriptors_ctx *ctx = _ctx;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         record_vulkan_resource_cbuf_use(intrin, ctx);
         return false;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         if (nir_deref_mode_is(deref, nir_var_mem_ubo))
            record_load_ubo_cbuf_uses(deref, ctx);
         return false;
      }

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         record_deref_descriptor_cbuf_use(deref, ctx);
         return false;
      }

      default:
         return false;
      }
      unreachable("All cases return false");
   }

   case nir_instr_type_tex:
      record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
      return false;

   default:
      return false;
   }
}

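/* Build the shader's cbuf map: cbuf 0 always holds the root descriptors and,
 * if the shader has constant data, cbuf 1 holds it.  Unless descriptor sets
 * are bound indirectly, the remaining slots are filled with the most heavily
 * used descriptor-set ranges and UBOs found by record_cbuf_uses_instr(),
 * subject to the per-stage binding limits below.
 */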
static void
build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
{
   ctx->cbuf_map->cbuf_count = 0;

   /* Root descriptors always go in cbuf 0 */
   ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
      .type = NVK_CBUF_TYPE_ROOT_DESC,
   };

   /* If we have constant data, put it at cbuf 1 */
   if (nir->constant_data_size > 0) {
      ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_SHADER_DATA,
      };
   }

   if (ctx->indirect_bind)
      return;

   ctx->cbufs = nvk_cbuf_table_create(NULL);
   nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
                                nir_metadata_all, (void *)ctx);

   struct lower_desc_cbuf *cbufs =
      ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
                   _mesa_hash_table_num_entries(ctx->cbufs));

   uint32_t num_cbufs = 0;
   hash_table_foreach(ctx->cbufs, entry) {
      struct lower_desc_cbuf *cbuf = entry->data;

      /* We currently only start cbufs at the beginning so if it starts after
       * the max cbuf size, there's no point in including it in the list.
       */
      if (cbuf->start > NVK_MAX_CBUF_SIZE)
         continue;

      cbufs[num_cbufs++] = *cbuf;
   }

   qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);

   uint8_t max_cbuf_bindings;
   if (nir->info.stage == MESA_SHADER_COMPUTE ||
       nir->info.stage == MESA_SHADER_KERNEL) {
      max_cbuf_bindings = 8;
   } else {
      max_cbuf_bindings = 16;
   }

   for (uint32_t i = 0; i < num_cbufs; i++) {
      if (ctx->cbuf_map->cbuf_count >= max_cbuf_bindings)
         break;

      /* We can't support indirect cbufs in compute yet */
      if ((nir->info.stage == MESA_SHADER_COMPUTE ||
           nir->info.stage == MESA_SHADER_KERNEL) &&
          cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
         continue;

      /* Prior to Turing, indirect cbufs require splitting the pushbuf and
       * pushing bits of the descriptor set.  Doing this every draw call is
       * probably more overhead than it's worth.
       */
      if (ctx->dev_info->cls_eng3d < TURING_A &&
          cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
         continue;

      ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = cbufs[i].key;
   }

   ralloc_free(ctx->cbufs);
   ctx->cbufs = NULL;
}

static int
get_mapped_cbuf_idx(const struct nvk_cbuf *key,
                    const struct lower_descriptors_ctx *ctx)
{
   if (ctx->cbuf_map == NULL)
      return -1;

   for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
      if (nvk_cbuf_equal(&ctx->cbuf_map->cbufs[c], key)) {
         return c;
      }
   }

   return -1;
}

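/* If a UBO load maps to one of the cbufs chosen in build_cbuf_map(), rewrite
 * it as a bound-cbuf access using nir_address_format_32bit_index_offset.
 */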
static bool
lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
{
   const struct lower_descriptors_ctx *ctx = _ctx;

   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
      return false;

   uint64_t offset, end;
   UNUSED uint64_t start;
   UNUSED nir_intrinsic_instr *res_index;
   struct nvk_cbuf cbuf_key =
      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);

   if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
      return false;

   if (end > NVK_MAX_CBUF_SIZE)
      return false;

   int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
   if (cbuf_idx < 0)
      return false;

   b->cursor = nir_before_instr(&load->instr);

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
   nir_address_format addr_format = nir_address_format_32bit_index_offset;
   for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
      addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);

   nir_deref_path_finish(&path);

   nir_lower_explicit_io_instr(b, load, addr, addr_format);

   return true;
}

static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
                    const struct lower_descriptors_ctx *ctx)
{
   assert(load->intrinsic == nir_intrinsic_load_constant);

   const struct nvk_cbuf cbuf_key = {
      .type = NVK_CBUF_TYPE_SHADER_DATA,
   };
   int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
   assert(cbuf_idx >= 0);

   uint32_t base = nir_intrinsic_base(load);

   b->cursor = nir_before_instr(&load->instr);

   nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
   nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
                              nir_imm_int(b, cbuf_idx), offset,
                              .align_mul = nir_intrinsic_align_mul(load),
                              .align_offset = nir_intrinsic_align_offset(load));

   nir_def_rewrite_uses(&load->def, data);

   return true;
}

static nir_def *
load_descriptor_set_addr(nir_builder *b, uint32_t set,
                         UNUSED const struct lower_descriptors_ctx *ctx)
{
   uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
      set * sizeof(struct nvk_buffer_address);

   return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
                     nir_imm_int(b, set_addr_offset),
                     .align_mul = 8, .align_offset = 0);
}

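/* Return the index of the first dynamic buffer belonging to @set.  When all
 * lower-numbered set layouts are known at compile time this is an immediate;
 * otherwise it's loaded from the root descriptor table.
 */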
static nir_def *
load_dynamic_buffer_start(nir_builder *b, uint32_t set,
                          const struct lower_descriptors_ctx *ctx)
{
   int dynamic_buffer_start_imm = 0;
   for (uint32_t s = 0; s < set; s++) {
      if (ctx->set_layouts[s] == NULL) {
         dynamic_buffer_start_imm = -1;
         break;
      }

      dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
   }

   if (dynamic_buffer_start_imm >= 0) {
      return nir_imm_int(b, dynamic_buffer_start_imm);
   } else {
      uint32_t root_offset =
         nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;

      return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
                                     nir_imm_int(b, root_offset),
                                     .align_mul = 1, .align_offset = 0));
   }
}

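/* Load a descriptor for the given set/binding/index.  Dynamic buffers come
 * from the root descriptor table, inline uniform blocks are returned as a
 * buffer address for the block itself, and everything else is read from the
 * descriptor set, either through a mapped cbuf or via a global load.
 */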
static nir_def *
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                uint32_t set, uint32_t binding, nir_def *index,
                unsigned offset_B, const struct lower_descriptors_ctx *ctx)
{
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   if (ctx->clamp_desc_array_bounds)
      index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
      /* Get the index in the root descriptor table dynamic_buffers array. */
      nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);

      index = nir_iadd(b, index,
                       nir_iadd_imm(b, dynamic_buffer_start,
                                    binding_layout->dynamic_buffer_index));
      uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
      nir_def *root_desc_offset =
         nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
                      nvk_root_descriptor_offset(dynamic_buffers));

      assert(num_components * bit_size <= desc_size * 8);
      return nir_ldc_nv(b, num_components, bit_size,
                        nir_imm_int(b, 0), root_desc_offset,
                        .align_mul = 16, .align_offset = 0);
   }

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      nir_def *base_addr =
         nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
                          binding_layout->offset);

      assert(binding_layout->stride == 1);
      const uint32_t binding_size = binding_layout->array_size;

      if (ctx->use_bindless_cbuf) {
         assert(num_components == 1 && bit_size == 64);
         const uint32_t size = align(binding_size, 16);
         return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
                               ((uint64_t)size >> 4) << 45);
      } else {
         /* Convert it to nir_address_format_64bit_bounded_global */
         assert(num_components == 4 && bit_size == 32);
         return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
                            nir_unpack_64_2x32_split_y(b, base_addr),
                            nir_imm_int(b, binding_size),
                            nir_imm_int(b, 0));
      }
   }

   default: {
      assert(binding_layout->stride > 0);
      nir_def *desc_ubo_offset =
         nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
                         binding_layout->offset + offset_B);

      uint64_t max_desc_ubo_offset = binding_layout->offset +
         binding_layout->array_size * binding_layout->stride;

      unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
      desc_align_mul = MIN2(desc_align_mul, 16);
      unsigned desc_align_offset = binding_layout->offset + offset_B;
      desc_align_offset %= desc_align_mul;

      const struct nvk_cbuf cbuf_key = {
         .type = NVK_CBUF_TYPE_DESC_SET,
         .desc_set = set,
      };
      int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);

      if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
         return nir_ldc_nv(b, num_components, bit_size,
                           nir_imm_int(b, cbuf_idx),
                           desc_ubo_offset,
                           .align_mul = desc_align_mul,
                           .align_offset = desc_align_offset);
      } else {
         nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
         return nir_load_global_constant_offset(b, num_components, bit_size,
                                                set_addr, desc_ubo_offset,
                                                .align_mul = desc_align_mul,
                                                .align_offset = desc_align_offset);
      }
   }
   }
}

static bool
is_idx_intrin(nir_intrinsic_instr *intrin)
{
   while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      intrin = nir_src_as_intrinsic(intrin->src[0]);
      if (intrin == NULL)
         return false;
   }

   return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
}

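/* Pack a nir_address_format_64bit_bounded_global vec4 (addr_lo, addr_hi,
 * size, offset) into the vec3 handle+offset form used for bindless cbuf
 * access: the base address is pre-shifted right by 4 and the size is folded
 * into the high word with a single imad.
 */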
static nir_def *
buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
{
   nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
   nir_def *size = nir_channel(b, addr, 2);
   nir_def *offset = nir_channel(b, addr, 3);

   nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
   nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
   nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);

   /* If we assume the top bits of the address are 0 as well as the bottom
    * two bits of the size (we can trust this since it's a descriptor), then
    *
    *    ((size >> 4) << 13) | addr
    *
    * is just an imad.
    */
   nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);

   return nir_vec3(b, addr16_lo, handle_hi, offset);
}

static nir_def *
load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                               const struct lower_descriptors_ctx *ctx)
{
   nir_def *index = nir_imm_int(b, 0);

   while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      index = nir_iadd(b, index, intrin->src[1].ssa);
      intrin = nir_src_as_intrinsic(intrin->src[0]);
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   index = nir_iadd(b, index, intrin->src[0].ssa);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
      nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);

      /* The descriptor is just the handle.  NIR also needs an offset. */
      return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
                         nir_unpack_64_2x32_split_y(b, desc),
                         nir_imm_int(b, 0));
   } else {
      nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);

      /* We know a priori that the .w component (offset) is zero */
      return nir_vec4(b, nir_channel(b, desc, 0),
                         nir_channel(b, desc, 1),
                         nir_channel(b, desc, 2),
                         nir_imm_int(b, 0));
   }
}

static bool
try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                                 const struct lower_descriptors_ctx *ctx)
{
   ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
   if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
      assert(descriptor_type_is_ssbo(desc_type));
      return false;
   }

   nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);

   nir_def_rewrite_uses(&intrin->def, desc);

   return true;
}

static bool
_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
                            uint32_t root_table_offset,
                            const struct lower_descriptors_ctx *ctx)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
                             intrin->def.bit_size,
                             nir_imm_int(b, 0), /* Root table */
                             nir_imm_int(b, root_table_offset),
                             .align_mul = 4,
                             .align_offset = 0);

   nir_def_rewrite_uses(&intrin->def, val);

   return true;
}

#define lower_sysval_to_root_table(b, intrin, member, ctx)           \
   _lower_sysval_to_root_table(b, intrin,                            \
                               nvk_root_descriptor_offset(member),   \
                               ctx)

static bool
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
                         const struct lower_descriptors_ctx *ctx)
{
   const uint32_t push_region_offset =
      nvk_root_descriptor_offset(push);
   const uint32_t base = nir_intrinsic_base(load);

   b->cursor = nir_before_instr(&load->instr);

   nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
                                  push_region_offset + base);

   nir_def *val =
      nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
                 nir_imm_int(b, 0), offset,
                 .align_mul = load->def.bit_size / 8,
                 .align_offset = 0);

   nir_def_rewrite_uses(&load->def, val);

   return true;
}

static void
get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
                           uint32_t *set, uint32_t *binding,
                           nir_def **index)
{
   if (deref->deref_type == nir_deref_type_array) {
      *index = deref->arr.index.ssa;
      deref = nir_deref_instr_parent(deref);
   } else {
      *index = nir_imm_int(b, 0);
   }

   assert(deref->deref_type == nir_deref_type_var);
   nir_variable *var = deref->var;

   *set = var->data.descriptor_set;
   *binding = var->data.binding;
}

static nir_def *
load_resource_deref_desc(nir_builder *b,
                         unsigned num_components, unsigned bit_size,
                         nir_deref_instr *deref, unsigned offset_B,
                         const struct lower_descriptors_ctx *ctx)
{
   uint32_t set, binding;
   nir_def *index;
   get_resource_deref_binding(b, deref, &set, &binding, &index);
   return load_descriptor(b, num_components, bit_size,
                          set, binding, index, offset_B, ctx);
}

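/* Lower an MSAA image intrinsic to a 2D one.  The descriptor packs the image
 * index in bits 0..19, the log2 sample width/height in bits 20..23, and the
 * per-sample x/y offsets in the second dword; the sample index is folded
 * into the x/y coordinate accordingly.
 */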
static void
lower_msaa_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                        const struct lower_descriptors_ctx *ctx)
{
   assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_def *desc = load_resource_deref_desc(b, 2, 32, deref, 0, ctx);
   nir_def *desc0 = nir_channel(b, desc, 0);
   nir_def *desc1 = nir_channel(b, desc, 1);

   nir_def *img_index = nir_ubitfield_extract_imm(b, desc0, 0, 20);
   nir_rewrite_image_intrinsic(intrin, img_index, true);

   nir_def *sw_log2 = nir_ubitfield_extract_imm(b, desc0, 20, 2);
   nir_def *sh_log2 = nir_ubitfield_extract_imm(b, desc0, 22, 2);
   nir_def *s_map = desc1;

   nir_def *sw = nir_ishl(b, nir_imm_int(b, 1), sw_log2);
   nir_def *sh = nir_ishl(b, nir_imm_int(b, 1), sh_log2);
   nir_def *num_samples = nir_imul(b, sw, sh);

   switch (intrin->intrinsic) {
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap: {
      nir_def *x = nir_channel(b, intrin->src[1].ssa, 0);
      nir_def *y = nir_channel(b, intrin->src[1].ssa, 1);
      nir_def *z = nir_channel(b, intrin->src[1].ssa, 2);
      nir_def *w = nir_channel(b, intrin->src[1].ssa, 3);
      nir_def *s = intrin->src[2].ssa;

      nir_def *s_xy = nir_ushr(b, s_map, nir_imul_imm(b, s, 4));
      nir_def *sx = nir_ubitfield_extract_imm(b, s_xy, 0, 2);
      nir_def *sy = nir_ubitfield_extract_imm(b, s_xy, 2, 2);

      x = nir_imad(b, x, sw, sx);
      y = nir_imad(b, y, sh, sy);

      /* Make OOB sample indices OOB X/Y indices */
      x = nir_bcsel(b, nir_ult(b, s, num_samples), x, nir_imm_int(b, -1));

      nir_src_rewrite(&intrin->src[1], nir_vec4(b, x, y, z, w));
      nir_src_rewrite(&intrin->src[2], nir_undef(b, 1, 32));
      break;
   }

   case nir_intrinsic_bindless_image_size: {
      b->cursor = nir_after_instr(&intrin->instr);

      nir_def *size = &intrin->def;
      nir_def *w = nir_channel(b, size, 0);
      nir_def *h = nir_channel(b, size, 1);

      w = nir_ushr(b, w, sw_log2);
      h = nir_ushr(b, h, sh_log2);

      size = nir_vector_insert_imm(b, size, w, 0);
      size = nir_vector_insert_imm(b, size, h, 1);

      nir_def_rewrite_uses_after(&intrin->def, size, size->parent_instr);
      break;
   }

   case nir_intrinsic_bindless_image_samples: {
      /* We need to handle NULL descriptors explicitly */
      nir_def *samples =
         nir_bcsel(b, nir_ieq(b, desc0, nir_imm_int(b, 0)),
                      nir_imm_int(b, 0), num_samples);
      nir_def_rewrite_uses(&intrin->def, samples);
      break;
   }

   default:
      unreachable("Unknown image intrinsic");
   }

   nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
}

static bool
is_edb_buffer_view(nir_deref_instr *deref,
                   const struct lower_descriptors_ctx *ctx)
{
   if (glsl_get_sampler_dim(deref->type) != GLSL_SAMPLER_DIM_BUF)
      return false;

   if (ctx->use_edb_buffer_views)
      return true;

   nir_variable *var = nir_deref_instr_get_variable(deref);
   uint8_t set = var->data.descriptor_set;

   return ctx->set_layouts[set]->flags &
          VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
}

static nir_def *
edb_buffer_view_is_null(nir_builder *b, nir_def *desc)
{
   assert(desc->num_components == 4);
   nir_def *index = nir_channel(b, desc, 0);
   return nir_ieq_imm(b, index, 0);
}

static nir_def *
edb_buffer_view_offset_el(nir_builder *b, nir_def *desc)
{
   assert(desc->num_components == 4);
   return nir_channel(b, desc, 1);
}

static nir_def *
edb_buffer_view_size_el(nir_builder *b, nir_def *desc)
{
   assert(desc->num_components == 4);
   return nir_channel(b, desc, 2);
}

static nir_def *
edb_buffer_view_oob_alpha(nir_builder *b, nir_def *desc)
{
   assert(desc->num_components == 4);
   return nir_channel(b, desc, 3);
}

static nir_def *
edb_buffer_view_coord_is_in_bounds(nir_builder *b, nir_def *desc,
                                   nir_def *coord)
{
   assert(desc->num_components == 4);
   return nir_ult(b, coord, edb_buffer_view_size_el(b, desc));
}

static nir_def *
edb_buffer_view_index(nir_builder *b, nir_def *desc, nir_def *in_bounds)
{
   assert(desc->num_components == 4);
   nir_def *index = nir_channel(b, desc, 0);

   /* Use the NULL descriptor for OOB access */
   return nir_bcsel(b, in_bounds, index, nir_imm_int(b, 0));
}

static nir_def *
adjust_edb_buffer_view_coord(nir_builder *b, nir_def *desc, nir_def *coord)
{
   return nir_iadd(b, coord, edb_buffer_view_offset_el(b, desc));
}

static nir_def *
fixup_edb_buffer_view_result(nir_builder *b, nir_def *desc, nir_def *in_bounds,
                             nir_def *res, nir_alu_type dest_type)
{
   if (res->num_components < 4)
      return res;

   nir_def *is_null = edb_buffer_view_is_null(b, desc);
   nir_def *oob_alpha = edb_buffer_view_oob_alpha(b, desc);

   nir_def *a = nir_channel(b, res, 3);
   a = nir_bcsel(b, nir_ior(b, in_bounds, is_null), a, oob_alpha);
   return nir_vector_insert_imm(b, res, a, 3);
}

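/* Lower a buffer-view image intrinsic that uses an EDB descriptor.  The vec4
 * descriptor holds the image index, the element offset, the size in
 * elements, and the out-of-bounds alpha value; bounds checking is done in
 * the shader and OOB accesses are redirected to the NULL descriptor.
 */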
static void
lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                              const struct lower_descriptors_ctx *ctx)
{
   assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_def *desc = load_resource_deref_desc(b, 4, 32, deref, 0, ctx);

   switch (intrin->intrinsic) {
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap: {
      nir_def *pos = intrin->src[1].ssa;
      nir_def *x = nir_channel(b, pos, 0);

      nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, x);
      nir_def *index = edb_buffer_view_index(b, desc, in_bounds);

      nir_def *new_x = adjust_edb_buffer_view_coord(b, desc, x);
      pos = nir_vector_insert_imm(b, pos, new_x, 0);
      nir_src_rewrite(&intrin->src[1], pos);

      if (intrin->intrinsic == nir_intrinsic_image_deref_load) {
         b->cursor = nir_after_instr(&intrin->instr);
         nir_def *res = &intrin->def;
         res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,
                                            nir_intrinsic_dest_type(intrin));
         nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
      }

      nir_rewrite_image_intrinsic(intrin, index, true);
      break;
   }

   case nir_intrinsic_image_deref_size: {
      assert(intrin->def.num_components == 1);
      nir_def *size_el = nir_channel(b, desc, 2);
      nir_def_rewrite_uses(&intrin->def, size_el);
      break;
   }

   default:
      unreachable("Unknown image intrinsic");
   }
}

static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                   const struct lower_descriptors_ctx *ctx)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_MS) {
      lower_msaa_image_intrin(b, intrin, ctx);
   } else if (is_edb_buffer_view(deref, ctx)) {
      lower_edb_buffer_image_intrin(b, intrin, ctx);
   } else {
      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
      nir_rewrite_image_intrinsic(intrin, desc, true);
   }

   return true;
}

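/* Turn interp_deref_at_sample into interp_deref_at_offset by looking up the
 * requested sample's position in the sample-locations table stored in the
 * root descriptor table.
 */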
static bool
lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
                       const struct lower_descriptors_ctx *ctx)
{
   const uint32_t root_table_offset =
      nvk_root_descriptor_offset(draw.sample_locations);

   nir_def *sample = interp->src[1].ssa;

   b->cursor = nir_before_instr(&interp->instr);

   nir_def *loc = nir_ldc_nv(b, 1, 64,
                             nir_imm_int(b, 0), /* Root table */
                             nir_imm_int(b, root_table_offset),
                             .align_mul = 8,
                             .align_offset = 0);

   /* Yay little endian */
   loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
   nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
   nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
   nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
   nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
   nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);

   assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
   interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
   nir_src_rewrite(&interp->src[1], offset);

   return true;
}

static bool
try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                 const struct lower_descriptors_ctx *ctx)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_constant:
      return lower_load_constant(b, intrin, ctx);

   case nir_intrinsic_load_vulkan_descriptor:
      return try_lower_load_vulkan_descriptor(b, intrin, ctx);

   case nir_intrinsic_load_workgroup_size:
      unreachable("Should have been lowered by nir_lower_cs_intrinsics()");

   case nir_intrinsic_load_num_workgroups:
      return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);

   case nir_intrinsic_load_base_workgroup_id:
      return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);

   case nir_intrinsic_load_push_constant:
      return lower_load_push_constant(b, intrin, ctx);

   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
      return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);

   case nir_intrinsic_load_base_instance:
      return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);

   case nir_intrinsic_load_draw_id:
      return lower_sysval_to_root_table(b, intrin, draw.draw_index, ctx);

   case nir_intrinsic_load_view_index:
      return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      return lower_image_intrin(b, intrin, ctx);

   case nir_intrinsic_interp_deref_at_sample:
      return lower_interp_at_sample(b, intrin, ctx);

   default:
      return false;
   }
}

static void
lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
                           const struct lower_descriptors_ctx *ctx)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_BUF);

   b->cursor = nir_before_instr(&tex->instr);

   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);

   nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
   ASSERTED const uint32_t plane =
      plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
   assert(plane == 0);

   nir_def *desc = load_resource_deref_desc(b, 4, 32, texture, 0, ctx);

   switch (tex->op) {
   case nir_texop_txf: {
      const int coord_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      assert(coord_src_idx >= 0);
      nir_def *coord = tex->src[coord_src_idx].src.ssa;

      nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);

      nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
      nir_src_rewrite(&tex->src[texture_src_idx].src, index);
      tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;

      nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
      nir_src_rewrite(&tex->src[coord_src_idx].src, new_coord);

      b->cursor = nir_after_instr(&tex->instr);
      nir_def *res = &tex->def;
      res = fixup_edb_buffer_view_result(b, desc, in_bounds,
                                         res, tex->dest_type);
      nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
      break;
   }

   case nir_texop_txs: {
      assert(tex->def.num_components == 1);
      nir_def *size_el = edb_buffer_view_size_el(b, desc);
      nir_def_rewrite_uses(&tex->def, size_el);
      break;
   }

   default:
      unreachable("Invalid buffer texture op");
   }
}

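/* Lower texture/sampler derefs to bindless handles.  The texture and sampler
 * indices are loaded from their descriptors and OR'd into a single combined
 * handle which is passed as both the texture and sampler source.
 */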
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          const struct lower_descriptors_ctx *ctx)
{
   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   const int sampler_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (texture_src_idx < 0) {
      assert(sampler_src_idx < 0);
      return false;
   }

   nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
   nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
                              nir_src_as_deref(tex->src[sampler_src_idx].src);
   assert(texture);

   if (is_edb_buffer_view(texture, ctx)) {
      lower_edb_buffer_tex_instr(b, tex, ctx);
      return true;
   }

   b->cursor = nir_before_instr(&tex->instr);

   nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
   const uint32_t plane =
      plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
   const uint64_t plane_offset_B =
      plane * sizeof(struct nvk_sampled_image_descriptor);

   nir_def *texture_desc =
      load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);

   nir_def *combined_handle;
   if (texture == sampler) {
      combined_handle = texture_desc;
   } else {
      combined_handle = nir_iand_imm(b, texture_desc,
                                     NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);

      if (sampler != NULL) {
         nir_def *sampler_desc =
            load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
         nir_def *sampler_index =
            nir_iand_imm(b, sampler_desc,
                         NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
         combined_handle = nir_ior(b, combined_handle, sampler_index);
      }
   }

   /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
   combined_handle = nir_u2u64(b, combined_handle);

   /* TODO: The nv50 back-end assumes it gets handles both places, even for
    * texelFetch.
    */
   nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
   tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;

   if (sampler_src_idx < 0) {
      nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
   } else {
      nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   /* On pre-Volta hardware, we don't have real null descriptors.  Null
    * descriptors work well enough for sampling but they may not return the
    * correct query results.
    */
   if (ctx->dev_info->cls_eng3d < VOLTA_A && nir_tex_instr_is_query(tex)) {
      b->cursor = nir_after_instr(&tex->instr);

      /* This should get CSE'd with the earlier load */
      nir_def *texture_handle =
         nir_iand_imm(b, texture_desc, NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
      nir_def *is_null = nir_ieq_imm(b, texture_handle, 0);
      nir_def *zero = nir_imm_zero(b, tex->def.num_components,
                                      tex->def.bit_size);
      nir_def *res = nir_bcsel(b, is_null, zero, &tex->def);
      nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
   }

   return true;
}

static bool
try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
                            void *_data)
{
   const struct lower_descriptors_ctx *ctx = _data;

   switch (instr->type) {
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), ctx);
   case nir_instr_type_intrinsic:
      return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
   default:
      return false;
   }
}

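/* Marker value stored in the top 32 bits of an SSBO binding address to
 * indicate that the low 32 bits are an offset into the root descriptor table
 * rather than part of a real GPU address.  lower_load_ssbo_descriptor()
 * checks for it to decide where to load the descriptor from.
 */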
#define ROOT_DESC_BASE_ADDR_HI 0x0057de3c

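/* Lower vulkan_resource_index for SSBOs to a (base_addr, size, offset) vec4.
 * The binding's descriptor array address goes in the first two components
 * with the binding stride tucked into its top 8 bits; dynamic SSBOs instead
 * point into the root descriptor table via ROOT_DESC_BASE_ADDR_HI.
 */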
1350 static bool
lower_ssbo_resource_index(nir_builder * b,nir_intrinsic_instr * intrin,const struct lower_descriptors_ctx * ctx)1351 lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1352                           const struct lower_descriptors_ctx *ctx)
1353 {
1354    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1355       return false;
1356 
1357    b->cursor = nir_instr_remove(&intrin->instr);
1358 
1359    uint32_t set = nir_intrinsic_desc_set(intrin);
1360    uint32_t binding = nir_intrinsic_binding(intrin);
1361    nir_def *index = intrin->src[0].ssa;
1362 
1363    const struct nvk_descriptor_set_binding_layout *binding_layout =
1364       get_binding_layout(set, binding, ctx);
1365 
1366    nir_def *binding_addr;
1367    uint8_t binding_stride;
1368    switch (binding_layout->type) {
1369    case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1370    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1371       nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1372       binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1373       binding_stride = binding_layout->stride;
1374       break;
1375    }
1376 
1377    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1378       nir_def *dynamic_buffer_start =
1379          nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1380                       binding_layout->dynamic_buffer_index);
1381 
1382       nir_def *dynamic_binding_offset =
1383          nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1384                                       sizeof(struct nvk_buffer_address)),
1385                       nvk_root_descriptor_offset(dynamic_buffers));
1386 
1387       binding_addr =
1388          nir_pack_64_2x32_split(b, dynamic_binding_offset,
1389                                 nir_imm_int(b, ROOT_DESC_BASE_ADDR_HI));
1390       binding_stride = sizeof(struct nvk_buffer_address);
1391       break;
1392    }
1393 
1394    default:
1395       unreachable("Not an SSBO descriptor");
1396    }
1397 
1398    /* Tuck the stride in the top 8 bits of the binding address */
1399    binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1400 
1401    const uint32_t binding_size = binding_layout->array_size * binding_stride;
1402    nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1403 
1404    /* We depend on this when we load descrptors */
1405    assert(binding_layout->array_size >= 1);
1406 
1407    nir_def *addr;
1408    switch (ctx->ssbo_addr_format) {
1409    case nir_address_format_64bit_global_32bit_offset:
1410    case nir_address_format_64bit_bounded_global:
1411       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1412                          nir_unpack_64_2x32_split_y(b, binding_addr),
1413                          nir_imm_int(b, binding_size),
1414                          offset_in_binding);
1415       break;
1416 
1417    default:
1418       unreachable("Unknown address mode");
1419    }
1420 
1421    nir_def_rewrite_uses(&intrin->def, addr);
1422 
1423    return true;
1424 }
1425 
1426 static bool
lower_ssbo_resource_reindex(nir_builder * b,nir_intrinsic_instr * intrin,const struct lower_descriptors_ctx * ctx)1427 lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1428                             const struct lower_descriptors_ctx *ctx)
1429 {
1430    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1431       return false;
1432 
1433    b->cursor = nir_instr_remove(&intrin->instr);
1434 
1435    nir_def *addr = intrin->src[0].ssa;
1436    nir_def *index = intrin->src[1].ssa;
1437 
1438    nir_def *addr_high32;
1439    switch (ctx->ssbo_addr_format) {
1440    case nir_address_format_64bit_global_32bit_offset:
1441    case nir_address_format_64bit_bounded_global:
1442       addr_high32 = nir_channel(b, addr, 1);
1443       break;
1444 
1445    default:
1446       unreachable("Unknown address mode");
1447    }
1448 
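   /* The array stride was tucked into the top 8 bits of the 64-bit binding
    * address, i.e. bits 24..31 of the high dword.
    */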
1449    nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1450    nir_def *offset = nir_imul(b, index, stride);
1451 
1452    addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1453                               nir_var_mem_ssbo, offset);
1454    nir_def_rewrite_uses(&intrin->def, addr);
1455 
1456    return true;
1457 }
1458 
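/* Annotation (not in the original source): lowers
 * nir_intrinsic_load_vulkan_descriptor for SSBOs by loading the 16B buffer
 * descriptor either from the root descriptor (dynamic SSBOs) or from the
 * descriptor set in global memory.
 */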
1459 static bool
1460 lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1461                            const struct lower_descriptors_ctx *ctx)
1462 {
1463    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1464       return false;
1465 
1466    b->cursor = nir_instr_remove(&intrin->instr);
1467 
1468    nir_def *addr = intrin->src[0].ssa;
1469 
1470    nir_def *base, *offset, *size = NULL;
1471    switch (ctx->ssbo_addr_format) {
1472    case nir_address_format_64bit_global_32bit_offset: {
1473       base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1474       offset = nir_channel(b, addr, 3);
1475       break;
1476    }
1477 
1478    case nir_address_format_64bit_bounded_global: {
1479       base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1480       size = nir_channel(b, addr, 2);
1481       offset = nir_channel(b, addr, 3);
1482       break;
1483    }
1484 
1485    default:
1486       unreachable("Unknown address mode");
1487    }
1488 
1489    /* Mask off the binding stride */
1490    base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1491 
1492    nir_def *base_lo = nir_unpack_64_2x32_split_x(b, base);
1493    nir_def *base_hi = nir_unpack_64_2x32_split_y(b, base);
1494 
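   /* Binding addresses tagged with ROOT_DESC_BASE_ADDR_HI in the high dword
    * (see lower_ssbo_resource_index) refer to the root descriptor, which is
    * read through UBO index 0.  Everything else is a real GPU address and is
    * loaded from global memory, bounds-checked when the address format
    * carries a size.
    */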
1495    nir_def *desc_root, *desc_global;
1496    nir_push_if(b, nir_ieq_imm(b, base_hi, ROOT_DESC_BASE_ADDR_HI));
1497    {
1498       desc_root = nir_load_ubo(b, 4, 32, nir_imm_int(b, 0),
1499                                nir_iadd(b, base_lo, offset),
1500                                .align_mul = 16, .align_offset = 0,
1501                                .range = ~0);
1502       if (size != NULL) {
1503          /* Zero the descriptor if a 16B load at this offset would run past the end of the binding (offset > size - 16). */
1504          nir_def *is_oob = nir_ult(b, nir_iadd_imm(b, size, -16), offset);
1505          desc_root = nir_bcsel(b, is_oob, nir_imm_zero(b, 4, 32), desc_root);
1506       }
1507    }
1508    nir_push_else(b, NULL);
1509    {
1510       if (size != NULL) {
1511          desc_global = nir_load_global_constant_bounded(b, 4, 32, base,
1512                                                         offset, size,
1513                                                         .align_mul = 16,
1514                                                         .align_offset = 0);
1515       } else {
1516          desc_global = nir_load_global_constant_offset(b, 4, 32, base,
1517                                                        offset,
1518                                                        .align_mul = 16,
1519                                                        .align_offset = 0);
1520       }
1521    }
1522    nir_pop_if(b, NULL);
1523    nir_def *desc = nir_if_phi(b, desc_root, desc_global);
1524 
1525    nir_def_rewrite_uses(&intrin->def, desc);
1526 
1527    return true;
1528 }
1529 
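/* Annotation (not in the original source): fallback pass for any SSBO
 * descriptor intrinsics still left after try_lower_descriptors_instr
 * (pass 4 in the overview below).
 */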
1530 static bool
1531 lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1532                             void *_data)
1533 {
1534    const struct lower_descriptors_ctx *ctx = _data;
1535 
1536    if (instr->type != nir_instr_type_intrinsic)
1537       return false;
1538 
1539    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1540    switch (intrin->intrinsic) {
1541    case nir_intrinsic_vulkan_resource_index:
1542       return lower_ssbo_resource_index(b, intrin, ctx);
1543    case nir_intrinsic_vulkan_resource_reindex:
1544       return lower_ssbo_resource_reindex(b, intrin, ctx);
1545    case nir_intrinsic_load_vulkan_descriptor:
1546       return lower_load_ssbo_descriptor(b, intrin, ctx);
1547    default:
1548       return false;
1549    }
1550 }
1551 
1552 bool
1553 nvk_nir_lower_descriptors(nir_shader *nir,
1554                           const struct nvk_physical_device *pdev,
1555                           VkShaderCreateFlagsEXT shader_flags,
1556                           const struct vk_pipeline_robustness_state *rs,
1557                           uint32_t set_layout_count,
1558                           struct vk_descriptor_set_layout * const *set_layouts,
1559                           struct nvk_cbuf_map *cbuf_map_out)
1560 {
1561    struct lower_descriptors_ctx ctx = {
1562       .dev_info = &pdev->info,
1563       .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
1564       .use_edb_buffer_views = nvk_use_edb_buffer_views(pdev),
1565       .clamp_desc_array_bounds =
1566          rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1567          rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1568          rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1569       .indirect_bind =
1570          shader_flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
1571       .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
1572       .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
1573    };
1574 
1575    assert(set_layout_count <= NVK_MAX_SETS);
1576    for (uint32_t s = 0; s < set_layout_count; s++) {
1577       if (set_layouts[s] != NULL)
1578          ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1579    }
1580 
1581    /* We run in four passes:
1582     *
1583     *  1. Find ranges of UBOs that we can promote to bound UBOs.  Nothing is
1584     *     actually lowered in this pass.  It's just analysis.
1585     *
1586     *  2. Try to lower UBO loads to cbufs based on the map we just created.
1587     *     We need to do this before the main lowering pass because it relies
1588     *     on the original descriptor load intrinsics.
1589     *
1590     *  3. Attempt to lower everything with direct descriptors.  This may fail
1591     *     to lower some SSBO intrinsics when variable pointers are used.
1592     *
1593     *  4. Clean up any SSBO intrinsics which are left and lower them to
1594     *     slightly less efficient but variable-pointers-correct versions.
1595     */
1596 
1597    bool pass_lower_ubo = false;
1598    if (cbuf_map_out != NULL) {
1599       ctx.cbuf_map = cbuf_map_out;
1600       build_cbuf_map(nir, &ctx);
1601 
1602       pass_lower_ubo =
1603          nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1604                                     nir_metadata_control_flow,
1605                                     (void *)&ctx);
1606    }
1607 
1608    bool pass_lower_descriptors =
1609       nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1610                                    nir_metadata_control_flow,
1611                                    (void *)&ctx);
1612    bool pass_lower_ssbo =
1613       nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1614                                    nir_metadata_control_flow,
1615                                    (void *)&ctx);
1616    return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1617 }
1618