/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}
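
/* For reference, the block above mirrors the first two fields of
 * struct zink_gfx_push_constant (a hedged sketch; the real struct lives in
 * the zink headers and may carry more fields after these):
 *
 *    struct zink_gfx_push_constant {
 *       unsigned draw_mode_is_indexed;
 *       unsigned draw_id;
 *       ...
 *    };
 *
 * ntv then loads each member by the byte offset recorded in
 * glsl_struct_field::offset.
 */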

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}
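
/* A sketch of the rewrite performed above (NIR shorthand, names illustrative):
 *
 *    32x1 %dim = intrinsic load_work_dim ()
 * becomes
 *    32x1 %dim = intrinsic load_push_constant (0) (range=12)
 *
 * i.e. work_dim is fetched from offset 0 of the cs_pushconst block
 * created by create_cs_pushconst().
 */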

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}
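
/* Illustrative before/after for the cast (hedged; exact NIR will differ):
 * a u64vec2 input becomes a uvec4 input, and each pair of 32-bit channels
 * is repacked into one 64-bit value:
 *
 *    64x2 %v = load_deref %attr            // in u64vec2
 * becomes
 *    32x4 %u = load_deref %attr            // in uvec4
 *    64x1 %lo = pack_64_2x32 %u.xy
 *    64x1 %hi = pack_64_2x32 %u.zw
 *    64x2 %v = vec2 %lo, %hi
 */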

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}
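
/* In GLSL terms the split looks roughly like this (the _half name is
 * purely illustrative):
 *
 *    in dvec4 attr;                  // consumes locations N and N+1
 * becomes
 *    in dvec2 attr;                  // location N
 *    in dvec2 attr_half;             // location N+1
 *
 * with every load of attr rewritten as vec4(attr.xy, attr_half.xy).
 */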

static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
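
/* The rewrite above effectively computes (pseudocode, names illustrative):
 *
 *    base_vertex = draw_mode_is_indexed == 1 ? BaseVertex : 0;
 *
 * since Vulkan's BaseVertex builtin is only meaningful for indexed draws,
 * while GL expects gl_BaseVertex to read as 0 for non-indexed draws.
 */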


static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
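
/* This matches how dual-source blending is expressed in GLSL, where the
 * second blend source lives at the same location with index 1, e.g.:
 *
 *    layout(location = 0, index = 0) out vec4 src0;
 *    layout(location = 0, index = 1) out vec4 src1;
 */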

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .has_txs = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }

   /* "The OpFRem and OpFMod instructions use cheap approximations of remainder,
    *  and the error can be large due to the discontinuity in trunc() and floor().
    *  This can produce mathematically unexpected results in some cases, such as
    *  FMod(x,x) computing x rather than 0, and can also cause the result to have
    *  a different sign than the infinitely precise result."
    *
    *  - Table 84. Precision of core SPIR-V Instructions
    *
    * For drivers that are known to have imprecise fmod for doubles, lower dmod.
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}


static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}

static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return true;
   default:
      break;
   }
   return false;
}


struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};
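
/* Each array above is indexed by access bit size as (bit_size >> 4),
 * i.e. 8bit -> 0, 16bit -> 1, 32bit -> 2, 64bit -> 4 (slot 3 is unused),
 * matching the "bit_size >> 4" and "explicit_stride >> 1" lookups below.
 */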

static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}

static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}

static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}
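
/* Roughly, this turns a constant-offset load that lands entirely past the
 * end of the bound (sized) array into zeros:
 *
 *    32x2 %v = load_ssbo %buf, 100   // sized array only holds 64 elements
 * becomes
 *    32x2 %v = vec2 0, 0
 *
 * and an equivalent out-of-bounds store is simply deleted.
 */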

static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}

static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}

static bool
is_inlined(const bool *inlined, const struct pipe_stream_output *output)
{
   for (unsigned i = 0; i < output->num_components; i++)
      if (!inlined[output->start_component + i])
         return false;
   return true;
}

static void
update_psiz_location(nir_shader *nir, nir_variable *psiz)
{
   uint32_t last_output = util_last_bit64(nir->info.outputs_written);
   if (last_output < VARYING_SLOT_VAR0)
      last_output = VARYING_SLOT_VAR0;
   else
      last_output++;
   /* this should get fixed up by slot remapping */
   psiz->data.location = last_output;
}

static const struct glsl_type *
clamp_slot_type(const struct glsl_type *type, unsigned slot)
{
   /* could be dvec/dmat/mat: each member is the same */
   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
   /* determine size of each member type */
   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
   /* normalize slot idx to current type's size */
   slot %= slot_count;
   unsigned slot_components = glsl_get_components(plain);
   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
      slot_components *= 2;
   /* create a vec4 mask of the selected slot's components out of all the components */
   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
   /* return a vecN of the selected components */
   slot_components = util_bitcount(mask);
   return glsl_vec_type(slot_components);
}
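
/* Worked example: a dvec3 occupies 6 32-bit components across two vec4
 * slots. For slot 1:
 *
 *    mask = BITFIELD_MASK(6) & BITFIELD_RANGE(4, 4)
 *         = 0b00111111 & 0b11110000 = 0b00110000   -> 2 bits set
 *
 * so the function returns vec2, the tail of the dvec3 in that slot.
 */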

static const struct glsl_type *
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
{
   const struct glsl_type *type = slot_type;
   unsigned slot_count = 0;
   unsigned cur_slot = 0;
   /* iterate over all the members in the struct, stopping once the slot idx is reached */
   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
      /* use array type for slot counting but return array member type for unroll */
      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
      type = glsl_without_array(arraytype);
      slot_count = glsl_count_vec4_slots(arraytype, false, false);
   }
   *slot_idx -= (cur_slot - slot_count);
   if (!glsl_type_is_struct_or_ifc(type))
      /* this is a fully unrolled struct: find the number of vec components to output */
      type = clamp_slot_type(type, *slot_idx);
   return type;
}

static unsigned
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
{
   assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
   const struct glsl_type *orig_type = var->type;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned slot_idx = slot - so_slot;
   if (type != orig_type)
      slot_idx %= glsl_count_vec4_slots(type, false, false);
   /* need to find the vec4 that's being exported by this slot */
   while (glsl_type_is_struct_or_ifc(type))
      type = unroll_struct_type(type, &slot_idx);

   /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
   unsigned num_components = glsl_get_components(glsl_without_array(type));
   const struct glsl_type *arraytype = orig_type;
   while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) {
      num_components *= glsl_array_size(arraytype);
      arraytype = glsl_get_array_element(arraytype);
   }
   assert(num_components);
   /* gallium handles xfb in terms of 32bit units */
   if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
      num_components *= 2;
   return num_components;
}

static const struct pipe_stream_output *
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *packed_output = &so_info->output[i];
      if (reverse_map[packed_output->register_index] == slot)
         return packed_output;
   }
   return NULL;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   bool have_fake_psiz = false;
   nir_foreach_shader_out_variable(var, zs->nir) {
      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
         have_fake_psiz = true;
   }

   bool inlined[VARYING_SLOT_MAX][4] = {0};
   uint64_t packed = 0;
   uint8_t packed_components[VARYING_SLOT_MAX] = {0};
   uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
   uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
   uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
   nir_variable *psiz = NULL;
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         unsigned so_slot;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         if (var->data.location == VARYING_SLOT_PSIZ)
            psiz = var;
         so_slot = slot + 1;
         slot = reverse_map[output->register_index];
         if (var->data.explicit_xfb_buffer) {
            /* handle dvec3 where gallium splits streamout over 2 registers */
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         }
         if (is_inlined(inlined[slot], output))
            continue;
         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
         unsigned num_components = get_slot_components(var, slot, so_slot);
         /* if this is the entire variable, try to blast it out during the initial declaration;
          * structs must be handled later to ensure accurate analysis
          */
         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         } else {
            /* otherwise store some metadata for later */
            packed |= BITFIELD64_BIT(slot);
            packed_components[slot] += output->num_components;
            packed_streams[slot] |= BITFIELD_BIT(output->stream);
            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
            for (unsigned j = 0; j < output->num_components; j++)
               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
         }
      }
   }

   /* if this was flagged as a packed output before, and if all the components are
    * being output with the same stream on the same buffer with increasing offsets, this entire variable
    * can be consolidated into a single output to conserve locations
    */
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;

         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
            if (!packed_output)
               goto out;

            /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
            if (!(packed & BITFIELD64_BIT(slot)) ||
                util_bitcount(packed_streams[slot]) != 1 ||
                util_bitcount(packed_buffers[slot]) != 1)
               goto out;

            /* if all the components the variable exports to this slot aren't captured, skip consolidation */
            unsigned num_components = get_slot_components(var, slot, var->data.location);
            if (glsl_type_is_array(var->type) && !glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
               num_components /= glsl_array_size(var->type);
            if (num_components != packed_components[slot])
               goto out;

            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
                  goto out;
               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
         var->data.xfb.buffer = output->output_buffer;
         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
         var->data.offset = output->dst_offset * 4;
         var->data.stream = output->stream;
         /* GLSL specifies that interface blocks are split per-buffer in XFB */
         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            for (unsigned k = 0; k < packed_components[slot]; k++)
               inlined[slot][k] = true;
            packed &= ~BITFIELD64_BIT(slot);
         }
         continue;
      }
out:
      /* these are packed/explicit varyings which can't be exported with normal output */
      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
   /* ensure this doesn't get output in the shader by unsetting location */
   if (have_fake_psiz && psiz)
      update_psiz_location(zs->nir, psiz);
}

struct decompose_state {
   nir_variable **split;
   bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
   return true;
}
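
/* Illustrative result (hedged; the _split%u names come from the loop above):
 * a vec3 attribute at driver_location 0 that gets fully decomposed becomes
 *
 *    in float attr_split0;   // driver_locations taken from the free-bits mask
 *    in float attr_split1;
 *    in float attr_split2;
 *
 * and each load of attr is rewritten by lower_attrib() as
 * vec3(attr_split0, attr_split1, attr_split2).
 */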

static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   const bool has_int64 = screen->info.feats.features.shaderInt64;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap: {
      /* convert offset to uintN_t[idx] */
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      return true;
   }
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ubo: {
      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
                        nir_dest_bit_size(intr->dest) == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
            else
               load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_load_shared:
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
         return true;
      }
      break;
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   default:
      break;
   }
   return false;
}
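
/* A hedged sketch of the offset rewrite: ntv indexes the uint arrays created
 * by get_bo_var() by element rather than by byte, so a byte-based access like
 *
 *    32x1 %v = load_ssbo %buf, 16       // byte offset
 * becomes
 *    32x1 %v = load_ssbo %buf, 4        // uint index (16 / 4)
 *
 * with 64-bit accesses optionally split into two 32-bit halves when
 * shaderInt64 is unavailable.
 */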

static bool
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}

static nir_variable *
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
{
   nir_variable *var, **ptr;
   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;

   if (ssbo)
      ptr = &bo->ssbo[bit_size >> 4];
   else {
      if (!idx) {
         ptr = &bo->uniforms[bit_size >> 4];
      } else
         ptr = &bo->ubo[bit_size >> 4];
   }
   var = *ptr;
   if (!var) {
      if (ssbo)
         var = bo->ssbo[32 >> 4];
      else {
         if (!idx)
            var = bo->uniforms[32 >> 4];
         else
            var = bo->ubo[32 >> 4];
      }
      var = nir_variable_clone(var, shader);
      *ptr = var;
      nir_shader_add_variable(shader, var);

      struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
      fields[0].name = ralloc_strdup(shader, "base");
      fields[1].name = ralloc_strdup(shader, "unsized");
      unsigned array_size = glsl_get_length(var->type);
      const struct glsl_type *bare_type = glsl_without_array(var->type);
      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
      unsigned length = glsl_get_length(array_type);
      const struct glsl_type *type;
      const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
      if (bit_size > 32) {
         assert(bit_size == 64);
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
      } else {
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
      }
      fields[0].type = type;
      fields[1].type = unsized;
      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
      var->data.driver_location = idx;
   }
   return var;
}
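
/* The variable synthesized above is, roughly (GLSL-style sketch for the
 * 32-bit SSBO case; field names match those set in the code):
 *
 *    struct {
 *       uint base[length];
 *       uint unsized[];
 *    } ssbo[array_size];
 *
 * 64-bit variants halve the element count and use a 64-bit uint type.
 */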
1215 
1216 static void
rewrite_atomic_ssbo_instr(nir_builder * b,nir_instr * instr,struct bo_vars * bo)1217 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
1218 {
1219    nir_intrinsic_op op;
1220    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1221    switch (intr->intrinsic) {
1222    case nir_intrinsic_ssbo_atomic_add:
1223       op = nir_intrinsic_deref_atomic_add;
1224       break;
1225    case nir_intrinsic_ssbo_atomic_umin:
1226       op = nir_intrinsic_deref_atomic_umin;
1227       break;
1228    case nir_intrinsic_ssbo_atomic_imin:
1229       op = nir_intrinsic_deref_atomic_imin;
1230       break;
1231    case nir_intrinsic_ssbo_atomic_umax:
1232       op = nir_intrinsic_deref_atomic_umax;
1233       break;
1234    case nir_intrinsic_ssbo_atomic_imax:
1235       op = nir_intrinsic_deref_atomic_imax;
1236       break;
1237    case nir_intrinsic_ssbo_atomic_and:
1238       op = nir_intrinsic_deref_atomic_and;
1239       break;
1240    case nir_intrinsic_ssbo_atomic_or:
1241       op = nir_intrinsic_deref_atomic_or;
1242       break;
1243    case nir_intrinsic_ssbo_atomic_xor:
1244       op = nir_intrinsic_deref_atomic_xor;
1245       break;
1246    case nir_intrinsic_ssbo_atomic_exchange:
1247       op = nir_intrinsic_deref_atomic_exchange;
1248       break;
1249    case nir_intrinsic_ssbo_atomic_comp_swap:
1250       op = nir_intrinsic_deref_atomic_comp_swap;
1251       break;
1252    default:
1253       unreachable("unknown intrinsic");
1254    }
1255    nir_ssa_def *offset = intr->src[1].ssa;
1256    nir_src *src = &intr->src[0];
1257    nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
1258    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
1259    nir_ssa_def *idx = src->ssa;
1260    if (bo->first_ssbo)
1261       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
1262    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
1263    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
1264 
1265    /* generate new atomic deref ops for every component */
1266    nir_ssa_def *result[4];
1267    unsigned num_components = nir_dest_num_components(intr->dest);
1268    for (unsigned i = 0; i < num_components; i++) {
1269       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1270       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
1271       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
1272       new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
1273       /* deref ops have no offset src, so copy the srcs after it */
1274       for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
1275          nir_src_copy(&new_instr->src[j - 1], &intr->src[j]);
1276       nir_builder_instr_insert(b, &new_instr->instr);
1277 
1278       result[i] = &new_instr->dest.ssa;
1279       offset = nir_iadd_imm(b, offset, 1);
1280    }
1281 
1282    nir_ssa_def *load = nir_vec(b, result, num_components);
1283    nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1284    nir_instr_remove(instr);
1285 }
1286 
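/* Per-instruction callback: rewrites UBO/SSBO loads, SSBO stores, and SSBO
 * atomics into deref-based access on the variables from get_bo_var().  UBO
 * index 0 is the uniform block variable (driver_location 0), so non-zero UBO
 * indices are shifted down by one before indexing the "ubos" array.
 */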
1287 static bool
1288 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1289 {
1290    struct bo_vars *bo = data;
1291    if (instr->type != nir_instr_type_intrinsic)
1292       return false;
1293    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1294    nir_variable *var = NULL;
1295    nir_ssa_def *offset = NULL;
1296    bool is_load = true;
1297    b->cursor = nir_before_instr(instr);
1298    nir_src *src;
1299    bool ssbo = true;
1300    switch (intr->intrinsic) {
1301    case nir_intrinsic_ssbo_atomic_add:
1302    case nir_intrinsic_ssbo_atomic_umin:
1303    case nir_intrinsic_ssbo_atomic_imin:
1304    case nir_intrinsic_ssbo_atomic_umax:
1305    case nir_intrinsic_ssbo_atomic_imax:
1306    case nir_intrinsic_ssbo_atomic_and:
1307    case nir_intrinsic_ssbo_atomic_or:
1308    case nir_intrinsic_ssbo_atomic_xor:
1309    case nir_intrinsic_ssbo_atomic_exchange:
1310    case nir_intrinsic_ssbo_atomic_comp_swap:
1311       rewrite_atomic_ssbo_instr(b, instr, bo);
1312       return true;
1313    case nir_intrinsic_store_ssbo:
1314       src = &intr->src[1];
1315       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
1316       offset = intr->src[2].ssa;
1317       is_load = false;
1318       break;
1319    case nir_intrinsic_load_ssbo:
1320       src = &intr->src[0];
1321       var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
1322       offset = intr->src[1].ssa;
1323       break;
1324    case nir_intrinsic_load_ubo:
1325       src = &intr->src[0];
1326       var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
1327       offset = intr->src[1].ssa;
1328       ssbo = false;
1329       break;
1330    default:
1331       return false;
1332    }
1333    assert(var);
1334    assert(offset);
1335    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
1336    nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
1337    if (!ssbo && bo->first_ubo && var->data.driver_location)
1338       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
1339    else if (ssbo && bo->first_ssbo)
1340       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
1341    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
1342    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
1343    assert(intr->num_components <= 2);
1344    if (is_load) {
1345       nir_ssa_def *result[2];
1346       for (unsigned i = 0; i < intr->num_components; i++) {
1347          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1348          result[i] = nir_load_deref(b, deref_arr);
1349          if (intr->intrinsic == nir_intrinsic_load_ssbo)
1350             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
1351          offset = nir_iadd_imm(b, offset, 1);
1352       }
1353       nir_ssa_def *load = nir_vec(b, result, intr->num_components);
1354       nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1355    } else {
1356       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
1357       nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
1358    }
1359    nir_instr_remove(instr);
1360    return true;
1361 }
1362 
1363 static bool
1364 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
1365 {
1366    struct bo_vars bo = get_bo_vars(zs, shader);
1367    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
1368 }
1369 
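/* Assign compacted driver_locations to producer outputs.  Builtin varyings
 * get a sentinel location (UINT_MAX) so they aren't counted against the
 * generic varying limit; everything else is packed through slot_map so the
 * consumer can later look up the same location by slot.  A varying spanning
 * multiple vec4 slots (e.g. an array) reserves that many consecutive entries.
 */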
1370 static void
1371 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1372 {
1373    unsigned slot = var->data.location;
1374    switch (slot) {
1375    case -1:
1376    case VARYING_SLOT_POS:
1377    case VARYING_SLOT_PNTC:
1378    case VARYING_SLOT_PSIZ:
1379    case VARYING_SLOT_LAYER:
1380    case VARYING_SLOT_PRIMITIVE_ID:
1381    case VARYING_SLOT_CLIP_DIST0:
1382    case VARYING_SLOT_CULL_DIST0:
1383    case VARYING_SLOT_VIEWPORT:
1384    case VARYING_SLOT_FACE:
1385    case VARYING_SLOT_TESS_LEVEL_OUTER:
1386    case VARYING_SLOT_TESS_LEVEL_INNER:
1387       /* use a sentinel value to avoid counting later */
1388       var->data.driver_location = UINT_MAX;
1389       break;
1390 
1391    default:
1392       if (var->data.patch) {
1393          assert(slot >= VARYING_SLOT_PATCH0);
1394          slot -= VARYING_SLOT_PATCH0;
1395       }
1396       if (slot_map[slot] == 0xff) {
1397          assert(*reserved < MAX_VARYING);
1398          unsigned num_slots;
1399          if (nir_is_arrayed_io(var, stage))
1400             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
1401          else
1402             num_slots = glsl_count_vec4_slots(var->type, false, false);
1403          assert(*reserved + num_slots <= MAX_VARYING);
1404          for (unsigned i = 0; i < num_slots; i++)
1405             slot_map[slot + i] = (*reserved)++;
1406       }
1407       slot = slot_map[slot];
1408       assert(slot < MAX_VARYING);
1409       var->data.driver_location = slot;
1410    }
1411 }
1412 
1413 ALWAYS_INLINE static bool
1414 is_texcoord(gl_shader_stage stage, const nir_variable *var)
1415 {
1416    if (stage != MESA_SHADER_FRAGMENT)
1417       return false;
1418    return var->data.location >= VARYING_SLOT_TEX0 &&
1419           var->data.location <= VARYING_SLOT_TEX7;
1420 }
1421 
1422 static bool
1423 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1424 {
1425    unsigned slot = var->data.location;
1426    switch (slot) {
1427    case VARYING_SLOT_POS:
1428    case VARYING_SLOT_PNTC:
1429    case VARYING_SLOT_PSIZ:
1430    case VARYING_SLOT_LAYER:
1431    case VARYING_SLOT_PRIMITIVE_ID:
1432    case VARYING_SLOT_CLIP_DIST0:
1433    case VARYING_SLOT_CULL_DIST0:
1434    case VARYING_SLOT_VIEWPORT:
1435    case VARYING_SLOT_FACE:
1436    case VARYING_SLOT_TESS_LEVEL_OUTER:
1437    case VARYING_SLOT_TESS_LEVEL_INNER:
1438       /* use a sentinel value to avoid counting later */
1439       var->data.driver_location = UINT_MAX;
1440       break;
1441    default:
1442       if (var->data.patch) {
1443          assert(slot >= VARYING_SLOT_PATCH0);
1444          slot -= VARYING_SLOT_PATCH0;
1445       }
1446       if (slot_map[slot] == (unsigned char)-1) {
1447          if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var))
1448             /* dead io */
1449             return false;
1450          /* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE
1451           * - patch variables may be read in the workgroup
1452           */
1453          slot_map[slot] = (*reserved)++;
1454       }
1455       var->data.driver_location = slot_map[slot];
1456    }
1457    return true;
1458 }
1459 
1460 
1461 static bool
1462 rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
1463 {
1464    nir_variable *var = data;
1465    if (instr->type != nir_instr_type_intrinsic)
1466       return false;
1467 
1468    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1469    if (intr->intrinsic != nir_intrinsic_load_deref)
1470       return false;
1471    nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
1472    if (deref_var != var)
1473       return false;
1474    nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
1475    nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
1476    return true;
1477 }
1478 
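/* Match producer outputs with consumer inputs so both sides agree on
 * compacted locations.  For a tcs producer the assignment is inverted
 * (consumer inputs claim slots first) since tcs outputs may also be read
 * within the patch and can't be dropped from the tes side alone; any IO
 * that ends up dead is demoted to shader_temp and removed.
 */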
1479 void
1480 zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
1481 {
1482    unsigned reserved = 0;
1483    unsigned char slot_map[VARYING_SLOT_MAX];
1484    memset(slot_map, -1, sizeof(slot_map));
1485    bool do_fixup = false;
1486    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
1487    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1488       /* remove injected pointsize from all but the last vertex stage */
1489       nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
1490       if (var && !var->data.explicit_location) {
1491          var->data.mode = nir_var_shader_temp;
1492          nir_fixup_deref_modes(producer);
1493          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1494          optimize_nir(producer, NULL);
1495       }
1496    }
1497    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
1498       /* never assign from tcs -> tes, always invert */
1499       nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
1500          assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
1501       nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
1502          if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
1503             /* this is an output, nothing more needs to be done for it to be dropped */
1504             do_fixup = true;
1505       }
1506    } else {
1507       nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
1508          assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
1509       nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
1510          if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
1511             do_fixup = true;
1512             /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
1513             nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
1514          }
1515       }
1516    }
1517    if (!do_fixup)
1518       return;
1519    nir_fixup_deref_modes(nir);
1520    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1521    optimize_nir(nir, NULL);
1522 }
1523 
1524 /* all types that hit this function contain something that is 64bit */
1525 static const struct glsl_type *
1526 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var)
1527 {
1528    if (glsl_type_is_array(type)) {
1529       const struct glsl_type *child = glsl_get_array_element(type);
1530       unsigned elements = glsl_array_size(type);
1531       unsigned stride = glsl_get_explicit_stride(type);
1532       return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride);
1533    }
1534    /* rewrite structs recursively */
1535    if (glsl_type_is_struct_or_ifc(type)) {
1536       unsigned nmembers = glsl_get_length(type);
1537       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
1538       unsigned xfb_offset = 0;
1539       for (unsigned i = 0; i < nmembers; i++) {
1540          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
1541          fields[i] = *f;
1542          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
1543          if (i < nmembers - 1 && xfb_offset % 8 &&
1544              glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) {
1545             var->data.is_xfb = true;
1546          }
1547          fields[i].type = rewrite_64bit_type(nir, f->type, var);
1548       }
1549       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
1550    }
1551    if (!glsl_type_is_64bit(type))
1552       return type;
1553    enum glsl_base_type base_type;
1554    switch (glsl_get_base_type(type)) {
1555    case GLSL_TYPE_UINT64:
1556       base_type = GLSL_TYPE_UINT;
1557       break;
1558    case GLSL_TYPE_INT64:
1559       base_type = GLSL_TYPE_INT;
1560       break;
1561    case GLSL_TYPE_DOUBLE:
1562       base_type = GLSL_TYPE_FLOAT;
1563       break;
1564    default:
1565       unreachable("unknown 64-bit vertex attribute format!");
1566    }
1567    if (glsl_type_is_scalar(type))
1568       return glsl_vector_type(base_type, 2);
1569    unsigned num_components;
1570    if (glsl_type_is_matrix(type)) {
1571       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
1572       unsigned vec_components = glsl_get_vector_elements(type);
1573       if (vec_components == 3)
1574          vec_components = 4;
1575       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
1576    } else {
1577       num_components = glsl_get_vector_elements(type) * 2;
1578       if (num_components <= 4)
1579          return glsl_vector_type(base_type, num_components);
1580    }
1581    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
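   /* e.g. (with columns vec4-aligned as computed above):
    *    dvec3 -> struct { vec4, vec2 }   (6 32-bit components)
    *    dvec4 -> struct { vec4, vec4 }   (8 components)
    *    dmat3 -> struct { vec4 x6 }      (3 cols * 4 * 2 = 24 components)
    */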
1582    struct glsl_struct_field fields[8] = {0};
1583    unsigned remaining = num_components;
1584    unsigned nfields = 0;
1585    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
1586       assert(i < ARRAY_SIZE(fields));
1587       fields[i].name = "";
1588       fields[i].offset = i * 16;
1589       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
1590    }
1591    char buf[64];
1592    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
1593    return glsl_struct_type(fields, nfields, buf, true);
1594 }
1595 
1596 static const struct glsl_type *
1597 deref_is_matrix(nir_deref_instr *deref)
1598 {
1599    if (glsl_type_is_matrix(deref->type))
1600       return deref->type;
1601    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1602    if (parent)
1603       return deref_is_matrix(parent);
1604    return NULL;
1605 }
1606 
1607 /* rewrite all input/output variables using 32bit types and load/stores */
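/* Two side tables drive the walk below: "derefs" remembers the original
 * matrix type of each rewritten deref so row accesses can be reconstructed,
 * and "deletes" queues instructions for removal once each function has been
 * processed, since a single deref may feed several rewritten loads/stores.
 */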
1608 static bool
1609 lower_64bit_vars(nir_shader *shader)
1610 {
1611    bool progress = false;
1612    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1613    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1614    nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) {
1615       if (!glsl_type_contains_64bit(var->type))
1616          continue;
1617       var->type = rewrite_64bit_type(shader, var->type, var);
1618       /* once type is rewritten, rewrite all loads and stores */
1619       nir_foreach_function(function, shader) {
1620          bool func_progress = false;
1621          if (!function->impl)
1622             continue;
1623          nir_builder b;
1624          nir_builder_init(&b, function->impl);
1625          nir_foreach_block(block, function->impl) {
1626             nir_foreach_instr_safe(instr, block) {
1627                switch (instr->type) {
1628                case nir_instr_type_deref: {
1629                   nir_deref_instr *deref = nir_instr_as_deref(instr);
1630                   if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out)))
1631                      continue;
1632                   if (nir_deref_instr_get_variable(deref) != var)
1633                      continue;
1634 
1635                   /* matrix types are special: store the original deref type for later use */
1636                   const struct glsl_type *matrix = deref_is_matrix(deref);
1637                   nir_deref_instr *parent = nir_deref_instr_parent(deref);
1638                   if (!matrix) {
1639                      /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
1640                      hash_table_foreach(derefs, he) {
1641                         /* propagate parent matrix type to row deref */
1642                         if (he->key == parent)
1643                            matrix = he->data;
1644                      }
1645                   }
1646                   if (matrix)
1647                      _mesa_hash_table_insert(derefs, deref, (void*)matrix);
1648                   if (deref->deref_type == nir_deref_type_var)
1649                      deref->type = var->type;
1650                   else
1651                      deref->type = rewrite_64bit_type(shader, deref->type, var);
1652                }
1653                break;
1654                case nir_instr_type_intrinsic: {
1655                   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1656                   if (intr->intrinsic != nir_intrinsic_store_deref &&
1657                       intr->intrinsic != nir_intrinsic_load_deref)
1658                      break;
1659                   if (nir_intrinsic_get_var(intr, 0) != var)
1660                      break;
1661                   if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
1662                       (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
1663                      break;
1664                   b.cursor = nir_before_instr(instr);
1665                   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1666                   unsigned num_components = intr->num_components * 2;
1667                   nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
1668                   /* this is the stored matrix type from the deref */
1669                   struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
1670                   const struct glsl_type *matrix = he ? he->data : NULL;
1671                   func_progress = true;
1672                   if (intr->intrinsic == nir_intrinsic_store_deref) {
1673                      /* first, unpack the src data to 32bit vec2 components */
1674                      for (unsigned i = 0; i < intr->num_components; i++) {
1675                         nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
1676                         comp[i * 2] = nir_channel(&b, ssa, 0);
1677                         comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
1678                      }
1679                      unsigned wrmask = nir_intrinsic_write_mask(intr);
1680                      unsigned mask = 0;
1681                      /* expand writemask for doubled components */
1682                      for (unsigned i = 0; i < intr->num_components; i++) {
1683                         if (wrmask & BITFIELD_BIT(i))
1684                            mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
1685                      }
1686                      if (matrix) {
1687                         /* matrix types always come from array (row) derefs */
1688                         assert(deref->deref_type == nir_deref_type_array);
1689                         nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1690                         /* let optimization clean up consts later */
1691                         nir_ssa_def *index = deref->arr.index.ssa;
1692                         /* this might be an indirect array index:
1693                          * - iterate over matrix columns
1694                          * - add if blocks for each column
1695                          * - perform the store in the block
1696                          */
1697                         for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
1698                            nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1699                            unsigned vec_components = glsl_get_vector_elements(matrix);
1700                            /* always clamp dvec3 to 4 components */
1701                            if (vec_components == 3)
1702                               vec_components = 4;
1703                            unsigned start_component = idx * vec_components * 2;
1704                            /* struct member */
1705                            unsigned member = start_component / 4;
1706                            /* number of components remaining */
1707                            unsigned remaining = num_components;
1708                            for (unsigned i = 0; i < num_components; member++) {
1709                               if (!(mask & BITFIELD_BIT(i))) {
1710                                  /* a skipped member must still advance i/remaining or the loop never terminates */
                                  i += MIN2(remaining, 4);
                                  remaining -= MIN2(remaining, 4);
                                  continue;
                               }
1711                               assert(member < glsl_get_length(var_deref->type));
1712                               /* deref the rewritten struct to the appropriate vec4/vec2 */
1713                               nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1714                               unsigned incr = MIN2(remaining, 4);
1715                               /* assemble the write component vec */
1716                               nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
1717                               /* use the number of components being written as the writemask */
1718                               if (glsl_get_vector_elements(strct->type) > val->num_components)
1719                                  val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
1720                               nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
1721                               remaining -= incr;
1722                               i += incr;
1723                            }
1724                            nir_pop_if(&b, NULL);
1725                         }
1726                         _mesa_set_add(deletes, &deref->instr);
1727                      } else if (num_components <= 4) {
1728                         /* simple store case: just write out the components */
1729                         nir_ssa_def *dest = nir_vec(&b, comp, num_components);
1730                         nir_store_deref(&b, deref, dest, mask);
1731                      } else {
1732                         /* writing > 4 components: access the struct and write to the appropriate vec4 members */
1733                        /* shift the writemask in the loop increment so skipped members stay in sync */
1734                        for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4), mask >>= 4) {
1735                           if (!(mask & BITFIELD_MASK(4)))
1736                              continue;
1737                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1738                           nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
1739                           if (glsl_get_vector_elements(strct->type) > dest->num_components)
1740                              dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
1741                           nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
1742                        }
1743                      }
1744                   } else {
1745                      nir_ssa_def *dest = NULL;
1746                      if (matrix) {
1747                         /* matrix types always come from array (row) derefs */
1748                         assert(deref->deref_type == nir_deref_type_array);
1749                         nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1750                         /* let optimization clean up consts later */
1751                         nir_ssa_def *index = deref->arr.index.ssa;
1752                         /* this might be an indirect array index:
1753                          * - iterate over matrix columns
1754                          * - add if blocks for each column
1755                          * - phi the loads using the array index
1756                          */
1757                         unsigned cols = glsl_get_matrix_columns(matrix);
1758                         nir_ssa_def *dests[4];
1759                         for (unsigned idx = 0; idx < cols; idx++) {
1760                            /* don't add an if for the final row: this will be handled in the else */
1761                            if (idx < cols - 1)
1762                               nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1763                            unsigned vec_components = glsl_get_vector_elements(matrix);
1764                            /* always clamp dvec3 to 4 components */
1765                            if (vec_components == 3)
1766                               vec_components = 4;
1767                            unsigned start_component = idx * vec_components * 2;
1768                            /* struct member */
1769                            unsigned member = start_component / 4;
1770                            /* number of components remaining */
1771                            unsigned remaining = num_components;
1772                            /* component index */
1773                            unsigned comp_idx = 0;
1774                            for (unsigned i = 0; i < num_components; member++) {
1775                               assert(member < glsl_get_length(var_deref->type));
1776                               nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1777                               nir_ssa_def *load = nir_load_deref(&b, strct);
1778                               unsigned incr = MIN2(remaining, 4);
1779                               /* repack the loads to 64bit */
1780                               for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
1781                                  comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
1782                               remaining -= incr;
1783                               i += incr;
1784                            }
1785                            dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
1786                            if (idx < cols - 1)
1787                               nir_push_else(&b, NULL);
1788                         }
1789                         /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
1790                         for (unsigned idx = cols - 1; idx >= 1; idx--) {
1791                            nir_pop_if(&b, NULL);
1792                            dest = nir_if_phi(&b, dests[idx - 1], dest);
1793                         }
1794                         _mesa_set_add(deletes, &deref->instr);
1795                      } else if (num_components <= 4) {
1796                         /* simple load case */
1797                         nir_ssa_def *load = nir_load_deref(&b, deref);
1798                         /* pack 32bit loads into 64bit: this will automagically get optimized out later */
1799                         for (unsigned i = 0; i < intr->num_components; i++) {
1800                            comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
1801                         }
1802                         dest = nir_vec(&b, comp, intr->num_components);
1803                      } else {
1804                         /* writing > 4 components: access the struct and load the appropriate vec4 members */
1805                         for (unsigned i = 0; i < 2; i++, num_components -= 4) {
1806                            nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1807                            nir_ssa_def *load = nir_load_deref(&b, strct);
1808                            comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
1809                            if (num_components > 2)
1810                               comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
1811                         }
1812                         dest = nir_vec(&b, comp, intr->num_components);
1813                      }
1814                      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
1815                   }
1816                   _mesa_set_add(deletes, instr);
1817                   break;
1818                }
1819                break;
1820                default: break;
1821                }
1822             }
1823          }
1824          if (func_progress)
1825             nir_metadata_preserve(function->impl, nir_metadata_none);
1826          /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
1827          set_foreach_remove(deletes, he)
1828             nir_instr_remove((void*)he->key);
1829       }
1830       progress = true;
1831    }
1832    ralloc_free(deletes);
1833    ralloc_free(derefs);
1834    if (progress) {
1835       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
1836       nir_lower_phis_to_scalar(shader, false);
1837       optimize_nir(shader, NULL);
1838    }
1839    return progress;
1840 }
1841 
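/* Split struct-typed output variables into one variable per member so each
 * member gets its own location; struct derefs are rewritten to point at the
 * matching member variable and the original var is demoted to shader_temp.
 */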
1842 static bool
1843 split_blocks(nir_shader *nir)
1844 {
1845    bool progress = false;
1846    bool changed = true;
1847    do {
1848       progress = false;
1849       nir_foreach_shader_out_variable(var, nir) {
1850          const struct glsl_type *base_type = glsl_without_array(var->type);
1851          nir_variable *members[32]; //can't have more than this without breaking NIR
1852          if (!glsl_type_is_struct(base_type))
1853             continue;
1854          /* TODO: arrays? */
1855          if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
1856             continue;
1857          if (glsl_count_attribute_slots(var->type, false) == 1)
1858             continue;
1859          unsigned offset = 0;
1860          for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
1861             members[i] = nir_variable_clone(var, nir);
1862             members[i]->type = glsl_get_struct_field(var->type, i);
1863             members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
1864             members[i]->data.location += offset;
1865             offset += glsl_count_attribute_slots(members[i]->type, false);
1866             nir_shader_add_variable(nir, members[i]);
1867          }
1868          nir_foreach_function(function, nir) {
1869             bool func_progress = false;
1870             if (!function->impl)
1871                continue;
1872             nir_builder b;
1873             nir_builder_init(&b, function->impl);
1874             nir_foreach_block(block, function->impl) {
1875                nir_foreach_instr_safe(instr, block) {
1876                   switch (instr->type) {
1877                   case nir_instr_type_deref: {
1878                      nir_deref_instr *deref = nir_instr_as_deref(instr);
1879                      if (!(deref->modes & nir_var_shader_out))
1880                         continue;
1881                      if (nir_deref_instr_get_variable(deref) != var)
1882                         continue;
1883                      if (deref->deref_type != nir_deref_type_struct)
1884                         continue;
1885                      nir_deref_instr *parent = nir_deref_instr_parent(deref);
1886                      if (parent->deref_type != nir_deref_type_var)
1887                         continue;
1888                      deref->modes = nir_var_shader_temp;
1889                      parent->modes = nir_var_shader_temp;
1890                      b.cursor = nir_before_instr(instr);
1891                      nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
1892                      nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
1893                      nir_instr_remove(&deref->instr);
1894                      func_progress = true;
1895                      break;
1896                   }
1897                   default: break;
1898                   }
1899                }
1900             }
1901             if (func_progress)
1902                nir_metadata_preserve(function->impl, nir_metadata_none);
1903          }
1904          var->data.mode = nir_var_shader_temp;
1905          changed = true;
1906          progress = true;
1907       }
1908    } while (progress);
1909    return changed;
1910 }
1911 
1912 static void
1913 zink_shader_dump(void *words, size_t size, const char *file)
1914 {
1915    FILE *fp = fopen(file, "wb");
1916    if (fp) {
1917       fwrite(words, 1, size, fp);
1918       fclose(fp);
1919       fprintf(stderr, "wrote '%s'...\n", file);
1920    }
1921 }
1922 
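/* Create a VkShaderModule from the given SPIR-V (falling back to zs->spirv).
 * With ZINK_DEBUG=spirv the words are dumped to dumpNN.spv; debug builds with
 * ZINK_DEBUG=validation additionally round-trip the binary through
 * spirv_to_nir as a sanity check before creating the module.
 */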
1923 VkShaderModule
1924 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
1925 {
1926    VkShaderModule mod;
1927    VkShaderModuleCreateInfo smci = {0};
1928 
1929    if (!spirv)
1930       spirv = zs->spirv;
1931 
1932    if (zink_debug & ZINK_DEBUG_SPIRV) {
1933       char buf[256];
1934       static int i;
1935       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
1936       zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
1937    }
1938 
1939    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1940    smci.codeSize = spirv->num_words * sizeof(uint32_t);
1941    smci.pCode = spirv->words;
1942 
1943 #ifndef NDEBUG
1944    if (zink_debug & ZINK_DEBUG_VALIDATION) {
1945       static const struct spirv_to_nir_options spirv_options = {
1946          .environment = NIR_SPIRV_VULKAN,
1947          .caps = {
1948             .float64 = true,
1949             .int16 = true,
1950             .int64 = true,
1951             .tessellation = true,
1952             .float_controls = true,
1953             .image_ms_array = true,
1954             .image_read_without_format = true,
1955             .image_write_without_format = true,
1956             .storage_image_ms = true,
1957             .geometry_streams = true,
1958             .storage_8bit = true,
1959             .storage_16bit = true,
1960             .variable_pointers = true,
1961             .stencil_export = true,
1962             .post_depth_coverage = true,
1963             .transform_feedback = true,
1964             .device_group = true,
1965             .draw_parameters = true,
1966             .shader_viewport_index_layer = true,
1967             .multiview = true,
1968             .physical_storage_buffer_address = true,
1969             .int64_atomics = true,
1970             .subgroup_arithmetic = true,
1971             .subgroup_basic = true,
1972             .subgroup_ballot = true,
1973             .subgroup_quad = true,
1974             .subgroup_shuffle = true,
1975             .subgroup_vote = true,
1976             .vk_memory_model = true,
1977             .vk_memory_model_device_scope = true,
1978             .int8 = true,
1979             .float16 = true,
1980             .demote_to_helper_invocation = true,
1981             .sparse_residency = true,
1982             .min_lod = true,
1983          },
1984          .ubo_addr_format = nir_address_format_32bit_index_offset,
1985          .ssbo_addr_format = nir_address_format_32bit_index_offset,
1986          .phys_ssbo_addr_format = nir_address_format_64bit_global,
1987          .push_const_addr_format = nir_address_format_logical,
1988          .shared_addr_format = nir_address_format_32bit_offset,
1989       };
1990       uint32_t num_spec_entries = 0;
1991       struct nir_spirv_specialization *spec_entries = NULL;
1992       VkSpecializationInfo sinfo = {0};
1993       VkSpecializationMapEntry me[3];
1994       uint32_t size[3] = {1,1,1};
1995       if (!zs->nir->info.workgroup_size[0]) {
1996          sinfo.mapEntryCount = 3;
1997          sinfo.pMapEntries = &me[0];
1998          sinfo.dataSize = sizeof(uint32_t) * 3;
1999          sinfo.pData = size;
2000          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
2001          for (int i = 0; i < 3; i++) {
2002             me[i].size = sizeof(uint32_t);
2003             me[i].constantID = ids[i];
2004             me[i].offset = i * sizeof(uint32_t);
2005          }
2006          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
2007       }
2008       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
2009                          spec_entries, num_spec_entries,
2010                          zs->nir->info.stage, "main", &spirv_options, &screen->nir_options);
2011       assert(nir);
2012       ralloc_free(nir);
2013       free(spec_entries);
2014    }
2015 #endif
2016 
2017    VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
2018    bool success = zink_screen_handle_vkresult(screen, ret);
2019    assert(success);
2020    return success ? mod : VK_NULL_HANDLE;
2021 }
2022 
2023 static bool
2024 find_var_deref(nir_shader *nir, nir_variable *var)
2025 {
2026    nir_foreach_function(function, nir) {
2027       if (!function->impl)
2028          continue;
2029 
2030       nir_foreach_block(block, function->impl) {
2031          nir_foreach_instr(instr, block) {
2032             if (instr->type != nir_instr_type_deref)
2033                continue;
2034             nir_deref_instr *deref = nir_instr_as_deref(instr);
2035             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2036                return true;
2037          }
2038       }
2039    }
2040    return false;
2041 }
2042 
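/* Demote any input/output variable that is never dereferenced to shader_temp
 * so it is no longer treated as shader IO.
 */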
2043 static void
2044 prune_io(nir_shader *nir)
2045 {
2046    nir_foreach_shader_in_variable_safe(var, nir) {
2047       if (!find_var_deref(nir, var))
2048          var->data.mode = nir_var_shader_temp;
2049    }
2050    nir_foreach_shader_out_variable_safe(var, nir) {
2051       if (!find_var_deref(nir, var))
2052          var->data.mode = nir_var_shader_temp;
2053    }
2054 }
2055 
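/* Clone the base NIR and apply all key-dependent lowering (decomposed vertex
 * attribs, clip_halfz, sample-mask skipping, texcoord replacement, nonseamless
 * cubemaps, inlined uniforms, ...) before handing the result to nir_to_spirv
 * and compiling the SPIR-V into a VkShaderModule.
 */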
2056 VkShaderModule
2057 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
2058 {
2059    VkShaderModule mod = VK_NULL_HANDLE;
2060    struct zink_shader_info *sinfo = &zs->sinfo;
2061    nir_shader *nir = nir_shader_clone(NULL, base_nir);
2062    bool need_optimize = false;
2063    bool inlined_uniforms = false;
2064 
2065    if (key) {
2066       if (key->inline_uniforms) {
2067          NIR_PASS_V(nir, nir_inline_uniforms,
2068                     nir->info.num_inlinable_uniforms,
2069                     key->base.inlined_uniform_values,
2070                     nir->info.inlinable_uniform_dw_offsets);
2071 
2072          inlined_uniforms = true;
2073       }
2074 
2075       /* TODO: use a separate mem ctx here for ralloc */
2076       switch (zs->nir->info.stage) {
2077       case MESA_SHADER_VERTEX: {
2078          uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
2079          const struct zink_vs_key *vs_key = zink_vs_key(key);
2080          switch (vs_key->size) {
2081          case 4:
2082             decomposed_attrs = vs_key->u32.decomposed_attrs;
2083             decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
2084             break;
2085          case 2:
2086             decomposed_attrs = vs_key->u16.decomposed_attrs;
2087             decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
2088             break;
2089          case 1:
2090             decomposed_attrs = vs_key->u8.decomposed_attrs;
2091             decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
2092             break;
2093          default: break;
2094          }
2095          if (decomposed_attrs || decomposed_attrs_without_w)
2096             NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
2097          FALLTHROUGH;
2098       }
2099       case MESA_SHADER_TESS_EVAL:
2100       case MESA_SHADER_GEOMETRY:
2101          if (zink_vs_key_base(key)->last_vertex_stage) {
2102             if (zs->sinfo.have_xfb)
2103                sinfo->last_vertex = true;
2104 
2105             if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) {
2106                NIR_PASS_V(nir, nir_lower_clip_halfz);
2107             }
2108             if (zink_vs_key_base(key)->push_drawid) {
2109                NIR_PASS_V(nir, lower_drawid);
2110             }
2111          }
2112          break;
2113       case MESA_SHADER_FRAGMENT:
2114          if (!zink_fs_key(key)->samples &&
2115             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
2116             /* VK will always use gl_SampleMask[] values even if sample count is 0,
2117              * so we need to skip this write here to mimic GL's behavior of ignoring it
2118              */
2119             nir_foreach_shader_out_variable(var, nir) {
2120                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2121                   var->data.mode = nir_var_shader_temp;
2122             }
2123             nir_fixup_deref_modes(nir);
2124             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2125             need_optimize = true;
2126          }
2127          if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
2128             NIR_PASS_V(nir, lower_dual_blend);
2129          }
2130          if (zink_fs_key(key)->coord_replace_bits) {
2131             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
2132                      false, zink_fs_key(key)->coord_replace_yinvert);
2133          }
2134          if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
2135             nir_foreach_shader_in_variable(var, nir)
2136                var->data.sample = true;
2137             nir->info.fs.uses_sample_qualifier = true;
2138             nir->info.fs.uses_sample_shading = true;
2139          }
2140          if (nir->info.fs.uses_fbfetch_output) {
2141             nir_variable *fbfetch = NULL;
2142             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms);
2143             /* old variable must be deleted to avoid spirv errors */
2144             fbfetch->data.mode = nir_var_shader_temp;
2145             nir_fixup_deref_modes(nir);
2146             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2147             need_optimize = true;
2148          }
2149          break;
2150       default: break;
2151       }
2152       if (key->base.nonseamless_cube_mask) {
2153          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
2154          need_optimize = true;
2155       }
2156    }
2157    if (screen->driconf.inline_uniforms) {
2158       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
2159       NIR_PASS_V(nir, rewrite_bo_access, screen);
2160       NIR_PASS_V(nir, remove_bo_access, zs);
2161       need_optimize = true;
2162    }
2163    if (inlined_uniforms) {
2164       optimize_nir(nir, zs);
2165 
2166       /* This must be done again. */
2167       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
2168                                                        nir_var_shader_out);
2169 
2170       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2171       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
2172          zs->can_inline = false;
2173    } else if (need_optimize)
2174       optimize_nir(nir, zs);
2175    prune_io(nir);
2176 
2177    NIR_PASS_V(nir, nir_convert_from_ssa, true);
2178 
2179    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
2180    if (spirv)
2181       mod = zink_shader_spirv_compile(screen, zs, spirv);
2182 
2183    ralloc_free(nir);
2184 
2185    /* TODO: determine if there's any reason to cache spirv output? */
2186    if (zs->nir->info.stage == MESA_SHADER_TESS_CTRL && zs->is_generated)
2187       zs->spirv = spirv;
2188    else
2189       ralloc_free(spirv);
2190    return mod;
2191 }
2192 
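/* GL's gl_InstanceID does not include the base instance, while Vulkan's
 * InstanceIndex does, so rewrite every load_instance_id as
 * instance_id - base_instance to restore GL semantics.
 */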
2193 static bool
2194 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
2195 {
2196    if (instr->type != nir_instr_type_intrinsic)
2197       return false;
2198    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2199    if (intr->intrinsic != nir_intrinsic_load_instance_id)
2200       return false;
2201    b->cursor = nir_after_instr(instr);
2202    nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
2203    nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
2204    return true;
2205 }
2206 
2207 static bool
2208 lower_baseinstance(nir_shader *shader)
2209 {
2210    if (shader->info.stage != MESA_SHADER_VERTEX)
2211       return false;
2212    return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
2213 }
2214 
2215 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
2216  * so instead we delete all those broken variables and just make new ones
2217  */
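/* The replacements are plain uint arrays sized to the largest buffer of each
 * class: "uniform_0" for UBO binding 0, one "ubos" array for the remaining
 * UBO bindings, and one "ssbos" array (with a trailing unsized member when
 * get_ssbo_size is used) covering all SSBO bindings.
 */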
2218 static bool
2219 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
2220 {
2221    uint64_t max_ssbo_size = 0;
2222    uint64_t max_ubo_size = 0;
2223    uint64_t max_uniform_size = 0;
2224 
2225    if (!shader->info.num_ssbos && !shader->info.num_ubos)
2226       return false;
2227 
2228    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
2229       const struct glsl_type *type = glsl_without_array(var->type);
2230       if (type_is_counter(type))
2231          continue;
2232       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
2233       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
2234       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
2235       if (interface_type) {
2236          unsigned block_size = glsl_get_explicit_size(interface_type, true);
2237          block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
2238          size = MAX2(size, block_size);
2239       }
2240       if (var->data.mode == nir_var_mem_ubo) {
2241          if (var->data.driver_location)
2242             max_ubo_size = MAX2(max_ubo_size, size);
2243          else
2244             max_uniform_size = MAX2(max_uniform_size, size);
2245       } else {
2246          max_ssbo_size = MAX2(max_ssbo_size, size);
2247          if (interface_type) {
2248             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
2249                needs_size = true;
2250          }
2251       }
2252       var->data.mode = nir_var_shader_temp;
2253    }
2254    nir_fixup_deref_modes(shader);
2255    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2256    optimize_nir(shader, NULL);
2257 
2258    struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2259    fields[0].name = ralloc_strdup(shader, "base");
2260    fields[1].name = ralloc_strdup(shader, "unsized");
2261    if (shader->info.num_ubos) {
2262       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
2263          fields[0].type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
2264          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2265                                                  glsl_array_type(glsl_interface_type(fields, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
2266                                                  "uniform_0");
2267          var->interface_type = var->type;
2268          var->data.mode = nir_var_mem_ubo;
2269          var->data.driver_location = 0;
2270       }
2271 
2272       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
2273       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
2274       if (num_ubos && ubos_used) {
2275          fields[0].type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
2276          /* shrink array as much as possible */
2277          unsigned first_ubo = ffs(ubos_used) - 2;
2278          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
2279          num_ubos -= first_ubo;
2280          assert(num_ubos);
2281          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2282                                    glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0),
2283                                    "ubos");
2284          var->interface_type = var->type;
2285          var->data.mode = nir_var_mem_ubo;
2286          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
2287       }
2288    }
2289    if (shader->info.num_ssbos && zs->ssbos_used) {
2290       /* shrink array as much as possible */
2291       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
2292       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
2293       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
2294       assert(num_ssbos);
2295       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
2296       const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
2297       fields[0].type = ssbo_type;
2298       fields[1].type = max_ssbo_size ? unsized : NULL;
2299       unsigned field_count = max_ssbo_size && needs_size ? 2 : 1;
2300       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
2301                                               glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0),
2302                                               "ssbos");
2303       var->interface_type = var->type;
2304       var->data.mode = nir_var_mem_ssbo;
2305       var->data.driver_location = first_ssbo;
2306    }
2307    return true;
2308 }
2309 
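/* Compute which buffer bindings an access can touch: a constant index maps to
 * exactly one bit, while an indirect index conservatively marks every binding.
 * For UBOs, binding 0 (the default uniform block) is excluded from the
 * indirect case since it is never addressed indirectly here.
 */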
2310 static uint32_t
2311 get_src_mask_ssbo(unsigned total, nir_src src)
2312 {
2313    if (nir_src_is_const(src))
2314       return BITFIELD_BIT(nir_src_as_uint(src));
2315    return BITFIELD_MASK(total);
2316 }
2317 
2318 static uint32_t
2319 get_src_mask_ubo(unsigned total, nir_src src)
2320 {
2321    if (nir_src_is_const(src))
2322       return BITFIELD_BIT(nir_src_as_uint(src));
2323    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
2324 }
2325 
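/* Walk every intrinsic to populate the zs->ssbos_used / zs->ubos_used
 * bitmasks consumed by unbreak_bos(); returns true if a get_ssbo_size was
 * seen, i.e. the rewritten SSBO type needs its trailing unsized array member.
 */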
2326 static bool
2327 analyze_io(struct zink_shader *zs, nir_shader *shader)
2328 {
2329    bool ret = false;
2330    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
2331    nir_foreach_block(block, impl) {
2332       nir_foreach_instr(instr, block) {
2333          if (instr->type != nir_instr_type_intrinsic)
2334             continue;
2335 
2336          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2337          switch (intrin->intrinsic) {
2338          case nir_intrinsic_store_ssbo:
2339             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
2340             break;
2341 
2342          case nir_intrinsic_get_ssbo_size: {
2343             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2344             ret = true;
2345             break;
2346          }
2347          case nir_intrinsic_ssbo_atomic_fadd:
2348          case nir_intrinsic_ssbo_atomic_add:
2349          case nir_intrinsic_ssbo_atomic_imin:
2350          case nir_intrinsic_ssbo_atomic_umin:
2351          case nir_intrinsic_ssbo_atomic_imax:
2352          case nir_intrinsic_ssbo_atomic_umax:
2353          case nir_intrinsic_ssbo_atomic_and:
2354          case nir_intrinsic_ssbo_atomic_or:
2355          case nir_intrinsic_ssbo_atomic_xor:
2356          case nir_intrinsic_ssbo_atomic_exchange:
2357          case nir_intrinsic_ssbo_atomic_comp_swap:
2358          case nir_intrinsic_ssbo_atomic_fmin:
2359          case nir_intrinsic_ssbo_atomic_fmax:
2360          case nir_intrinsic_ssbo_atomic_fcomp_swap:
2361          case nir_intrinsic_load_ssbo:
2362             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2363             break;
2364          case nir_intrinsic_load_ubo:
2365          case nir_intrinsic_load_ubo_vec4:
2366             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
2367             break;
2368          default:
2369             break;
2370          }
2371       }
2372    }
2373    return ret;
2374 }
2375 
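/* bindless[] slots: 0 = sampled textures, 1 = buffer textures,
 * 2 = storage images, 3 = buffer images -- matching the bindings assigned by
 * the create_bindless_* helpers below.
 */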
2376 struct zink_bindless_info {
2377    nir_variable *bindless[4];
2378    unsigned bindless_set;
2379 };
2380 
2381 /* this is a "default" bindless texture used if the shader has no texture variables */
2382 static nir_variable *
2383 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
2384 {
2385    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
2386    nir_variable *var;
2387 
2388    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
2389    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
2390    var->data.descriptor_set = descriptor_set;
2391    var->data.driver_location = var->data.binding = binding;
2392    return var;
2393 }
2394 
2395 /* this is a "default" bindless image used if the shader has no image variables */
2396 static nir_variable *
2397 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
2398 {
2399    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
2400    nir_variable *var;
2401 
2402    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
2403    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
2404    var->data.descriptor_set = descriptor_set;
2405    var->data.driver_location = var->data.binding = binding;
2406    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2407    return var;
2408 }
2409 
2410 /* rewrite bindless instructions as array deref instructions */
2411 static bool
2412 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
2413 {
2414    struct zink_bindless_info *bindless = data;
2415 
2416    if (in->type == nir_instr_type_tex) {
2417       nir_tex_instr *tex = nir_instr_as_tex(in);
2418       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2419       if (idx == -1)
2420          return false;
2421 
2422       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
2423       if (!var)
2424          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
2425       b->cursor = nir_before_instr(in);
2426       nir_deref_instr *deref = nir_build_deref_var(b, var);
2427       if (glsl_type_is_array(var->type))
2428          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
2429       nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
2430 
2431       /* bindless sampling uses the variable type directly, which means the tex instr has to match
2432        * it exactly, in contrast to normal sampler ops where things are a bit more flexible;
2433        * this results in cases where a shader is passed a sampler2DArray but the tex instr only has
2434        * 2 coord components, which explodes spirv compilation even though it doesn't trigger validation errors
2435        *
2436        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
2437        * - Warhammer 40k: Dawn of War III
2438        */
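      /* e.g. a vec2 coord used with the sampler2DArray variable above gets padded to
       * vec3 here; nir_pad_vector fills the added component(s) with undefs
       */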
2439       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
2440       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
2441       unsigned coord_components = nir_src_num_components(tex->src[c].src);
2442       if (coord_components < needed_components) {
2443          nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
2444          nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
2445          tex->coord_components = needed_components;
2446       }
2447       return true;
2448    }
2449    if (in->type != nir_instr_type_intrinsic)
2450       return false;
2451    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2452 
2453    nir_intrinsic_op op;
2454 #define OP_SWAP(OP) \
2455    case nir_intrinsic_bindless_image_##OP: \
2456       op = nir_intrinsic_image_deref_##OP; \
2457       break;
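/* e.g. OP_SWAP(load) expands to:
 *    case nir_intrinsic_bindless_image_load:
 *       op = nir_intrinsic_image_deref_load;
 *       break;
 */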
2458 
2459 
2460    /* convert bindless intrinsics to deref intrinsics */
2461    switch (instr->intrinsic) {
2462    OP_SWAP(atomic_add)
2463    OP_SWAP(atomic_and)
2464    OP_SWAP(atomic_comp_swap)
2465    OP_SWAP(atomic_dec_wrap)
2466    OP_SWAP(atomic_exchange)
2467    OP_SWAP(atomic_fadd)
2468    OP_SWAP(atomic_fmax)
2469    OP_SWAP(atomic_fmin)
2470    OP_SWAP(atomic_imax)
2471    OP_SWAP(atomic_imin)
2472    OP_SWAP(atomic_inc_wrap)
2473    OP_SWAP(atomic_or)
2474    OP_SWAP(atomic_umax)
2475    OP_SWAP(atomic_umin)
2476    OP_SWAP(atomic_xor)
2477    OP_SWAP(format)
2478    OP_SWAP(load)
2479    OP_SWAP(order)
2480    OP_SWAP(samples)
2481    OP_SWAP(size)
2482    OP_SWAP(store)
2483    default:
2484       return false;
2485    }
2486 
2487    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2488    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
2489    if (!var)
2490       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
2491    instr->intrinsic = op;
2492    b->cursor = nir_before_instr(in);
2493    nir_deref_instr *deref = nir_build_deref_var(b, var);
2494    if (glsl_type_is_array(var->type))
2495       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
2496    nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
2497    return true;
2498 }
2499 
2500 static bool
2501 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
2502 {
2503    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
2504       return false;
2505    nir_fixup_deref_modes(shader);
2506    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2507    optimize_nir(shader, NULL);
2508    return true;
2509 }
2510 
2511 /* convert shader image/texture io variables to int64 handles for bindless indexing */
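/* e.g. an `out sampler2D` variable is retyped to int64_t below, and its
 * load_deref/store_deref is rewritten against a fresh deref of the handle-typed var
 */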
2512 static bool
2513 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
2514 {
2515    if (in->type != nir_instr_type_intrinsic)
2516       return false;
2517    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2518    if (instr->intrinsic != nir_intrinsic_load_deref &&
2519        instr->intrinsic != nir_intrinsic_store_deref)
2520       return false;
2521 
2522    nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
2523    nir_variable *var = nir_deref_instr_get_variable(src_deref);
2524    if (var->data.bindless)
2525       return false;
2526    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
2527       return false;
2528    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
2529       return false;
2530 
2531    var->type = glsl_int64_t_type();
2532    var->data.bindless = 1;
2533    b->cursor = nir_before_instr(in);
2534    nir_deref_instr *deref = nir_build_deref_var(b, var);
2535    if (instr->intrinsic == nir_intrinsic_load_deref) {
2536        nir_ssa_def *def = nir_load_deref(b, deref);
2537        nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
2538        nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
2539    } else {
2540       nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
2541    }
2542    nir_instr_remove(in);
2543    nir_instr_remove(&src_deref->instr);
2544    return true;
2545 }
2546 
2547 static bool
2548 lower_bindless_io(nir_shader *shader)
2549 {
2550    return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
2551 }
2552 
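/* flattened per-stage binding layout; e.g. with compact_descriptors false, a
 * fragment-stage (stage == 4) UBO at index 1 maps to binding 4 * 2 + 1 == 9, which
 * is why the UBO path in zink_shader_create() can assert binding < 10
 */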
2553 static uint32_t
2554 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
2555 {
2556    if (stage == MESA_SHADER_NONE) {
2557       unreachable("not supported");
2558    } else {
2559       switch (type) {
2560       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2561       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2562          return stage * 2 + !!index;
2563 
2564       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2565       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2566          assert(index < PIPE_MAX_SAMPLERS);
2567          return (stage * PIPE_MAX_SAMPLERS) + index;
2568 
2569       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2570          return stage + (compact_descriptors * (ZINK_SHADER_COUNT * 2));
2571 
2572       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2573       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2574          assert(index < ZINK_MAX_SHADER_IMAGES);
2575          return (stage * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_SHADER_COUNT * PIPE_MAX_SAMPLERS));
2576 
2577       default:
2578          unreachable("unexpected type");
2579       }
2580    }
2581 }
2582 
2583 static void
2584 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
2585 {
2586    if (glsl_type_is_struct(type)) {
2587       for (unsigned i = 0; i < glsl_get_length(type); i++)
2588          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
2589       return;
2590    }
2591 
2592    /* not an image or sampler, e.g. just a random scalar in a struct */
2593    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
2594       return;
2595 
2596    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
2597    unsigned binding;
2598    switch (vktype) {
2599       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2600          binding = 0;
2601          break;
2602       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2603          binding = 1;
2604          break;
2605       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2606          binding = 2;
2607          break;
2608       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2609          binding = 3;
2610          break;
2611       default:
2612          unreachable("unknown");
2613    }
2614    if (!bindless->bindless[binding]) {
2615       bindless->bindless[binding] = nir_variable_clone(var, nir);
2616       bindless->bindless[binding]->data.bindless = 0;
2617       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
2618       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
2619       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
2620       if (!bindless->bindless[binding]->data.image.format)
2621          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2622       nir_shader_add_variable(nir, bindless->bindless[binding]);
2623    } else {
2624       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
2625    }
2626    var->data.mode = nir_var_shader_temp;
2627 }
2628 
2629 static enum pipe_prim_type
2630 prim_to_pipe(enum shader_prim primitive_type)
2631 {
2632    switch (primitive_type) {
2633    case SHADER_PRIM_POINTS:
2634       return PIPE_PRIM_POINTS;
2635    case SHADER_PRIM_LINES:
2636    case SHADER_PRIM_LINE_LOOP:
2637    case SHADER_PRIM_LINE_STRIP:
2638    case SHADER_PRIM_LINES_ADJACENCY:
2639    case SHADER_PRIM_LINE_STRIP_ADJACENCY:
2640       return PIPE_PRIM_LINES;
2641    default:
2642       return PIPE_PRIM_TRIANGLES;
2643    }
2644 }
2645 
2646 static enum pipe_prim_type
2647 tess_prim_to_pipe(enum tess_primitive_mode prim_mode)
2648 {
2649    switch (prim_mode) {
2650    case TESS_PRIMITIVE_ISOLINES:
2651       return PIPE_PRIM_LINES;
2652    default:
2653       return PIPE_PRIM_TRIANGLES;
2654    }
2655 }
2656 
2657 static enum pipe_prim_type
2658 get_shader_base_prim_type(struct nir_shader *nir)
2659 {
2660    switch (nir->info.stage) {
2661    case MESA_SHADER_GEOMETRY:
2662       return prim_to_pipe(nir->info.gs.output_primitive);
2663    case MESA_SHADER_TESS_EVAL:
2664       return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode);
2665    default:
2666       break;
2667    }
2668    return PIPE_PRIM_MAX;
2669 }
2670 
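/* drivers that can't sample 1D depth textures (screen->need_2D_zs) get their shadow
 * sampler1D ops promoted to 2D: coords gain a zero y component, e.g. x becomes
 * (x, 0) and (x, layer) becomes (x, 0, layer) for arrays
 */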
2671 static bool
2672 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
2673 {
2674    struct zink_screen *screen = data;
2675    if (instr->type != nir_instr_type_tex)
2676       return false;
2677    nir_tex_instr *tex = nir_instr_as_tex(instr);
2678    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
2679       return false;
2680    if (tex->is_sparse && screen->need_2D_sparse) {
2681       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
2682       mesa_loge("unhandled/unsupported 1D sparse texture!");
2683       abort();
2684    }
2685    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
2686    b->cursor = nir_before_instr(instr);
2687    tex->coord_components++;
2688    unsigned srcs[] = {
2689       nir_tex_src_coord,
2690       nir_tex_src_offset,
2691       nir_tex_src_ddx,
2692       nir_tex_src_ddy,
2693    };
2694    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
2695       int c = nir_tex_instr_src_index(tex, srcs[i]);
2696       if (c == -1)
2697          continue;
2698       if (tex->src[c].src.ssa->num_components == tex->coord_components)
2699          continue;
2700       nir_ssa_def *def;
2701       nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
2702       if (tex->src[c].src.ssa->num_components == 1)
2703          def = nir_vec2(b, tex->src[c].src.ssa, zero);
2704       else
2705          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
2706       nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
2707    }
2708    b->cursor = nir_after_instr(instr);
2709    unsigned needed_components = nir_tex_instr_dest_size(tex);
2710    unsigned num_components = tex->dest.ssa.num_components;
2711    if (needed_components > num_components) {
2712       tex->dest.ssa.num_components = needed_components;
2713       assert(num_components < 3);
2714       /* take either xz or just x since this is promoted to 2D from 1D */
2715       uint32_t mask = num_components == 2 ? (1|4) : 1;
2716       nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
2717       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
2718    }
2719    return true;
2720 }
2721 
2722 static bool
2723 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
2724 {
2725    bool found = false;
2726    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
2727       const struct glsl_type *type = glsl_without_array(var->type);
2728       unsigned length = glsl_get_length(var->type);
2729       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
2730          continue;
2731       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
2732       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
2733 
2734       found = true;
2735    }
2736    if (found)
2737       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
2738    return found;
2739 }
2740 
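/* gather shader info that zink cares about: sparse residency usage, the range of
 * image bindings actually used, and a one-time warning about float image atomics
 * when VK_EXT_shader_atomic_float is missing
 */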
2741 static void
2742 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
2743 {
2744    nir_foreach_function(function, shader) {
2745       if (!function->impl)
2746          continue;
2747       nir_foreach_block_safe(block, function->impl) {
2748          nir_foreach_instr_safe(instr, block) {
2749             if (instr->type == nir_instr_type_tex) {
2750                nir_tex_instr *tex = nir_instr_as_tex(instr);
2751                zs->sinfo.have_sparse |= tex->is_sparse;
2752             }
2753             if (instr->type != nir_instr_type_intrinsic)
2754                continue;
2755             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2756             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
2757                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
2758                 intr->intrinsic == nir_intrinsic_image_deref_store ||
2759                 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
2760                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
2761                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
2762                 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
2763                 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
2764                 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
2765                 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
2766                 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
2767                 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
2768                 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2769                 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
2770                 intr->intrinsic == nir_intrinsic_image_deref_size ||
2771                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
2772                 intr->intrinsic == nir_intrinsic_image_deref_format ||
2773                 intr->intrinsic == nir_intrinsic_image_deref_order) {
2774 
2775                 nir_variable *var =
2776                    nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
2777 
2778                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
2779                 const unsigned size =
2780                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
2781                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
2782                                  var->data.binding + (MAX2(size, 1) - 1));
2783             }
2784             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
2785                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
2786                zs->sinfo.have_sparse = true;
2787 
2788             static bool warned = false;
2789             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
2790                switch (intr->intrinsic) {
2791                case nir_intrinsic_image_deref_atomic_add: {
2792                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
2793                   if (util_format_is_float(var->data.image.format)) {
2794                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                         warned = true;
                      }
2795                   break;
2796                }
2797                default:
2798                   break;
2799                }
2800             }
2801          }
2802       }
2803    }
2804 }
2805 
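/* walk up the alu chain feeding this def: reaching a tex instr means this is still
 * a raw residency code, while reaching an is_sparse_texels_resident intrinsic means
 * it has already been converted to a boolean
 */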
2806 static bool
2807 is_residency_code(nir_ssa_def *src)
2808 {
2809    nir_instr *parent = src->parent_instr;
2810    while (1) {
2811       if (parent->type == nir_instr_type_intrinsic) {
2812          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2813          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2814          return false;
2815       }
2816       if (parent->type == nir_instr_type_tex)
2817          return true;
2818       assert(parent->type == nir_instr_type_alu);
2819       nir_alu_instr *alu = nir_instr_as_alu(parent);
2820       parent = alu->src[0].src.ssa->parent_instr;
2821    }
2822 }
2823 
2824 static bool
2825 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
2826 {
2827    if (in->type != nir_instr_type_intrinsic)
2828       return false;
2829    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2830    if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
2831       b->cursor = nir_before_instr(&instr->instr);
2832       nir_ssa_def *src0;
2833       if (is_residency_code(instr->src[0].ssa))
2834          src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
2835       else
2836          src0 = instr->src[0].ssa;
2837       nir_ssa_def *src1;
2838       if (is_residency_code(instr->src[1].ssa))
2839          src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
2840       else
2841          src1 = instr->src[1].ssa;
2842       nir_ssa_def *def = nir_iand(b, src0, src1);
2843       nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
2844       nir_instr_remove(in);
2845       return true;
2846    }
2847    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
2848       return false;
2849 
2850    /* a vulkan vector can have at most 4 components, but a sparse texel load
2851     * result is (maybe) a vec5, so just rewrite this as the first component since
2852     * ntv is going to use a different method for storing the residency value anyway
2853     */
2854    b->cursor = nir_before_instr(&instr->instr);
2855    nir_instr *parent = instr->src[0].ssa->parent_instr;
2856    if (is_residency_code(instr->src[0].ssa)) {
2857       assert(parent->type == nir_instr_type_alu);
2858       nir_alu_instr *alu = nir_instr_as_alu(parent);
2859       nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
2860       nir_instr_remove(parent);
2861    } else {
2862       nir_ssa_def *src;
2863       if (parent->type == nir_instr_type_intrinsic) {
2864          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2865          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2866          src = intr->src[0].ssa;
2867       } else {
2868          assert(parent->type == nir_instr_type_alu);
2869          nir_alu_instr *alu = nir_instr_as_alu(parent);
2870          src = alu->src[0].src.ssa;
2871       }
2872       if (instr->dest.ssa.bit_size != 32) {
2873          if (instr->dest.ssa.bit_size == 1)
2874             src = nir_ieq_imm(b, src, 1);
2875          else
2876             src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
2877       }
2878       nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
2879       nir_instr_remove(in);
2880    }
2881    return true;
2882 }
2883 
2884 static bool
2885 lower_sparse(nir_shader *shader)
2886 {
2887    return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
2888 }
2889 
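/* rewrite tex dests to match the sampler variable's declared result type/size,
 * converting back to the original bit size afterward; single-component shadow
 * results also get splatted back out to the number of components nir expects
 */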
2890 static bool
2891 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
2892 {
2893    if (in->type != nir_instr_type_tex)
2894       return false;
2895    nir_tex_instr *tex = nir_instr_as_tex(in);
2896    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
2897       return false;
2898    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2899    nir_variable *var = NULL;
2900    if (handle != -1) {
2901       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
2902    } else {
2903       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
2904          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
2905             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
2906             if (tex->texture_index >= img->data.driver_location &&
2907                 tex->texture_index < img->data.driver_location + size) {
2908                var = img;
2909                break;
2910             }
2911          }
2912       }
2913    }
2914    assert(var);
2915    const struct glsl_type *type = glsl_without_array(var->type);
2916    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
2917    bool is_int = glsl_base_type_is_integer(ret_type);
2918    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
2919    unsigned dest_size = nir_dest_bit_size(tex->dest);
2920    b->cursor = nir_after_instr(in);
2921    unsigned num_components = nir_dest_num_components(tex->dest);
2922    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
2923    if (bit_size == dest_size && !rewrite_depth)
2924       return false;
2925    nir_ssa_def *dest = &tex->dest.ssa;
2926    if (bit_size != dest_size) {
2927       tex->dest.ssa.bit_size = bit_size;
2928       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
2929       if (rewrite_depth) {
2930          assert(!tex->is_new_style_shadow);
2931          tex->dest.ssa.num_components = 1;
2932          tex->is_new_style_shadow = true;
2933       }
2934 
2935       if (is_int) {
2936          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
2937             dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
2938          else
2939             dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
2940       } else {
2941          dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
2942       }
2943       if (rewrite_depth) {
2944          nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2945          dest = nir_vec(b, vec, num_components);
2946       }
2947       nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
2948    } else if (rewrite_depth) {
2949       assert(!tex->is_new_style_shadow);
2950       tex->dest.ssa.num_components = 1;
2951       tex->is_new_style_shadow = true;
2952       nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2953       nir_ssa_def *splat = nir_vec(b, vec, num_components);
2954       nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
2955    }
2956    return true;
2957 }
2958 
2959 static bool
2960 match_tex_dests(nir_shader *shader)
2961 {
2962    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, NULL);
2963 }
2964 
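/* nir allows vector offset/bits operands on bitfield ops, but the spirv BitField*
 * instructions take scalar Offset/Count, so scalarize any multi-component bitfield
 * op and recombine the per-channel results with nir_vec
 */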
2965 static bool
2966 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
2967 {
2968    if (in->type != nir_instr_type_alu)
2969       return false;
2970    nir_alu_instr *alu = nir_instr_as_alu(in);
2971    switch (alu->op) {
2972    case nir_op_ubitfield_extract:
2973    case nir_op_ibitfield_extract:
2974    case nir_op_bitfield_insert:
2975       break;
2976    default:
2977       return false;
2978    }
2979    unsigned num_components = nir_dest_num_components(alu->dest.dest);
2980    if (num_components == 1)
2981       return false;
2982    b->cursor = nir_before_instr(in);
2983    nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
2984    for (unsigned i = 0; i < num_components; i++) {
2985       if (alu->op == nir_op_bitfield_insert)
2986          dests[i] = nir_bitfield_insert(b,
2987                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2988                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2989                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
2990                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
2991       else if (alu->op == nir_op_ubitfield_extract)
2992          dests[i] = nir_ubitfield_extract(b,
2993                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2994                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2995                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
2996       else
2997          dests[i] = nir_ibitfield_extract(b,
2998                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2999                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
3000                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
3001    }
3002    nir_ssa_def *dest = nir_vec(b, dests, num_components);
3003    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
3004    nir_instr_remove(in);
3005    return true;
3006 }
3007 
3008 
3009 static bool
3010 split_bitfields(nir_shader *shader)
3011 {
3012    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
3013 }
3014 
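/* main shader creation entrypoint: runs zink's nir lowering passes, assigns
 * descriptor sets/bindings for ubos/ssbos/samplers/images, and records the
 * per-type binding info consumed by the program/descriptor machinery
 */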
3015 struct zink_shader *
3016 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
3017                    const struct pipe_stream_output_info *so_info)
3018 {
3019    struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3020    bool have_psiz = false;
3021 
3022    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
3023 
3024    ret->hash = _mesa_hash_pointer(ret);
3025    ret->reduced_prim = get_shader_base_prim_type(nir);
3026 
3027    ret->programs = _mesa_pointer_set_create(NULL);
3028    simple_mtx_init(&ret->lock, mtx_plain);
3029 
3030    nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
3031    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3032        nir->info.stage == MESA_SHADER_TESS_EVAL)
3033       indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
3034 
3035    NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
3036               UINT32_MAX);
3037 
3038    if (nir->info.stage == MESA_SHADER_VERTEX)
3039       create_vs_pushconst(nir);
3040    else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3041             nir->info.stage == MESA_SHADER_TESS_EVAL)
3042       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
3043    else if (nir->info.stage == MESA_SHADER_KERNEL)
3044       create_cs_pushconst(nir);
3045 
3046    if (nir->info.stage < MESA_SHADER_FRAGMENT)
3047       have_psiz = check_psiz(nir);
3048    NIR_PASS_V(nir, lower_basevertex);
3049    NIR_PASS_V(nir, lower_work_dim);
3050    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3051    NIR_PASS_V(nir, lower_baseinstance);
3052    NIR_PASS_V(nir, lower_sparse);
3053    NIR_PASS_V(nir, split_bitfields);
3054 
3055    if (screen->need_2D_zs)
3056       NIR_PASS_V(nir, lower_1d_shadow, screen);
3057 
3058    {
3059       nir_lower_subgroups_options subgroup_options = {0};
3060       subgroup_options.lower_to_scalar = true;
3061       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
3062       subgroup_options.ballot_bit_size = 32;
3063       subgroup_options.ballot_components = 4;
3064       subgroup_options.lower_subgroup_masks = true;
3065       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(nir->info.stage))) {
3066          subgroup_options.subgroup_size = 1;
3067          subgroup_options.lower_vote_trivial = true;
3068       }
3069       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3070    }
3071 
3072    if (so_info && so_info->num_outputs)
3073       NIR_PASS_V(nir, split_blocks);
3074 
3075    optimize_nir(nir, NULL);
3076    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3077    NIR_PASS_V(nir, nir_lower_discard_if);
3078    NIR_PASS_V(nir, nir_lower_fragcolor,
3079          nir->info.fs.color_is_dual_source ? 1 : 8);
3080    NIR_PASS_V(nir, lower_64bit_vertex_attribs);
3081    bool needs_size = analyze_io(ret, nir);
3082    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
3083    /* these passes run later, at compile time, if there could be inlined uniforms */
3084    if (!screen->driconf.inline_uniforms) {
3085       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3086       NIR_PASS_V(nir, rewrite_bo_access, screen);
3087       NIR_PASS_V(nir, remove_bo_access, ret);
3088    }
3089 
3090    if (zink_debug & ZINK_DEBUG_NIR) {
3091       fprintf(stderr, "NIR shader:\n---8<---\n");
3092       nir_print_shader(nir, stderr);
3093       fprintf(stderr, "---8<---\n");
3094    }
3095 
3096    struct zink_bindless_info bindless = {0};
3097    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
3098    bool has_bindless_io = false;
3099    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
3100       var->data.is_xfb = false;
3101       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
3102          has_bindless_io = true;
3103          break;
3104       }
3105    }
3106    if (has_bindless_io)
3107       NIR_PASS_V(nir, lower_bindless_io);
3108 
3109    optimize_nir(nir, NULL);
3110    prune_io(nir);
3111 
3112    scan_nir(screen, nir, ret);
3113 
3114    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
3115       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
3116                                         nir_var_image |
3117                                         nir_var_mem_ubo |
3118                                         nir_var_mem_ssbo)) {
3119          enum zink_descriptor_type ztype;
3120          const struct glsl_type *type = glsl_without_array(var->type);
3121          if (var->data.mode == nir_var_mem_ubo) {
3122             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
3123             /* buffer 0 is a push descriptor */
3124             var->data.descriptor_set = !!var->data.driver_location;
3125             var->data.binding = !var->data.driver_location ? nir->info.stage :
3126                                 zink_binding(nir->info.stage,
3127                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
3128                                              var->data.driver_location,
3129                                              screen->compact_descriptors);
3130             assert(var->data.driver_location || var->data.binding < 10);
3131             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
3132             int binding = var->data.binding;
3133 
3134             ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3135             ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
3136             ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3137             ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3138             assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3139             ret->num_bindings[ztype]++;
3140          } else if (var->data.mode == nir_var_mem_ssbo) {
3141             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
3142             var->data.descriptor_set = screen->desc_set_id[ztype];
3143             var->data.binding = zink_binding(nir->info.stage,
3144                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
3145                                              var->data.driver_location,
3146                                              screen->compact_descriptors);
3147             ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3148             ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3149             ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3150             ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3151             assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3152             ret->num_bindings[ztype]++;
3153          } else {
3154             assert(var->data.mode == nir_var_uniform ||
3155                    var->data.mode == nir_var_image);
3156             if (var->data.bindless) {
3157                ret->bindless = true;
3158                handle_bindless_var(nir, var, type, &bindless);
3159             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
3160                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
3161                ztype = zink_desc_type_from_vktype(vktype);
3162                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
3163                   ret->num_texel_buffers++;
3164                var->data.driver_location = var->data.binding;
3165                var->data.descriptor_set = screen->desc_set_id[ztype];
3166                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
3167                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3168                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3169                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3170                if (glsl_type_is_array(var->type))
3171                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
3172                else
3173                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
3174                ret->num_bindings[ztype]++;
3175             }
3176          }
3177       }
3178    }
3179    bool bindless_lowered = false;
3180    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
3181    ret->bindless |= bindless_lowered;
3182 
3183    if (!screen->info.feats.features.shaderInt64)
3184       NIR_PASS_V(nir, lower_64bit_vars);
3185    NIR_PASS_V(nir, match_tex_dests);
3186 
3187    ret->nir = nir;
3188    nir_foreach_shader_out_variable(var, nir)
3189       var->data.explicit_xfb_buffer = 0;
3190    if (so_info && so_info->num_outputs)
3191       update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
3192    else if (have_psiz) {
3193       bool have_fake_psiz = false;
3194       nir_variable *psiz = NULL;
3195       nir_foreach_shader_out_variable(var, nir) {
3196          if (var->data.location == VARYING_SLOT_PSIZ) {
3197             if (!var->data.explicit_location)
3198                have_fake_psiz = true;
3199             else
3200                psiz = var;
3201          }
3202       }
3203       if (have_fake_psiz && psiz) {
3204          psiz->data.mode = nir_var_shader_temp;
3205          nir_fixup_deref_modes(nir);
3206          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3207       }
3208    }
3209 
3210    ret->can_inline = true;
3211 
3212    return ret;
3213 }
3214 
3215 char *
3216 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
3217 {
3218    struct zink_screen *screen = zink_screen(pscreen);
3219    nir_shader *nir = nirptr;
3220 
3221    nir_lower_tex_options tex_opts = {
3222       .lower_invalid_implicit_lod = true,
3223    };
3224    /*
3225       Sampled Image must be an object whose type is OpTypeSampledImage.
3226       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
3227       or Rect, and the Arrayed and MS operands must be 0.
3228       - SPIRV, OpImageSampleProj* opcodes
3229     */
3230    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
3231                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
3232    tex_opts.lower_txp_array = true;
3233    if (!screen->info.feats.features.shaderImageGatherExtended)
3234       tex_opts.lower_tg4_offsets = true;
3235    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
3236    if (nir->info.stage == MESA_SHADER_GEOMETRY)
3237       NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
3238    optimize_nir(nir, NULL);
3239    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
3240    if (screen->driconf.inline_uniforms)
3241       nir_find_inlinable_uniforms(nir);
3242 
3243    return NULL;
3244 }
3245 
3246 void
3247 zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
3248 {
3249    set_foreach(shader->programs, entry) {
3250       if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
3251          struct zink_compute_program *comp = (void*)entry->key;
3252          if (!comp->base.removed) {
3253             _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
3254             comp->base.removed = true;
3255          }
3256          comp->shader = NULL;
3257          zink_compute_program_reference(ctx, &comp, NULL);
3258       } else {
3259          struct zink_gfx_program *prog = (void*)entry->key;
3260          enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
3261          assert(pstage < ZINK_SHADER_COUNT);
3262          if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
3263             unsigned stages_present = prog->stages_present;
3264             if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
3265                stages_present &= ~BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
3266             struct hash_table *ht = &ctx->program_cache[stages_present >> 2];
3267             struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
3268             assert(he);
3269             _mesa_hash_table_remove(ht, he);
3270             prog->base.removed = true;
3271          }
3272          if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
3273             prog->shaders[pstage] = NULL;
3274          /* only remove generated tcs during parent tes destruction */
3275          if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
3276             prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL;
3277          zink_gfx_program_reference(ctx, &prog, NULL);
3278       }
3279    }
3280    if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) {
3281       /* automatically destroy generated tcs shaders when tes is destroyed */
3282       zink_shader_free(ctx, shader->generated);
3283       shader->generated = NULL;
3284    }
3285    _mesa_set_destroy(shader->programs, NULL);
3286    ralloc_free(shader->nir);
3287    ralloc_free(shader->spirv);
3288    FREE(shader);
3289 }
3290 
3291 
3292 VkShaderModule
3293 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
3294 {
3295    assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL);
3296    /* shortcut all the nir passes since we just have to change this one word */
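   /* the patched word is (presumably) the literal operand of the shader's
    * "OpExecutionMode ... OutputVertices" instruction, whose offset was recorded
    * as tcs_vertices_out_word when the spirv was emitted
    */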
3297    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
3298    return zink_shader_spirv_compile(screen, zs, NULL);
3299 }
3300 
3301 /* creating a passthrough tcs shader that's roughly:
3302 
3303 #version 150
3304 #extension GL_ARB_tessellation_shader : require
3305 
3306 in vec4 some_var[gl_MaxPatchVertices];
3307 out vec4 some_var_out;
3308 
3309 layout(push_constant) uniform tcsPushConstants {
3310     layout(offset = 0) float TessLevelInner[2];
3311     layout(offset = 8) float TessLevelOuter[4];
3312 } u_tcsPushConstants;
3313 layout(vertices = $vertices_per_patch) out;
3314 void main()
3315 {
3316   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
3317   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
3318   some_var_out = some_var[gl_InvocationID];
3319 }
3320 
3321 */
3322 struct zink_shader *
3323 zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
3324 {
3325    struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3326    ret->hash = _mesa_hash_pointer(ret);
3327    ret->programs = _mesa_pointer_set_create(NULL);
3328    simple_mtx_init(&ret->lock, mtx_plain);
3329 
3330    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
3331    nir_function *fn = nir_function_create(nir, "main");
3332    fn->is_entrypoint = true;
3333    nir_function_impl *impl = nir_function_impl_create(fn);
3334 
3335    nir_builder b;
3336    nir_builder_init(&b, impl);
3337    b.cursor = nir_before_block(nir_start_block(impl));
3338 
3339    nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
3340 
3341    nir_foreach_shader_out_variable(var, vs->nir) {
3342       const struct glsl_type *type = var->type;
3343       const struct glsl_type *in_type = var->type;
3344       const struct glsl_type *out_type = var->type;
3345       char buf[1024];
3346       snprintf(buf, sizeof(buf), "%s_out", var->name);
3347       in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
3348       out_type = glsl_array_type(type, vertices_per_patch, 0);
3349 
3350       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
3351       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
3352       out->data.location = in->data.location = var->data.location;
3353       out->data.location_frac = in->data.location_frac = var->data.location_frac;
3354 
3355       /* gl_in[] receives values from equivalent built-in output
3356          variables written by the vertex shader (section 2.14.7).  Each array
3357          element of gl_in[] is a structure holding values for a specific vertex of
3358          the input patch.  The length of gl_in[] is equal to the
3359          implementation-dependent maximum patch size (gl_MaxPatchVertices).
3360          - ARB_tessellation_shader
3361        */
3362       /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
3363       nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
3364       nir_ssa_def *load = nir_load_deref(&b, in_array_var);
3365       nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
3366       nir_store_deref(&b, out_array_var, load, 0xff);
3367    }
3368    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
3369    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
3370    gl_TessLevelInner->data.patch = 1;
3371    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
3372    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
3373    gl_TessLevelOuter->data.patch = 1;
3374 
3375    /* hacks so we can size these right for now */
3376    struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
3377    /* just use a single blob for padding here because it's easier */
3378    fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
3379    fields[0].name = ralloc_asprintf(nir, "padding");
3380    fields[0].offset = 0;
3381    fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
3382    fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
3383    fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
3384    fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
3385    fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
3386    fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
3387    nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
3388                                                  glsl_struct_type(fields, 3, "struct", false), "pushconst");
3389    pushconst->data.location = VARYING_SLOT_VAR0;
3390 
3391    nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
3392    nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
3393 
3394    for (unsigned i = 0; i < 2; i++) {
3395       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
3396       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
3397    }
3398    for (unsigned i = 0; i < 4; i++) {
3399       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
3400       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
3401    }
3402 
3403    nir->info.tess.tcs_vertices_out = vertices_per_patch;
3404    nir_validate_shader(nir, "created");
3405 
3406    NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3407    optimize_nir(nir, NULL);
3408    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3409    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3410 
3411    ret->nir = nir;
3412    ret->is_generated = true;
3413    return ret;
3414 }
3415