1 /*
2  * Copyright 2018 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
31 
32 #include "pipe/p_state.h"
33 
34 #include "nir.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
40 
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43 
44 #include "util/u_memory.h"
45 
46 #include "compiler/spirv/nir_spirv.h"
47 #include "vulkan/util/vk_util.h"
48 
49 bool
50 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
51 
52 
53 static void
54 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
55 {
56    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
57    if (glsl_type_is_struct_or_ifc(dst->type)) {
58       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
59          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
60       }
61    } else if (glsl_type_is_array_or_matrix(dst->type)) {
62       unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
63       for (unsigned i = 0; i < count; i++) {
64          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
65       }
66    } else {
67       nir_def *load = nir_load_deref(b, src);
68       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
69    }
70 }
71 
72 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
73 
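/* Declares the push-constant block with a layout that mirrors
 * struct zink_gfx_push_constant: each member becomes an array of uints of the
 * field's size, placed at the field's real offset, so the ntv push-constant
 * loader can emit plain 32-bit loads.
 */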
74 static void
75 create_gfx_pushconst(nir_shader *nir)
76 {
77 #define PUSHCONST_MEMBER(member_idx, field)                                                                     \
78 fields[member_idx].type =                                                                                       \
79    glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
80 fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
81 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
82 
83    nir_variable *pushconst;
84    /* create compatible layout for the ntv push constant loader */
85    struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
86    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
87    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
88    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
89    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
90    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
91    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
92    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
93    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
94 
95    pushconst = nir_variable_create(nir, nir_var_mem_push_const,
96                                    glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
97                                    "gfx_pushconst");
98    pushconst->data.location = INT_MAX; //doesn't really matter
99 
100 #undef PUSHCONST_MEMBER
101 }
102 
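/* GL's gl_BaseVertex must read as 0 for non-indexed draws, so the loaded value
 * is selected against 0 based on the draw_mode_is_indexed push constant.
 */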
103 static bool
104 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
105 {
106    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
107       return false;
108 
109    b->cursor = nir_after_instr(&instr->instr);
110    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
111    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
112    load->num_components = 1;
113    nir_def_init(&load->instr, &load->def, 1, 32);
114    nir_builder_instr_insert(b, &load->instr);
115 
116    nir_def *composite = nir_build_alu(b, nir_op_bcsel,
117                                           nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
118                                           &instr->def,
119                                           nir_imm_int(b, 0),
120                                           NULL);
121 
122    nir_def_rewrite_uses_after(&instr->def, composite,
123                                   composite->parent_instr);
124    return true;
125 }
126 
127 static bool
128 lower_basevertex(nir_shader *shader)
129 {
130    if (shader->info.stage != MESA_SHADER_VERTEX)
131       return false;
132 
133    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
134       return false;
135 
136    return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
137                                      nir_metadata_dominance, NULL);
138 }
139 
140 
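/* gl_DrawID is sourced from the ZINK_GFX_PUSHCONST_DRAW_ID push constant. */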
141 static bool
142 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
143 {
144    if (instr->intrinsic != nir_intrinsic_load_draw_id)
145       return false;
146 
147    b->cursor = nir_before_instr(&instr->instr);
148    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
149    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
150    load->num_components = 1;
151    nir_def_init(&load->instr, &load->def, 1, 32);
152    nir_builder_instr_insert(b, &load->instr);
153 
154    nir_def_rewrite_uses(&instr->def, &load->def);
155 
156    return true;
157 }
158 
159 static bool
160 lower_drawid(nir_shader *shader)
161 {
162    if (shader->info.stage != MESA_SHADER_VERTEX)
163       return false;
164 
165    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
166       return false;
167 
168    return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
169                                      nir_metadata_dominance, NULL);
170 }
171 
172 struct lower_gl_point_state {
173    nir_variable *gl_pos_out;
174    nir_variable *gl_point_size;
175 };
176 
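/* Expands every point emitted on stream 0 into a 4-vertex triangle strip:
 * gl_Position.xy is offset by +/- half the point size, converted to clip space
 * using the viewport-scale push constant and gl_Position.w.
 */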
177 static bool
178 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
179 {
180    struct lower_gl_point_state *state = data;
181    nir_def *vp_scale, *pos;
182 
183    if (instr->type != nir_instr_type_intrinsic)
184       return false;
185 
186    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
187    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
188        intrin->intrinsic != nir_intrinsic_emit_vertex)
189       return false;
190 
191    if (nir_intrinsic_stream_id(intrin) != 0)
192       return false;
193 
194    if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
195          intrin->intrinsic == nir_intrinsic_end_primitive) {
196       nir_instr_remove(&intrin->instr);
197       return true;
198    }
199 
200    b->cursor = nir_before_instr(instr);
201 
202    // load the viewport scale from the push constants
203    nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
204    vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
205 
206    // Load point info values
207    nir_def *point_size = nir_load_var(b, state->gl_point_size);
208    nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
209 
210    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
211    nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
212    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
213    // half_w_delta = w_delta / 2
214    nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
215 
216    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
217    nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
218    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
219    // half_h_delta = h_delta / 2
220    nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
221 
222    nir_def *point_dir[4][2] = {
223       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
224       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
225       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
226       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
227    };
228 
229    nir_def *point_pos_x = nir_channel(b, point_pos, 0);
230    nir_def *point_pos_y = nir_channel(b, point_pos, 1);
231 
232    for (size_t i = 0; i < 4; i++) {
233       pos = nir_vec4(b,
234                      nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
235                      nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
236                      nir_channel(b, point_pos, 2),
237                      nir_channel(b, point_pos, 3));
238 
239       nir_store_var(b, state->gl_pos_out, pos, 0xf);
240 
241       nir_emit_vertex(b);
242    }
243 
244    nir_end_primitive(b);
245 
246    nir_instr_remove(&intrin->instr);
247 
248    return true;
249 }
250 
251 static bool
252 lower_gl_point_gs(nir_shader *shader)
253 {
254    struct lower_gl_point_state state;
255 
256    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
257    shader->info.gs.vertices_out *= 4;
258 
259    // Find the gl_Position and gl_PointSize outputs
260    state.gl_pos_out =
261       nir_find_variable_with_location(shader, nir_var_shader_out,
262                                       VARYING_SLOT_POS);
263    state.gl_point_size =
264       nir_find_variable_with_location(shader, nir_var_shader_out,
265                                       VARYING_SLOT_PSIZ);
266 
267    // if gl_Position or gl_PointSize aren't written, we have nothing to do
268    if (!state.gl_pos_out || !state.gl_point_size)
269       return false;
270 
271    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
272                                        nir_metadata_dominance, &state);
273 }
274 
275 struct lower_pv_mode_state {
276    nir_variable *varyings[VARYING_SLOT_MAX][4];
277    nir_variable *pos_counter;
278    nir_variable *out_pos_counter;
279    nir_variable *ring_offset;
280    unsigned ring_size;
281    unsigned primitive_vert_count;
282    unsigned prim;
283 };
284 
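/* Provoking-vertex emulation: every vertex the user GS emits is captured into
 * per-varying ring buffers (ring_size == gs.vertices_out), and on each
 * EndPrimitive the captured strip is re-emitted one primitive at a time with
 * its vertex order rotated so the requested provoking vertex comes first.
 */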
285 static nir_def*
286 lower_pv_mode_gs_ring_index(nir_builder *b,
287                             struct lower_pv_mode_state *state,
288                             nir_def *index)
289 {
290    nir_def *ring_offset = nir_load_var(b, state->ring_offset);
291    return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
292                           state->ring_size);
293 }
294 
295 /* Given the final deref of a chain of derefs, this function walks up the chain
296  * until it finds a var deref.
297  *
298  * It will then recreate an identical chain that ends with the provided deref.
299  */
300 static nir_deref_instr*
301 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
302 {
303    nir_deref_instr *parent = nir_deref_instr_parent(old);
304    if (!parent)
305       return new;
306    switch(old->deref_type) {
307    case nir_deref_type_var:
308       return new;
309    case nir_deref_type_array:
310       return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
311    case nir_deref_type_struct:
312       return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
313    case nir_deref_type_array_wildcard:
314    case nir_deref_type_ptr_as_array:
315    case nir_deref_type_cast:
316       unreachable("unexpected deref type");
317    }
318    unreachable("impossible deref type");
319 }
320 
321 static bool
322 lower_pv_mode_gs_store(nir_builder *b,
323                        nir_intrinsic_instr *intrin,
324                        struct lower_pv_mode_state *state)
325 {
326    b->cursor = nir_before_instr(&intrin->instr);
327    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
328    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
329       nir_variable *var = nir_deref_instr_get_variable(deref);
330 
331       gl_varying_slot location = var->data.location;
332       unsigned location_frac = var->data.location_frac;
333       assert(state->varyings[location][location_frac]);
334       nir_def *pos_counter = nir_load_var(b, state->pos_counter);
335       nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
336       nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
337       nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
338       // recreate the chain of derefs that leads to the store.
339       nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
340       nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
341       nir_instr_remove(&intrin->instr);
342       return true;
343    }
344 
345    return false;
346 }
347 
348 static void
349 lower_pv_mode_emit_rotated_prim(nir_builder *b,
350                                 struct lower_pv_mode_state *state,
351                                 nir_def *current_vertex)
352 {
353    nir_def *two = nir_imm_int(b, 2);
354    nir_def *three = nir_imm_int(b, 3);
355    bool is_triangle = state->primitive_vert_count == 3;
356    /* This shader will always see the last three vertices emitted by the user gs.
357     * The following table is used to rotate primitives within a strip generated
358     * by the user gs such that the last vertex becomes the first.
359     *
360     * [lines, tris][even/odd index][vertex mod 3]
361     */
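   /* e.g. for triangles (is_triangle == 1) an even-position primitive uses the
    * order {2, 0, 1}, so the last captured vertex of the primitive is emitted
    * first.
    */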
362    static const unsigned vert_maps[2][2][3] = {
363       {{1, 0, 0}, {1, 0, 0}},
364       {{2, 0, 1}, {2, 1, 0}}
365    };
366    /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
367     * is either the last or the second, depending on whether the triangle is at an odd
368     * or even position within the strip.
369     *
370     * odd or even primitive within draw
371     */
372    nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
373    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
374       /* odd or even triangle within strip emitted by user GS
375        * this is handled using the table
376        */
377       nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
378       unsigned offset_even = vert_maps[is_triangle][0][i];
379       unsigned offset_odd = vert_maps[is_triangle][1][i];
380       nir_def *offset_even_value = nir_imm_int(b, offset_even);
381       nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
382       nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
383                                             offset_odd_value, offset_even_value);
384       /* Here we account for how triangles are provided to the gs from a strip.
385        * For even primitives we rotate by 3, meaning we do nothing.
386        * For odd primitives we rotate by 2, combined with the previous rotation this
387        * means the second vertex becomes the last.
388        */
389       if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
390         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
391                                             nir_isub(b, three,
392                                                         odd_prim)),
393                                             three);
394       /* Triangles that come from fans are provided to the gs the same way as
395        * odd triangles from a strip so always rotate by 2.
396        */
397       else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
398         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
399                                 three);
400       rotated_i = nir_iadd(b, rotated_i, current_vertex);
401       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
402          gl_varying_slot location = var->data.location;
403          unsigned location_frac = var->data.location_frac;
404          if (state->varyings[location][location_frac]) {
405             nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
406             nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
407             copy_vars(b, nir_build_deref_var(b, var), value);
408          }
409       }
410       nir_emit_vertex(b);
411    }
412 }
413 
414 static bool
415 lower_pv_mode_gs_emit_vertex(nir_builder *b,
416                              nir_intrinsic_instr *intrin,
417                              struct lower_pv_mode_state *state)
418 {
419    b->cursor = nir_before_instr(&intrin->instr);
420 
421    // increment pos_counter
422    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
423    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
424 
425    nir_instr_remove(&intrin->instr);
426    return true;
427 }
428 
429 static bool
430 lower_pv_mode_gs_end_primitive(nir_builder *b,
431                                nir_intrinsic_instr *intrin,
432                                struct lower_pv_mode_state *state)
433 {
434    b->cursor = nir_before_instr(&intrin->instr);
435 
436    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
437    nir_push_loop(b);
438    {
439       nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
440       nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
441                                 nir_imm_int(b, state->primitive_vert_count)));
442       nir_jump(b, nir_jump_break);
443       nir_pop_if(b, NULL);
444 
445       lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
446       nir_end_primitive(b);
447 
448       nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
449    }
450    nir_pop_loop(b, NULL);
451    /* Set the ring offset such that when position 0 is
452     * read we get the last value written
453     */
454    nir_store_var(b, state->ring_offset, pos_counter, 1);
455    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
456    nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
457 
458    nir_instr_remove(&intrin->instr);
459    return true;
460 }
461 
462 static bool
463 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
464 {
465    if (instr->type != nir_instr_type_intrinsic)
466       return false;
467 
468    struct lower_pv_mode_state *state = data;
469    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
470 
471    switch (intrin->intrinsic) {
472    case nir_intrinsic_store_deref:
473       return lower_pv_mode_gs_store(b, intrin, state);
474    case nir_intrinsic_copy_deref:
475       unreachable("should be lowered");
476    case nir_intrinsic_emit_vertex_with_counter:
477    case nir_intrinsic_emit_vertex:
478       return lower_pv_mode_gs_emit_vertex(b, intrin, state);
479    case nir_intrinsic_end_primitive:
480    case nir_intrinsic_end_primitive_with_counter:
481       return lower_pv_mode_gs_end_primitive(b, intrin, state);
482    default:
483       return false;
484    }
485 }
486 
487 static bool
488 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
489 {
490    nir_builder b;
491    struct lower_pv_mode_state state;
492    memset(state.varyings, 0, sizeof(state.varyings));
493 
494    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
495    b = nir_builder_at(nir_before_impl(entry));
496 
497    state.primitive_vert_count =
498       mesa_vertices_per_prim(shader->info.gs.output_primitive);
499    state.ring_size = shader->info.gs.vertices_out;
500 
501    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
502       gl_varying_slot location = var->data.location;
503       unsigned location_frac = var->data.location_frac;
504 
505       char name[100];
506       snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
507       state.varyings[location][location_frac] =
508          nir_local_variable_create(entry,
509                                    glsl_array_type(var->type,
510                                                    state.ring_size,
511                                                    false),
512                                    name);
513    }
514 
515    state.pos_counter = nir_local_variable_create(entry,
516                                                  glsl_uint_type(),
517                                                  "__pos_counter");
518 
519    state.out_pos_counter = nir_local_variable_create(entry,
520                                                      glsl_uint_type(),
521                                                      "__out_pos_counter");
522 
523    state.ring_offset = nir_local_variable_create(entry,
524                                                  glsl_uint_type(),
525                                                  "__ring_offset");
526 
527    state.prim = prim;
528 
529    // initialize pos_counter, out_pos_counter and ring_offset
530    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
531    nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
532    nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
533 
534    shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
535                                    (state.primitive_vert_count - 1)) *
536                                   state.primitive_vert_count;
537    return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
538                                        nir_metadata_dominance, &state);
539 }
540 
541 struct lower_line_stipple_state {
542    nir_variable *pos_out;
543    nir_variable *stipple_out;
544    nir_variable *prev_pos;
545    nir_variable *pos_counter;
546    nir_variable *stipple_counter;
547    bool line_rectangular;
548 };
549 
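/* Perspective-divide a clip-space position and scale the resulting NDC xy by
 * the viewport-scale push constant, for measuring screen-space distances.
 */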
550 static nir_def *
551 viewport_map(nir_builder *b, nir_def *vert,
552              nir_def *scale)
553 {
554    nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
555    nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
556                                         w_recip);
557    return nir_fmul(b, ndc_point, scale);
558 }
559 
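/* On every emitted vertex after the first, accumulate the viewport-space
 * length of the segment from the previous vertex into a stipple counter that
 * is passed to the fragment shader through the noperspective __stipple
 * varying.
 */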
560 static bool
561 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
562 {
563    struct lower_line_stipple_state *state = data;
564    if (instr->type != nir_instr_type_intrinsic)
565       return false;
566 
567    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
568    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
569        intrin->intrinsic != nir_intrinsic_emit_vertex)
570       return false;
571 
572    b->cursor = nir_before_instr(instr);
573 
574    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
575    // viewport-map endpoints
576    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
577                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
578    nir_def *prev = nir_load_var(b, state->prev_pos);
579    nir_def *curr = nir_load_var(b, state->pos_out);
580    prev = viewport_map(b, prev, vp_scale);
581    curr = viewport_map(b, curr, vp_scale);
582 
583    // calculate length of line
584    nir_def *len;
585    if (state->line_rectangular)
586       len = nir_fast_distance(b, prev, curr);
587    else {
588       nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
589       len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
590    }
591    // update stipple_counter
592    nir_store_var(b, state->stipple_counter,
593                     nir_fadd(b, nir_load_var(b, state->stipple_counter),
594                                 len), 1);
595    nir_pop_if(b, NULL);
596    // emit stipple out
597    nir_copy_var(b, state->stipple_out, state->stipple_counter);
598    nir_copy_var(b, state->prev_pos, state->pos_out);
599 
600    // update prev_pos and pos_counter for next vertex
601    b->cursor = nir_after_instr(instr);
602    nir_store_var(b, state->pos_counter,
603                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
604                                     1), 1);
605 
606    return true;
607 }
608 
609 static bool
610 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
611 {
612    nir_builder b;
613    struct lower_line_stipple_state state;
614 
615    state.pos_out =
616       nir_find_variable_with_location(shader, nir_var_shader_out,
617                                       VARYING_SLOT_POS);
618 
619    // if position isn't written, we have nothing to do
620    if (!state.pos_out)
621       return false;
622 
623    state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
624                                            glsl_float_type(),
625                                            "__stipple");
626    state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
627    state.stipple_out->data.driver_location = shader->num_outputs++;
628    state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
629    shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
630 
631    // create temp variables
632    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
633                                         glsl_vec4_type(),
634                                         "__prev_pos");
635    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
636                                            glsl_uint_type(),
637                                            "__pos_counter");
638    state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
639                                                glsl_float_type(),
640                                                "__stipple_counter");
641 
642    state.line_rectangular = line_rectangular;
643    // initialize pos_counter and stipple_counter
644    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
645    b = nir_builder_at(nir_before_impl(entry));
646    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
647    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
648 
649    return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
650                                        nir_metadata_dominance, &state);
651 }
652 
653 static bool
654 lower_line_stipple_fs(nir_shader *shader)
655 {
656    nir_builder b;
657    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
658    b = nir_builder_at(nir_after_impl(entry));
659 
660    // create stipple counter
661    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
662                                                glsl_float_type(),
663                                                "__stipple");
664    stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
665    stipple->data.driver_location = shader->num_inputs++;
666    stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
667    shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
668 
669    nir_variable *sample_mask_out =
670       nir_find_variable_with_location(shader, nir_var_shader_out,
671                                       FRAG_RESULT_SAMPLE_MASK);
672    if (!sample_mask_out) {
673       sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
674                                         glsl_uint_type(), "sample_mask");
675       sample_mask_out->data.driver_location = shader->num_outputs++;
676       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
677    }
678 
679    nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
680                                                       nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
681    nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
682    pattern = nir_iand_imm(&b, pattern, 0xffff);
683 
684    nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
685    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
686    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
687    nir_store_var(&b, v, sample_mask_in, 1);
688    nir_store_var(&b, sample_mask, sample_mask_in, 1);
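   /* For each sample set in the incoming sample mask: interpolate the stipple
    * coordinate at that sample, index the 16-bit pattern with
    * (coord / factor) mod 16, and clear that sample's bit in the output mask
    * if the pattern bit is 0.
    */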
689    nir_push_loop(&b);
690    {
691       nir_def *value = nir_load_var(&b, v);
692       nir_def *index = nir_ufind_msb(&b, value);
693       nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
694       nir_def *new_value = nir_ixor(&b, value, index_mask);
695       nir_store_var(&b, v, new_value,  1);
696       nir_push_if(&b, nir_ieq_imm(&b, value, 0));
697       nir_jump(&b, nir_jump_break);
698       nir_pop_if(&b, NULL);
699 
700       nir_def *stipple_pos =
701          nir_interp_deref_at_sample(&b, 1, 32,
702             &nir_build_deref_var(&b, stipple)->def, index);
703       stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
704                                  nir_imm_float(&b, 16.0));
705       stipple_pos = nir_f2i32(&b, stipple_pos);
706       nir_def *bit =
707          nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
708       nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
709       {
710          nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
711          sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
712          nir_store_var(&b, sample_mask, sample_mask_value, 1);
713       }
714       nir_pop_if(&b, NULL);
715    }
716    nir_pop_loop(&b, NULL);
717    nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
718 
719    return true;
720 }
721 
722 struct lower_line_smooth_state {
723    nir_variable *pos_out;
724    nir_variable *line_coord_out;
725    nir_variable *prev_pos;
726    nir_variable *pos_counter;
727    nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
728                 *varyings[VARYING_SLOT_MAX][4]; // location_frac
729 };
730 
731 static bool
732 lower_line_smooth_gs_store(nir_builder *b,
733                            nir_intrinsic_instr *intrin,
734                            struct lower_line_smooth_state *state)
735 {
736    b->cursor = nir_before_instr(&intrin->instr);
737    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
738    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
739       nir_variable *var = nir_deref_instr_get_variable(deref);
740 
741       // we take care of position elsewhere
742       gl_varying_slot location = var->data.location;
743       unsigned location_frac = var->data.location_frac;
744       if (location != VARYING_SLOT_POS) {
745          assert(state->varyings[location]);
746          nir_store_var(b, state->varyings[location][location_frac],
747                        intrin->src[1].ssa,
748                        nir_intrinsic_write_mask(intrin));
749          nir_instr_remove(&intrin->instr);
750          return true;
751       }
752    }
753 
754    return false;
755 }
756 
757 static bool
758 lower_line_smooth_gs_emit_vertex(nir_builder *b,
759                                  nir_intrinsic_instr *intrin,
760                                  struct lower_line_smooth_state *state)
761 {
762    b->cursor = nir_before_instr(&intrin->instr);
763 
764    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
765    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
766                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
767    nir_def *prev = nir_load_var(b, state->prev_pos);
768    nir_def *curr = nir_load_var(b, state->pos_out);
769    nir_def *prev_vp = viewport_map(b, prev, vp_scale);
770    nir_def *curr_vp = viewport_map(b, curr, vp_scale);
771 
772    nir_def *width = nir_load_push_constant_zink(b, 1, 32,
773                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
774    nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
775 
776    const unsigned yx[2] = { 1, 0 };
777    nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
778    nir_def *len = nir_fast_length(b, vec);
779    nir_def *dir = nir_normalize(b, vec);
780    nir_def *half_length = nir_fmul_imm(b, len, 0.5);
781    half_length = nir_fadd_imm(b, half_length, 0.5);
782 
783    nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
784    nir_def *tangent =
785       nir_fmul(b,
786                nir_fmul(b,
787                         nir_swizzle(b, dir, yx, 2),
788                         nir_imm_vec2(b, 1.0, -1.0)),
789                vp_scale_rcp);
790    tangent = nir_fmul(b, tangent, half_width);
791    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
792    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
793 
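   /* Eight offsets/line-coords build one quad per segment plus square end
    * caps: vertices 0-3 surround the previous endpoint (the first pair pushed
    * backwards along the line direction), vertices 4-7 surround the current
    * endpoint (the last pair pushed forwards).
    */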
794    nir_def *line_offets[8] = {
795       nir_fadd(b, tangent, nir_fneg(b, dir)),
796       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
797       tangent,
798       nir_fneg(b, tangent),
799       tangent,
800       nir_fneg(b, tangent),
801       nir_fadd(b, tangent, dir),
802       nir_fadd(b, nir_fneg(b, tangent), dir),
803    };
804    nir_def *line_coord =
805       nir_vec4(b, half_width, half_width, half_length, half_length);
806    nir_def *line_coords[8] = {
807       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
808       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
809       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
810       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
811       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
812       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
813       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
814       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
815    };
816 
817    /* emit first end-cap, and start line */
818    for (int i = 0; i < 4; ++i) {
819       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
820          gl_varying_slot location = var->data.location;
821          unsigned location_frac = var->data.location_frac;
822          if (state->prev_varyings[location][location_frac])
823             nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
824       }
825       nir_store_var(b, state->pos_out,
826                     nir_fadd(b, prev, nir_fmul(b, line_offets[i],
827                              nir_channel(b, prev, 3))), 0xf);
828       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
829       nir_emit_vertex(b);
830    }
831 
832    /* finish line and emit last end-cap */
833    for (int i = 4; i < 8; ++i) {
834       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
835          gl_varying_slot location = var->data.location;
836          unsigned location_frac = var->data.location_frac;
837          if (state->varyings[location][location_frac])
838             nir_copy_var(b, var, state->varyings[location][location_frac]);
839       }
840       nir_store_var(b, state->pos_out,
841                     nir_fadd(b, curr, nir_fmul(b, line_offets[i],
842                              nir_channel(b, curr, 3))), 0xf);
843       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
844       nir_emit_vertex(b);
845    }
846    nir_end_primitive(b);
847 
848    nir_pop_if(b, NULL);
849 
850    nir_copy_var(b, state->prev_pos, state->pos_out);
851    nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
852       gl_varying_slot location = var->data.location;
853       unsigned location_frac = var->data.location_frac;
854       if (state->varyings[location][location_frac])
855          nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
856    }
857 
858    // update prev_pos and pos_counter for next vertex
859    b->cursor = nir_after_instr(&intrin->instr);
860    nir_store_var(b, state->pos_counter,
861                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
862                                     1), 1);
863 
864    nir_instr_remove(&intrin->instr);
865    return true;
866 }
867 
868 static bool
869 lower_line_smooth_gs_end_primitive(nir_builder *b,
870                                    nir_intrinsic_instr *intrin,
871                                    struct lower_line_smooth_state *state)
872 {
873    b->cursor = nir_before_instr(&intrin->instr);
874 
875    // reset line counter
876    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
877 
878    nir_instr_remove(&intrin->instr);
879    return true;
880 }
881 
882 static bool
883 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
884 {
885    if (instr->type != nir_instr_type_intrinsic)
886       return false;
887 
888    struct lower_line_smooth_state *state = data;
889    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
890 
891    switch (intrin->intrinsic) {
892    case nir_intrinsic_store_deref:
893       return lower_line_smooth_gs_store(b, intrin, state);
894    case nir_intrinsic_copy_deref:
895       unreachable("should be lowered");
896    case nir_intrinsic_emit_vertex_with_counter:
897    case nir_intrinsic_emit_vertex:
898       return lower_line_smooth_gs_emit_vertex(b, intrin, state);
899    case nir_intrinsic_end_primitive:
900    case nir_intrinsic_end_primitive_with_counter:
901       return lower_line_smooth_gs_end_primitive(b, intrin, state);
902    default:
903       return false;
904    }
905 }
906 
907 static bool
908 lower_line_smooth_gs(nir_shader *shader)
909 {
910    nir_builder b;
911    struct lower_line_smooth_state state;
912 
913    memset(state.varyings, 0, sizeof(state.varyings));
914    memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
915    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
916       gl_varying_slot location = var->data.location;
917       unsigned location_frac = var->data.location_frac;
918       if (location == VARYING_SLOT_POS)
919          continue;
920 
921       char name[100];
922       snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
923       state.varyings[location][location_frac] =
924          nir_variable_create(shader, nir_var_shader_temp,
925                               var->type, name);
926 
927       snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
928       state.prev_varyings[location][location_frac] =
929          nir_variable_create(shader, nir_var_shader_temp,
930                               var->type, name);
931    }
932 
933    state.pos_out =
934       nir_find_variable_with_location(shader, nir_var_shader_out,
935                                       VARYING_SLOT_POS);
936 
937    // if position isn't written, we have nothing to do
938    if (!state.pos_out)
939       return false;
940 
941    unsigned location = 0;
942    nir_foreach_shader_in_variable(var, shader) {
943      if (var->data.driver_location >= location)
944          location = var->data.driver_location + 1;
945    }
946 
947    state.line_coord_out =
948       nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
949                           "__line_coord");
950    state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
951    state.line_coord_out->data.driver_location = location;
952    state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
953    shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
954    shader->num_outputs++;
955 
956    // create temp variables
957    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
958                                         glsl_vec4_type(),
959                                         "__prev_pos");
960    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
961                                            glsl_uint_type(),
962                                            "__pos_counter");
963 
964    // initialize pos_counter
965    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
966    b = nir_builder_at(nir_before_impl(entry));
967    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
968 
969    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
970    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
971 
972    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
973                                        nir_metadata_dominance, &state);
974 }
975 
976 static bool
977 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
978 {
979    int dummy;
980    nir_builder b;
981 
982    nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
983    if (lower_stipple) {
984       stipple_counter = nir_variable_create(shader, nir_var_shader_in,
985                                             glsl_float_type(),
986                                             "__stipple");
987       stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
988       stipple_counter->data.driver_location = shader->num_inputs++;
989       stipple_counter->data.location =
990          MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
991       shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
992 
993       stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
994                                             glsl_uint_type(),
995                                             "stipple_pattern");
996 
997       // initialize stipple_pattern
998       nir_function_impl *entry = nir_shader_get_entrypoint(shader);
999       b = nir_builder_at(nir_before_impl(entry));
1000       nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1001                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1002       nir_store_var(&b, stipple_pattern, pattern, 1);
1003    }
1004 
1005    nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1006    return true;
1007 }
1008 
1009 static bool
1010 lower_dual_blend(nir_shader *shader)
1011 {
1012    bool progress = false;
1013    nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1014    if (var) {
1015       var->data.location = FRAG_RESULT_DATA0;
1016       var->data.index = 1;
1017       progress = true;
1018    }
1019    nir_shader_preserve_all_metadata(shader);
1020    return progress;
1021 }
1022 
1023 static bool
1024 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1025 {
1026    if (instr->type != nir_instr_type_alu)
1027       return false;
1028    nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1029    if (alu_instr->op != nir_op_pack_64_2x32 &&
1030        alu_instr->op != nir_op_unpack_64_2x32)
1031       return false;
1032    b->cursor = nir_before_instr(&alu_instr->instr);
1033    nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1034    nir_def *dest;
1035    switch (alu_instr->op) {
1036    case nir_op_pack_64_2x32:
1037       dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1038       break;
1039    case nir_op_unpack_64_2x32:
1040       dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1041       break;
1042    default:
1043       unreachable("Impossible opcode");
1044    }
1045    nir_def_rewrite_uses(&alu_instr->def, dest);
1046    nir_instr_remove(&alu_instr->instr);
1047    return true;
1048 }
1049 
1050 static bool
1051 lower_64bit_pack(nir_shader *shader)
1052 {
1053    return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1054                                        nir_metadata_block_index | nir_metadata_dominance, NULL);
1055 }
1056 
1057 nir_shader *
1058 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1059                                const nir_shader *prev_stage)
1060 {
1061    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1062                                                   options,
1063                                                   "filled quad gs");
1064 
1065    nir_shader *nir = b.shader;
1066    nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1067    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1068    nir->info.gs.vertices_in = 4;
1069    nir->info.gs.vertices_out = 6;
1070    nir->info.gs.invocations = 1;
1071    nir->info.gs.active_stream_mask = 1;
1072 
1073    nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1074    memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1075    if (prev_stage->xfb_info) {
1076       size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1077       nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1078    }
1079 
1080    nir_variable *in_vars[VARYING_SLOT_MAX];
1081    nir_variable *out_vars[VARYING_SLOT_MAX];
1082    unsigned num_vars = 0;
1083 
1084    /* Create input/output variables. */
1085    nir_foreach_shader_out_variable(var, prev_stage) {
1086       assert(!var->data.patch);
1087 
1088       /* input vars can't be created for those */
1089       if (var->data.location == VARYING_SLOT_LAYER ||
1090           var->data.location == VARYING_SLOT_VIEW_INDEX ||
1091           /* psiz not needed for quads */
1092           var->data.location == VARYING_SLOT_PSIZ)
1093          continue;
1094 
1095       char name[100];
1096       if (var->name)
1097          snprintf(name, sizeof(name), "in_%s", var->name);
1098       else
1099          snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1100 
1101       nir_variable *in = nir_variable_clone(var, nir);
1102       ralloc_free(in->name);
1103       in->name = ralloc_strdup(in, name);
1104       in->type = glsl_array_type(var->type, 4, false);
1105       in->data.mode = nir_var_shader_in;
1106       nir_shader_add_variable(nir, in);
1107 
1108       if (var->name)
1109          snprintf(name, sizeof(name), "out_%s", var->name);
1110       else
1111          snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1112 
1113       nir_variable *out = nir_variable_clone(var, nir);
1114       ralloc_free(out->name);
1115       out->name = ralloc_strdup(out, name);
1116       out->data.mode = nir_var_shader_out;
1117       nir_shader_add_variable(nir, out);
1118 
1119       in_vars[num_vars] = in;
1120       out_vars[num_vars++] = out;
1121    }
1122 
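   /* A quad arrives as the 4 vertices of a lines_adjacency primitive and is
    * split into two triangles; the triangle vertex order depends on whether
    * the provoking vertex is first or last so flat-shaded attributes come from
    * the correct corner.
    */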
1123    int mapping_first[] = {0, 1, 2, 0, 2, 3};
1124    int mapping_last[] = {0, 1, 3, 1, 2, 3};
1125    nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1126    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1127    for (unsigned i = 0; i < 6; ++i) {
1128       /* swap indices 2 and 3 */
1129       nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1130                                    nir_imm_int(&b, mapping_last[i]),
1131                                    nir_imm_int(&b, mapping_first[i]));
1132       /* Copy inputs to outputs. */
1133       for (unsigned j = 0; j < num_vars; ++j) {
1134          if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1135             continue;
1136          }
1137          nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1138          copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1139       }
1140       nir_emit_vertex(&b, 0);
1141       if (i == 2)
1142         nir_end_primitive(&b, 0);
1143    }
1144 
1145    nir_end_primitive(&b, 0);
1146    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1147    nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1148    return nir;
1149 }
1150 
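/* Rewrites load_flat_mask / load_provoking_last into ubo 0 loads at the
 * ZINK_INLINE_VAL_* word offsets so nir_inline_uniforms can fold them into
 * constants when those values are known.
 */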
1151 static bool
1152 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1153                                               nir_intrinsic_instr *intrin,
1154                                               void *data)
1155 {
1156    int inlined_uniform_offset;
1157    switch (intrin->intrinsic) {
1158    case nir_intrinsic_load_flat_mask:
1159       inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1160       break;
1161    case nir_intrinsic_load_provoking_last:
1162       inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1163       break;
1164    default:
1165       return false;
1166    }
1167 
1168    b->cursor = nir_before_instr(&intrin->instr);
1169    assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1170    /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1171     * anything with a different bit_size) so we need to split the load. */
1172    int num_dwords = intrin->def.bit_size / 32;
1173    nir_def *dwords[2] = {NULL};
1174    for (unsigned i = 0; i < num_dwords; i++)
1175       dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1176                                    nir_imm_int(b, inlined_uniform_offset + i),
1177                                    .align_mul = intrin->def.bit_size / 8,
1178                                    .align_offset = 0,
1179                                    .range_base = 0, .range = ~0);
1180    nir_def *new_dest_def;
1181    if (intrin->def.bit_size == 32)
1182       new_dest_def = dwords[0];
1183    else
1184       new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1185    nir_def_rewrite_uses(&intrin->def, new_dest_def);
1186    nir_instr_remove(&intrin->instr);
1187    return true;
1188 }
1189 
1190 bool
1191 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1192 {
1193    return nir_shader_intrinsics_pass(nir,
1194                                        lower_system_values_to_inlined_uniforms_instr,
1195                                        nir_metadata_dominance, NULL);
1196 }
1197 
1198 void
1199 zink_screen_init_compiler(struct zink_screen *screen)
1200 {
1201    static const struct nir_shader_compiler_options
1202    default_options = {
1203       .lower_ffma16 = true,
1204       .lower_ffma32 = true,
1205       .lower_ffma64 = true,
1206       .lower_scmp = true,
1207       .lower_fdph = true,
1208       .lower_flrp32 = true,
1209       .lower_fpow = true,
1210       .lower_fsat = true,
1211       .lower_hadd = true,
1212       .lower_iadd_sat = true,
1213       .lower_fisnormal = true,
1214       .lower_extract_byte = true,
1215       .lower_extract_word = true,
1216       .lower_insert_byte = true,
1217       .lower_insert_word = true,
1218 
1219       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1220        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1221        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1222        * effectively unused (no instances in shader-db), it's not worth the
1223        * effort to do so.
1224        * */
1225       .lower_ldexp = true,
1226 
1227       .lower_mul_high = true,
1228       .lower_uadd_carry = true,
1229       .lower_usub_borrow = true,
1230       .lower_uadd_sat = true,
1231       .lower_usub_sat = true,
1232       .lower_vector_cmp = true,
1233       .lower_int64_options = 0,
1234       .lower_doubles_options = nir_lower_dround_even,
1235       .lower_uniforms_to_ubo = true,
1236       .has_fsub = true,
1237       .has_isub = true,
1238       .lower_mul_2x32_64 = true,
1239       .support_16bit_alu = true, /* not quite what it sounds like */
1240       .max_unroll_iterations = 0,
1241       .use_interpolated_input_intrinsics = true,
1242    };
1243 
1244    screen->nir_options = default_options;
1245 
1246    if (!screen->info.feats.features.shaderInt64)
1247       screen->nir_options.lower_int64_options = ~0;
1248 
1249    if (!screen->info.feats.features.shaderFloat64) {
1250       screen->nir_options.lower_doubles_options = ~0;
1251       screen->nir_options.lower_flrp64 = true;
1252       screen->nir_options.lower_ffma64 = true;
1253       /* soft fp64 function inlining will blow up loop bodies and effectively
1254        * stop Vulkan drivers from unrolling the loops.
1255        */
1256       screen->nir_options.max_unroll_iterations_fp64 = 32;
1257    }
1258 
1259    /*
1260        The OpFRem and OpFMod instructions use cheap approximations of remainder,
1261        and the error can be large due to the discontinuity in trunc() and floor().
1262        This can produce mathematically unexpected results in some cases, such as
1263        FMod(x,x) computing x rather than 0, and can also cause the result to have
1264        a different sign than the infinitely precise result.
1265 
1266        -Table 84. Precision of core SPIR-V Instructions
1267        * for drivers that are known to have imprecise fmod for doubles, lower dmod
1268     */
1269    if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1270        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1271        screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1272       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1273 }
1274 
1275 const void *
1276 zink_get_compiler_options(struct pipe_screen *pscreen,
1277                           enum pipe_shader_ir ir,
1278                           gl_shader_stage shader)
1279 {
1280    assert(ir == PIPE_SHADER_IR_NIR);
1281    return &zink_screen(pscreen)->nir_options;
1282 }
1283 
1284 struct nir_shader *
1285 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1286 {
1287    if (zink_debug & ZINK_DEBUG_TGSI) {
1288       fprintf(stderr, "TGSI shader:\n---8<---\n");
1289       tgsi_dump_to_file(tokens, 0, stderr);
1290       fprintf(stderr, "---8<---\n\n");
1291    }
1292 
1293    return tgsi_to_nir(tokens, screen, false);
1294 }
1295 
1296 
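/* scalarization filters: def_is_64bit/src_is_64bit flag any 64bit def or src so
 * that filter_64_bit_instr can select instructions for nir_lower_alu_to_scalar
 * ahead of int64 lowering
 */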
1297 static bool
1298 def_is_64bit(nir_def *def, void *state)
1299 {
1300    bool *lower = (bool *)state;
1301    if (def && (def->bit_size == 64)) {
1302       *lower = true;
1303       return false;
1304    }
1305    return true;
1306 }
1307 
1308 static bool
1309 src_is_64bit(nir_src *src, void *state)
1310 {
1311    bool *lower = (bool *)state;
1312    if (src && (nir_src_bit_size(*src) == 64)) {
1313       *lower = true;
1314       return false;
1315    }
1316    return true;
1317 }
1318 
1319 static bool
1320 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1321 {
1322    bool lower = false;
1323    /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1324     * doesn't have const variants, so do the ugly const_cast here. */
1325    nir_instr *instr = (nir_instr *)const_instr;
1326 
1327    nir_foreach_def(instr, def_is_64bit, &lower);
1328    if (lower)
1329       return true;
1330    nir_foreach_src(instr, src_is_64bit, &lower);
1331    return lower;
1332 }
1333 
1334 static bool
1335 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1336 {
1337    nir_instr *instr = (nir_instr *)const_instr;
1338    nir_alu_instr *alu = nir_instr_as_alu(instr);
1339    switch (alu->op) {
1340    case nir_op_pack_64_2x32_split:
1341    case nir_op_pack_32_2x16_split:
1342    case nir_op_unpack_32_2x16_split_x:
1343    case nir_op_unpack_32_2x16_split_y:
1344    case nir_op_unpack_64_2x32_split_x:
1345    case nir_op_unpack_64_2x32_split_y:
1346       return true;
1347    default:
1348       break;
1349    }
1350    return false;
1351 }
1352 
1353 
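/* bookkeeping for the UBO/SSBO rewrite passes below: each array holds one
 * variable per access bit size (indexed by bit_size >> 4), with the gallium
 * constant buffer (uniform_0) tracked separately from other UBOs
 */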
1354 struct bo_vars {
1355    nir_variable *uniforms[5];
1356    nir_variable *ubo[5];
1357    nir_variable *ssbo[5];
1358    uint32_t first_ubo;
1359    uint32_t first_ssbo;
1360 };
1361 
1362 static struct bo_vars
1363 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1364 {
1365    struct bo_vars bo;
1366    memset(&bo, 0, sizeof(bo));
1367    if (zs->ubos_used)
1368       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1369    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1370    if (zs->ssbos_used)
1371       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1372    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1373    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1374       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1375       if (var->data.mode == nir_var_mem_ssbo) {
1376          assert(!bo.ssbo[idx]);
1377          bo.ssbo[idx] = var;
1378       } else {
1379          if (var->data.driver_location) {
1380             assert(!bo.ubo[idx]);
1381             bo.ubo[idx] = var;
1382          } else {
1383             assert(!bo.uniforms[idx]);
1384             bo.uniforms[idx] = var;
1385          }
1386       }
1387    }
1388    return bo;
1389 }
1390 
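/* eliminate constant-offset UBO/SSBO accesses that are statically out of bounds
 * for the bound variable: loads are replaced with zero and the access is removed
 */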
1391 static bool
1392 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1393 {
1394    struct bo_vars *bo = data;
1395    if (instr->type != nir_instr_type_intrinsic)
1396       return false;
1397    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1398    nir_variable *var = NULL;
1399    nir_def *offset = NULL;
1400    bool is_load = true;
1401    b->cursor = nir_before_instr(instr);
1402 
1403    switch (intr->intrinsic) {
1404    case nir_intrinsic_store_ssbo:
1405       var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
1406       offset = intr->src[2].ssa;
1407       is_load = false;
1408       break;
1409    case nir_intrinsic_load_ssbo:
1410       var = bo->ssbo[intr->def.bit_size >> 4];
1411       offset = intr->src[1].ssa;
1412       break;
1413    case nir_intrinsic_load_ubo:
1414       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1415          var = bo->uniforms[intr->def.bit_size >> 4];
1416       else
1417          var = bo->ubo[intr->def.bit_size >> 4];
1418       offset = intr->src[1].ssa;
1419       break;
1420    default:
1421       return false;
1422    }
1423    nir_src offset_src = nir_src_for_ssa(offset);
1424    if (!nir_src_is_const(offset_src))
1425       return false;
1426 
1427    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1428    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1429    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1430    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1431    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1432       return false;
1433 
1434    unsigned rewrites = 0;
1435    nir_def *result[2];
1436    for (unsigned i = 0; i < intr->num_components; i++) {
1437       if (offset_bytes + i >= size) {
1438          rewrites++;
1439          if (is_load)
1440             result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1441       }
1442    }
1443    assert(rewrites == intr->num_components);
1444    if (is_load) {
1445       nir_def *load = nir_vec(b, result, intr->num_components);
1446       nir_def_rewrite_uses(&intr->def, load);
1447    }
1448    nir_instr_remove(instr);
1449    return true;
1450 }
1451 
1452 static bool
1453 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1454 {
1455    struct bo_vars bo = get_bo_vars(zs, shader);
1456    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1457 }
1458 
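/* main optimization loop: run lowering and opt passes until they stop making
 * progress, including int64/fp64 lowering and the zink-specific b2b and
 * bound-bo passes, followed by a late-algebraic cleanup loop
 */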
1459 static void
1460 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1461 {
1462    bool progress;
1463    do {
1464       progress = false;
1465       if (s->options->lower_int64_options)
1466          NIR_PASS_V(s, nir_lower_int64);
1467       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1468          NIR_PASS_V(s, lower_64bit_pack);
1469       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1470       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1471       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1472       NIR_PASS(progress, s, nir_copy_prop);
1473       NIR_PASS(progress, s, nir_opt_remove_phis);
1474       if (s->options->lower_int64_options) {
1475          NIR_PASS(progress, s, nir_lower_64bit_phis);
1476          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1477       }
1478       NIR_PASS(progress, s, nir_opt_dce);
1479       NIR_PASS(progress, s, nir_opt_dead_cf);
1480       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1481       NIR_PASS(progress, s, nir_opt_cse);
1482       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1483       NIR_PASS(progress, s, nir_opt_algebraic);
1484       NIR_PASS(progress, s, nir_opt_constant_folding);
1485       NIR_PASS(progress, s, nir_opt_undef);
1486       NIR_PASS(progress, s, zink_nir_lower_b2b);
1487       if (zs)
1488          NIR_PASS(progress, s, bound_bo_access, zs);
1489       if (can_shrink)
1490          NIR_PASS(progress, s, nir_opt_shrink_vectors);
1491    } while (progress);
1492 
1493    do {
1494       progress = false;
1495       NIR_PASS(progress, s, nir_opt_algebraic_late);
1496       if (progress) {
1497          NIR_PASS_V(s, nir_copy_prop);
1498          NIR_PASS_V(s, nir_opt_dce);
1499          NIR_PASS_V(s, nir_opt_cse);
1500       }
1501    } while (progress);
1502 }
1503 
1504 /* - copy the lowered fbfetch variable
1505  * - set the new one up as an input attachment for descriptor 0.6
1506  * - load it as an image
1507  * - overwrite the previous load
1508  */
1509 static bool
1510 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1511 {
1512    bool ms = data != NULL;
1513    if (instr->type != nir_instr_type_intrinsic)
1514       return false;
1515    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1516    if (intr->intrinsic != nir_intrinsic_load_deref)
1517       return false;
1518    nir_variable *var = nir_intrinsic_get_var(intr, 0);
1519    if (!var->data.fb_fetch_output)
1520       return false;
1521    b->cursor = nir_after_instr(instr);
1522    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1523    /* If Dim is SubpassData, ... Image Format must be Unknown
1524     * - SPIRV OpTypeImage specification
1525     */
1526    fbfetch->data.image.format = 0;
1527    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1528    fbfetch->data.mode = nir_var_uniform;
1529    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1531    fbfetch->data.sample = ms;
1532    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1533    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1534    nir_shader_add_variable(b->shader, fbfetch);
1535    nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1536    nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1537    nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1538    nir_def_rewrite_uses(&intr->def, load);
1539    return true;
1540 }
1541 
1542 static bool
1543 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1544 {
1545    nir_foreach_shader_out_variable(var, shader) {
1546       if (var->data.fb_fetch_output) {
1547          *fbfetch = var;
1548          break;
1549       }
1550    }
1551    assert(*fbfetch);
1552    if (!*fbfetch)
1553       return false;
1554    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1555 }
1556 
1557 /*
1558  * Add a check for out of bounds LOD for every texel fetch op
1559  * It boils down to:
1560  * - if (lod < query_levels(tex))
1561  * -    res = txf(tex)
1562  * - else
1563  * -    res = (0, 0, 0, 1)
1564  */
1565 static bool
1566 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1567 {
1568    if (in->type != nir_instr_type_tex)
1569       return false;
1570    nir_tex_instr *txf = nir_instr_as_tex(in);
1571    if (txf->op != nir_texop_txf)
1572       return false;
1573 
1574    b->cursor = nir_before_instr(in);
1575    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1576    assert(lod_idx >= 0);
1577    nir_src lod_src = txf->src[lod_idx].src;
1578    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1579       return false;
1580 
1581    nir_def *lod = lod_src.ssa;
1582 
1583    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1584    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1585    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1586                                                 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1587    levels->op = nir_texop_query_levels;
1588    levels->texture_index = txf->texture_index;
1589    levels->dest_type = nir_type_int | lod->bit_size;
1590    if (offset_idx >= 0) {
1591       levels->src[0].src_type = nir_tex_src_texture_offset;
1592       levels->src[0].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1593    }
1594    if (handle_idx >= 0) {
1595       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1596       levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1597    }
1598    nir_def_init(&levels->instr, &levels->def,
1599                 nir_tex_instr_dest_size(levels), 32);
1600    nir_builder_instr_insert(b, &levels->instr);
1601 
1602    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1603    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1604    nir_builder_instr_insert(b, &new_txf->instr);
1605 
1606    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1607    nir_const_value oob_values[4] = {0};
1608    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1609    oob_values[3] = (txf->dest_type & nir_type_float) ?
1610                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1611    nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1612 
1613    nir_pop_if(b, lod_oob_else);
1614    nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1615 
1616    nir_def_rewrite_uses(&txf->def, robust_txf);
1617    nir_instr_remove_v(in);
1618    return true;
1619 }
1620 
1621 /* This pass is used to workaround the lack of out of bounds LOD robustness
1622  * for texel fetch ops in VK_EXT_image_robustness.
1623  */
1624 static bool
1625 lower_txf_lod_robustness(nir_shader *shader)
1626 {
1627    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1628 }
1629 
1630 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1631 static bool
1632 check_psiz(struct nir_shader *s)
1633 {
1634    bool have_psiz = false;
1635    nir_foreach_shader_out_variable(var, s) {
1636       if (var->data.location == VARYING_SLOT_PSIZ) {
1637          /* genuine PSIZ outputs will have this set */
1638          have_psiz |= !!var->data.explicit_location;
1639       }
1640    }
1641    return have_psiz;
1642 }
1643 
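/* find the variable of the given mode covering this location and component,
 * accounting for 64bit/clip/cull component expansion and ignoring a lowered
 * (non-explicit) gl_PointSize when a genuine one exists
 */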
1644 static nir_variable *
1645 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1646 {
1647    assert((int)location >= 0);
1648 
1649    nir_foreach_variable_with_modes(var, nir, mode) {
1650       if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1651          unsigned num_components = glsl_get_vector_elements(var->type);
1652          if (glsl_type_is_64bit(glsl_without_array(var->type)))
1653             num_components *= 2;
1654          if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0)
1655             num_components = glsl_get_aoa_size(var->type);
1656          if (var->data.location_frac <= location_frac &&
1657                var->data.location_frac + num_components > location_frac)
1658             return var;
1659       }
1660    }
1661    return NULL;
1662 }
1663 
1664 static bool
1665 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1666 {
1667    unsigned num_components = util_bitcount(output->component_mask);
1668    for (unsigned i = 0; i < num_components; i++)
1669       if (!inlined[output->component_offset + i])
1670          return false;
1671    return true;
1672 }
1673 
1674 static void
1675 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1676 {
1677    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1678    if (last_output < VARYING_SLOT_VAR0)
1679       last_output = VARYING_SLOT_VAR0;
1680    else
1681       last_output++;
1682    /* this should get fixed up by slot remapping */
1683    psiz->data.location = last_output;
1684 }
1685 
1686 static const struct glsl_type *
1687 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1688 {
1689    /* could be dvec/dmat/mat: each member is the same */
1690    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1691    /* determine size of each member type */
1692    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1693    /* normalize slot idx to current type's size */
1694    slot %= slot_count;
1695    unsigned slot_components = glsl_get_components(plain);
1696    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1697       slot_components *= 2;
1698    /* create a vec4 mask of the selected slot's components out of all the components */
1699    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1700    /* return a vecN of the selected components */
1701    slot_components = util_bitcount(mask);
1702    return glsl_vec_type(slot_components);
1703 }
1704 
1705 static const struct glsl_type *
1706 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1707 {
1708    const struct glsl_type *type = slot_type;
1709    unsigned slot_count = 0;
1710    unsigned cur_slot = 0;
1711    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1712    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1713       /* use array type for slot counting but return array member type for unroll */
1714       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1715       type = glsl_without_array(arraytype);
1716       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1717    }
1718    *slot_idx -= (cur_slot - slot_count);
1719    if (!glsl_type_is_struct_or_ifc(type))
1720       /* this is a fully unrolled struct: find the number of vec components to output */
1721       type = clamp_slot_type(type, *slot_idx);
1722    return type;
1723 }
1724 
1725 static unsigned
1726 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1727 {
1728    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1729    const struct glsl_type *orig_type = var->type;
1730    const struct glsl_type *type = glsl_without_array(var->type);
1731    unsigned slot_idx = slot - so_slot;
1732    if (type != orig_type)
1733       slot_idx %= glsl_count_vec4_slots(type, false, false);
1734    /* need to find the vec4 that's being exported by this slot */
1735    while (glsl_type_is_struct_or_ifc(type))
1736       type = unroll_struct_type(type, &slot_idx);
1737 
1738    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1739    unsigned num_components = glsl_get_components(glsl_without_array(type));
1740    /* special handling: clip/cull distance are arrays with vector semantics */
1741    if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1742       num_components = glsl_array_size(type);
1743       if (slot_idx)
1744          /* this is the second vec4 */
1745          num_components %= 4;
1746       else
1747          /* this is the first vec4 */
1748          num_components = MIN2(num_components, 4);
1749    }
1750    assert(num_components);
1751    /* gallium handles xfb in terms of 32bit units */
1752    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1753       num_components *= 2;
1754    return num_components;
1755 }
1756 
1757 static unsigned
1758 get_var_slot_count(nir_shader *nir, nir_variable *var)
1759 {
1760    assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1761    const struct glsl_type *type = var->type;
1762    if (nir_is_arrayed_io(var, nir->info.stage))
1763       type = glsl_get_array_element(type);
1764    unsigned slot_count = 0;
1765    if (var->data.location >= VARYING_SLOT_VAR0)
1766       slot_count = glsl_count_vec4_slots(type, false, false);
1767    else if (glsl_type_is_array(type))
1768       slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1769    else
1770       slot_count = 1;
1771    return slot_count;
1772 }
1773 
1774 
1775 static const nir_xfb_output_info *
1776 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1777 {
1778    for (unsigned i = 0; i < xfb_info->output_count; i++) {
1779       const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1780       if (packed_output->location == slot)
1781          return packed_output;
1782    }
1783    return NULL;
1784 }
1785 
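/* map nir_xfb_info outputs onto the shader's output variables: always record
 * buffer strides for draw-time use, inline xfb data directly onto a variable
 * when it captures the whole output, and stash per-slot metadata for packed
 * outputs so the second loop can try to consolidate them
 */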
1786 static void
1787 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1788 {
1789    bool inlined[VARYING_SLOT_MAX][4] = {0};
1790    uint64_t packed = 0;
1791    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1792    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1793    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1794    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1795    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1796       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1797       unsigned xfb_components = util_bitcount(output->component_mask);
1798       /* always set stride to be used during draw */
1799       zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1800       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1801          for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1802             unsigned slot = output->location;
1803             if (inlined[slot][output->component_offset + c])
1804                continue;
1805             nir_variable *var = NULL;
1806             while (!var && slot < VARYING_SLOT_TESS_MAX)
1807                var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1808             slot = output->location;
1809             unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1810             if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1811                /* if no variable is found for the xfb output, no output exists */
1812                inlined[slot][c + output->component_offset] = true;
1813                continue;
1814             }
1815             if (var->data.explicit_xfb_buffer) {
1816                /* handle dvec3 where gallium splits streamout over 2 registers */
1817                for (unsigned j = 0; j < xfb_components; j++)
1818                   inlined[slot][c + output->component_offset + j] = true;
1819             }
1820             if (is_inlined(inlined[slot], output))
1821                continue;
1822             assert(!glsl_type_is_array(var->type) || var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0);
1823             assert(!glsl_type_is_struct_or_ifc(var->type));
1824             unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1825             if (glsl_type_is_64bit(glsl_without_array(var->type)))
1826                num_components *= 2;
1827             /* if this is the entire variable, try to blast it out during the initial declaration
1828             * structs must be handled later to ensure accurate analysis
1829             */
1830             if ((num_components == xfb_components ||
1831                  num_components < xfb_components ||
1832                  (num_components > xfb_components && xfb_components == 4))) {
1833                var->data.explicit_xfb_buffer = 1;
1834                var->data.xfb.buffer = output->buffer;
1835                var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1836                var->data.offset = (output->offset + c * sizeof(uint32_t));
1837                var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1838                for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
1839                   inlined[slot][c + output->component_offset + j] = true;
1840             } else {
1841                /* otherwise store some metadata for later */
1842                packed |= BITFIELD64_BIT(slot);
1843                packed_components[slot] += xfb_components;
1844                packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
1845                packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
1846                for (unsigned j = 0; j < xfb_components; j++)
1847                   packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
1848             }
1849          }
1850       }
1851    }
1852 
1853    /* if this was flagged as a packed output before, and if all the components are
1854     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1855     * can be consolidated into a single output to conserve locations
1856     */
1857    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1858       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1859       unsigned slot = output->location;
1860       if (is_inlined(inlined[slot], output))
1861          continue;
1862       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1863          nir_variable *var = NULL;
1864          while (!var)
1865             var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
1866          slot = output->location;
1867          unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1868          if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
1869             continue;
1870          /* this is a lowered 64bit variable that can't be exported due to packing */
1871          if (var->data.is_xfb)
1872             goto out;
1873 
1874          unsigned num_slots = var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1 ?
1875                               glsl_array_size(var->type) / 4 :
1876                               glsl_count_vec4_slots(var->type, false, false);
1877          /* for each variable, iterate over all the variable's slots and inline the outputs */
1878          for (unsigned j = 0; j < num_slots; j++) {
1879             slot = var->data.location + j;
1880             const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
1881             if (!packed_output)
1882                goto out;
1883 
1884             /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1885             if (!(packed & BITFIELD64_BIT(slot)) ||
1886                 util_bitcount(packed_streams[slot]) != 1 ||
1887                 util_bitcount(packed_buffers[slot]) != 1)
1888                goto out;
1889 
1890             /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1891             unsigned num_components = get_slot_components(var, slot, var->data.location);
1892             if (num_components != packed_components[slot])
1893                goto out;
1894 
1895             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1896             uint32_t prev_offset = packed_offsets[packed_output->location][0];
1897             for (unsigned k = 1; k < num_components; k++) {
1898                /* if the offsets are not incrementing as expected, skip consolidation */
1899                if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
1900                   goto out;
1901                prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
1902             }
1903          }
1904          /* this output can be consolidated: blast out all the data inlined */
1905          var->data.explicit_xfb_buffer = 1;
1906          var->data.xfb.buffer = output->buffer;
1907          var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1908          var->data.offset = output->offset;
1909          var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1910          /* mark all slot components inlined to skip subsequent loop iterations */
1911          for (unsigned j = 0; j < num_slots; j++) {
1912             slot = var->data.location + j;
1913             for (unsigned k = 0; k < packed_components[slot]; k++)
1914                inlined[slot][k] = true;
1915             packed &= ~BITFIELD64_BIT(slot);
1916          }
1917          continue;
1918       }
1919 out:
1920       unreachable("xfb should be inlined by now!");
1921    }
1922 }
1923 
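/* state for decompose_attribs/lower_attrib: split[0] is the original attribute
 * variable, split[1..n] are the per-component replacements; needs_w means the
 * w component is recovered from channel 3 of the first split's (oob) load
 */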
1924 struct decompose_state {
1925   nir_variable **split;
1926   bool needs_w;
1927 };
1928 
1929 static bool
1930 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
1931 {
1932    struct decompose_state *state = data;
1933    nir_variable **split = state->split;
1934    if (instr->type != nir_instr_type_intrinsic)
1935       return false;
1936    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1937    if (intr->intrinsic != nir_intrinsic_load_deref)
1938       return false;
1939    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1940    nir_variable *var = nir_deref_instr_get_variable(deref);
1941    if (var != split[0])
1942       return false;
1943    unsigned num_components = glsl_get_vector_elements(split[0]->type);
1944    b->cursor = nir_after_instr(instr);
1945    nir_def *loads[4];
1946    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
1947       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
1948    if (state->needs_w) {
1949       /* oob load w component to get correct value for int/float */
1950       loads[3] = nir_channel(b, loads[0], 3);
1951       loads[0] = nir_channel(b, loads[0], 0);
1952    }
1953    nir_def *new_load = nir_vec(b, loads, num_components);
1954    nir_def_rewrite_uses(&intr->def, new_load);
1955    nir_instr_remove_v(instr);
1956    return true;
1957 }
1958 
1959 static bool
1960 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
1961 {
1962    uint32_t bits = 0;
1963    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
1964       bits |= BITFIELD_BIT(var->data.driver_location);
1965    bits = ~bits;
1966    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
1967       nir_variable *split[5];
1968       struct decompose_state state;
1969       state.split = split;
1970       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
1971       assert(var);
1972       split[0] = var;
1973       bits |= BITFIELD_BIT(var->data.driver_location);
1974       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
1975       unsigned num_components = glsl_get_vector_elements(var->type);
1976       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
1977       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
1978          split[i+1] = nir_variable_clone(var, nir);
1979          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
1980          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
1981             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
1982          else
1983             split[i+1]->type = new_type;
1984          split[i+1]->data.driver_location = ffs(bits) - 1;
1985          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
1986          nir_shader_add_variable(nir, split[i+1]);
1987       }
1988       var->data.mode = nir_var_shader_temp;
1989       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
1990    }
1991    nir_fixup_deref_modes(nir);
1992    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1993    optimize_nir(nir, NULL, true);
1994    return true;
1995 }
1996 
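/* convert byte offsets on buffer/shared/scratch access into indices of the
 * access bit size, and split 64bit loads/stores into 2x32 with a pack/unpack
 * when shaderInt64 is unavailable
 */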
1997 static bool
1998 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1999 {
2000    struct zink_screen *screen = data;
2001    const bool has_int64 = screen->info.feats.features.shaderInt64;
2002    if (instr->type != nir_instr_type_intrinsic)
2003       return false;
2004    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2005    b->cursor = nir_before_instr(instr);
2006    switch (intr->intrinsic) {
2007    case nir_intrinsic_ssbo_atomic:
2008    case nir_intrinsic_ssbo_atomic_swap: {
2009       /* convert offset to uintN_t[idx] */
2010       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2011       nir_src_rewrite(&intr->src[1], offset);
2012       return true;
2013    }
2014    case nir_intrinsic_load_ssbo:
2015    case nir_intrinsic_load_ubo: {
2016       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2017       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2018                         nir_src_is_const(intr->src[0]) &&
2019                         nir_src_as_uint(intr->src[0]) == 0 &&
2020                         intr->def.bit_size == 64 &&
2021                         nir_intrinsic_align_offset(intr) % 8 != 0;
2022       force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2023       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2024       nir_src_rewrite(&intr->src[1], offset);
2025       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2026       if (force_2x32) {
2027          /* this is always scalarized */
2028          assert(intr->def.num_components == 1);
2029          /* rewrite as 2x32 */
2030          nir_def *load[2];
2031          for (unsigned i = 0; i < 2; i++) {
2032             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2033                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2034             else
2035                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2036             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2037          }
2038          /* cast back to 64bit */
2039          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2040          nir_def_rewrite_uses(&intr->def, casted);
2041          nir_instr_remove(instr);
2042       }
2043       return true;
2044    }
2045    case nir_intrinsic_load_scratch:
2046    case nir_intrinsic_load_shared: {
2047       b->cursor = nir_before_instr(instr);
2048       bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2049       nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2050       nir_src_rewrite(&intr->src[0], offset);
2051       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2052       if (force_2x32) {
2053          /* this is always scalarized */
2054          assert(intr->def.num_components == 1);
2055          /* rewrite as 2x32 */
2056          nir_def *load[2];
2057          for (unsigned i = 0; i < 2; i++)
2058             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2059          /* cast back to 64bit */
2060          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2061          nir_def_rewrite_uses(&intr->def, casted);
2062          nir_instr_remove(instr);
2063          return true;
2064       }
2065       break;
2066    }
2067    case nir_intrinsic_store_ssbo: {
2068       b->cursor = nir_before_instr(instr);
2069       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2070       nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2071       nir_src_rewrite(&intr->src[2], offset);
2072       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2073       if (force_2x32) {
2074          /* this is always scalarized */
2075          assert(intr->src[0].ssa->num_components == 1);
2076          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2077          for (unsigned i = 0; i < 2; i++)
2078             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2079          nir_instr_remove(instr);
2080       }
2081       return true;
2082    }
2083    case nir_intrinsic_store_scratch:
2084    case nir_intrinsic_store_shared: {
2085       b->cursor = nir_before_instr(instr);
2086       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2087       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2088       nir_src_rewrite(&intr->src[1], offset);
2089       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2090       if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2091          /* this is always scalarized */
2092          assert(intr->src[0].ssa->num_components == 1);
2093          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2094          for (unsigned i = 0; i < 2; i++)
2095             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2096          nir_instr_remove(instr);
2097       }
2098       return true;
2099    }
2100    default:
2101       break;
2102    }
2103    return false;
2104 }
2105 
2106 static bool
2107 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2108 {
2109    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2110 }
2111 
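/* return (creating on first use) the buffer variable matching this access's
 * bit size: the 32bit variable is cloned and retyped as an array of structs
 * containing a sized "base" uintN_t array plus an unsized trailing array
 */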
2112 static nir_variable *
2113 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2114 {
2115    nir_variable *var, **ptr;
2116    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2117 
2118    if (ssbo)
2119       ptr = &bo->ssbo[bit_size >> 4];
2120    else {
2121       if (!idx) {
2122          ptr = &bo->uniforms[bit_size >> 4];
2123       } else
2124          ptr = &bo->ubo[bit_size >> 4];
2125    }
2126    var = *ptr;
2127    if (!var) {
2128       if (ssbo)
2129          var = bo->ssbo[32 >> 4];
2130       else {
2131          if (!idx)
2132             var = bo->uniforms[32 >> 4];
2133          else
2134             var = bo->ubo[32 >> 4];
2135       }
2136       var = nir_variable_clone(var, shader);
2137       if (ssbo)
2138          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2139       else
2140          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2141       *ptr = var;
2142       nir_shader_add_variable(shader, var);
2143 
2144       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2145       fields[0].name = ralloc_strdup(shader, "base");
2146       fields[1].name = ralloc_strdup(shader, "unsized");
2147       unsigned array_size = glsl_get_length(var->type);
2148       const struct glsl_type *bare_type = glsl_without_array(var->type);
2149       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2150       unsigned length = glsl_get_length(array_type);
2151       const struct glsl_type *type;
2152       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2153       if (bit_size > 32) {
2154          assert(bit_size == 64);
2155          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2156       } else {
2157          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2158       }
2159       fields[0].type = type;
2160       fields[1].type = unsized;
2161       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2162       var->data.driver_location = idx;
2163    }
2164    return var;
2165 }
2166 
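/* rewrite ssbo_atomic(_swap) as deref_atomic(_swap) on the retyped ssbo
 * variable, emitting one atomic per component at consecutive array offsets
 */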
2167 static void
2168 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2169 {
2170    nir_intrinsic_op op;
2171    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2172    if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2173       op = nir_intrinsic_deref_atomic;
2174    else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2175       op = nir_intrinsic_deref_atomic_swap;
2176    else
2177       unreachable("unknown intrinsic");
2178    nir_def *offset = intr->src[1].ssa;
2179    nir_src *src = &intr->src[0];
2180    nir_variable *var = get_bo_var(b->shader, bo, true, src,
2181                                   intr->def.bit_size);
2182    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2183    nir_def *idx = src->ssa;
2184    if (bo->first_ssbo)
2185       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2186    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2187    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2188 
2189    /* generate new atomic deref ops for every component */
2190    nir_def *result[4];
2191    unsigned num_components = intr->def.num_components;
2192    for (unsigned i = 0; i < num_components; i++) {
2193       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2194       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2195       nir_def_init(&new_instr->instr, &new_instr->def, 1,
2196                    intr->def.bit_size);
2197       nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2198       new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2199       /* deref ops have no offset src, so copy the srcs after it */
2200       for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2201          new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2202       nir_builder_instr_insert(b, &new_instr->instr);
2203 
2204       result[i] = &new_instr->def;
2205       offset = nir_iadd_imm(b, offset, 1);
2206    }
2207 
2208    nir_def *load = nir_vec(b, result, num_components);
2209    nir_def_rewrite_uses(&intr->def, load);
2210    nir_instr_remove(instr);
2211 }
2212 
2213 static bool
2214 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2215 {
2216    struct bo_vars *bo = data;
2217    if (instr->type != nir_instr_type_intrinsic)
2218       return false;
2219    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2220    nir_variable *var = NULL;
2221    nir_def *offset = NULL;
2222    bool is_load = true;
2223    b->cursor = nir_before_instr(instr);
2224    nir_src *src;
2225    bool ssbo = true;
2226    switch (intr->intrinsic) {
2227    case nir_intrinsic_ssbo_atomic:
2228    case nir_intrinsic_ssbo_atomic_swap:
2229       rewrite_atomic_ssbo_instr(b, instr, bo);
2230       return true;
2231    case nir_intrinsic_store_ssbo:
2232       src = &intr->src[1];
2233       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2234       offset = intr->src[2].ssa;
2235       is_load = false;
2236       break;
2237    case nir_intrinsic_load_ssbo:
2238       src = &intr->src[0];
2239       var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2240       offset = intr->src[1].ssa;
2241       break;
2242    case nir_intrinsic_load_ubo:
2243       src = &intr->src[0];
2244       var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2245       offset = intr->src[1].ssa;
2246       ssbo = false;
2247       break;
2248    default:
2249       return false;
2250    }
2251    assert(var);
2252    assert(offset);
2253    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2254    nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2255    if (!ssbo && bo->first_ubo && var->data.driver_location)
2256       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2257    else if (ssbo && bo->first_ssbo)
2258       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2259    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2260                                                         nir_i2iN(b, idx, deref_var->def.bit_size));
2261    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2262    assert(intr->num_components <= 2);
2263    if (is_load) {
2264       nir_def *result[2];
2265       for (unsigned i = 0; i < intr->num_components; i++) {
2266          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2267                                                             nir_i2iN(b, offset, deref_struct->def.bit_size));
2268          result[i] = nir_load_deref(b, deref_arr);
2269          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2270             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2271          offset = nir_iadd_imm(b, offset, 1);
2272       }
2273       nir_def *load = nir_vec(b, result, intr->num_components);
2274       nir_def_rewrite_uses(&intr->def, load);
2275    } else {
2276       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2277                                                          nir_i2iN(b, offset, deref_struct->def.bit_size));
2278       nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2279    }
2280    nir_instr_remove(instr);
2281    return true;
2282 }
2283 
2284 static bool
2285 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2286 {
2287    struct bo_vars bo = get_bo_vars(zs, shader);
2288    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2289 }
2290 
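/* classify an intrinsic as shader I/O, flagging loads, inputs, and
 * interpolated loads; returns false for anything that isn't I/O
 */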
2291 static bool
2292 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2293 {
2294    switch (intr->intrinsic) {
2295    case nir_intrinsic_load_interpolated_input:
2296       *is_interp = true;
2297       FALLTHROUGH;
2298    case nir_intrinsic_load_input:
2299    case nir_intrinsic_load_per_vertex_input:
2300       *is_input = true;
2301       FALLTHROUGH;
2302    case nir_intrinsic_load_output:
2303    case nir_intrinsic_load_per_vertex_output:
2304    case nir_intrinsic_load_per_primitive_output:
2305       *is_load = true;
2306       FALLTHROUGH;
2307    case nir_intrinsic_store_output:
2308    case nir_intrinsic_store_per_primitive_output:
2309    case nir_intrinsic_store_per_vertex_output:
2310       break;
2311    default:
2312       return false;
2313    }
2314    return true;
2315 }
2316 
2317 static bool
2318 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2319 {
2320    switch (intr->intrinsic) {
2321    case nir_intrinsic_load_per_vertex_input:
2322    case nir_intrinsic_load_per_vertex_output:
2323    case nir_intrinsic_load_per_primitive_output:
2324    case nir_intrinsic_store_per_primitive_output:
2325    case nir_intrinsic_store_per_vertex_output:
2326       return true;
2327    default:
2328       break;
2329    }
2330    return false;
2331 }
2332 
2333 static bool
2334 find_var_deref(nir_shader *nir, nir_variable *var)
2335 {
2336    nir_foreach_function_impl(impl, nir) {
2337       nir_foreach_block(block, impl) {
2338          nir_foreach_instr(instr, block) {
2339             if (instr->type != nir_instr_type_deref)
2340                continue;
2341             nir_deref_instr *deref = nir_instr_as_deref(instr);
2342             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2343                return true;
2344          }
2345       }
2346    }
2347    return false;
2348 }
2349 
2350 static bool
2351 find_var_io(nir_shader *nir, nir_variable *var)
2352 {
2353    nir_foreach_function(function, nir) {
2354       if (!function->impl)
2355          continue;
2356 
2357       nir_foreach_block(block, function->impl) {
2358          nir_foreach_instr(instr, block) {
2359             if (instr->type != nir_instr_type_intrinsic)
2360                continue;
2361             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2362             bool is_load = false;
2363             bool is_input = false;
2364             bool is_interp = false;
2365             if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2366                continue;
2367             if (var->data.mode == nir_var_shader_in && !is_input)
2368                continue;
2369             if (var->data.mode == nir_var_shader_out && is_input)
2370                continue;
2371             unsigned slot_offset = 0;
2372             if (var->data.fb_fetch_output && !is_load)
2373                continue;
2374             if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2375                continue;
2376             nir_src *src_offset = nir_get_io_offset_src(intr);
2377             if (src_offset && nir_src_is_const(*src_offset))
2378                slot_offset = nir_src_as_uint(*src_offset);
2379             unsigned slot_count = get_var_slot_count(nir, var);
2380             if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2381                 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2382                 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2383                 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2384                return true;
2385          }
2386       }
2387    }
2388    return false;
2389 }
2390 
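/* gl_Layer must be zero when the framebuffer is not layered: the passes below
 * emit a clamped copy of the layer output (selected via a push constant) and
 * demote the original variable to a user varying or a temporary
 */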
2391 struct clamp_layer_output_state {
2392    nir_variable *original;
2393    nir_variable *clamped;
2394 };
2395 
2396 static void
2397 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2398 {
2399    nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2400                                                          nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2401    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2402    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2403    nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2404                                   nir_load_deref(b, original_deref),
2405                                   nir_imm_int(b, 0));
2406    nir_store_deref(b, clamped_deref, layer, 1);
2407 }
2408 
2409 static bool
2410 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2411 {
2412    struct clamp_layer_output_state *state = data;
2413    switch (instr->type) {
2414    case nir_instr_type_intrinsic: {
2415       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2416       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2417           intr->intrinsic != nir_intrinsic_emit_vertex)
2418          return false;
2419       b->cursor = nir_before_instr(instr);
2420       clamp_layer_output_emit(b, state);
2421       return true;
2422    }
2423    default: return false;
2424    }
2425 }
2426 
2427 static bool
2428 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2429 {
2430    switch (vs->info.stage) {
2431    case MESA_SHADER_VERTEX:
2432    case MESA_SHADER_GEOMETRY:
2433    case MESA_SHADER_TESS_EVAL:
2434       break;
2435    default:
2436       unreachable("invalid last vertex stage!");
2437    }
2438    struct clamp_layer_output_state state = {0};
2439    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2440    if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2441       return false;
2442    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2443    state.clamped->data.location = VARYING_SLOT_LAYER;
2444    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2445    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2446       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2447       state.original->data.driver_location = (*next_location)++;
2448       if (fs_var) {
2449          fs_var->data.location = state.original->data.location;
2450          fs_var->data.driver_location = state.original->data.driver_location;
2451       }
2452    } else {
2453       if (state.original->data.explicit_xfb_buffer) {
2454          /* Will xfb the clamped output but still better than nothing */
2455          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2456          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2457          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2458          state.clamped->data.offset = state.original->data.offset;
2459          state.clamped->data.stream = state.original->data.stream;
2460       }
2461       state.original->data.mode = nir_var_shader_temp;
2462       nir_fixup_deref_modes(vs);
2463    }
2464    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2465       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2466    } else {
2467       nir_builder b;
2468       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2469       b = nir_builder_at(nir_after_impl(impl));
2470       assert(impl->end_block->predecessors->entries == 1);
2471       clamp_layer_output_emit(&b, &state);
2472       nir_metadata_preserve(impl, nir_metadata_dominance);
2473    }
2474    optimize_nir(vs, NULL, true);
2475    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2476    return true;
2477 }
2478 
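/* Assign a packed driver_location to a producer-stage output: built-in slots get a
 * UINT_MAX sentinel, everything else reserves the next free vec4 slot(s) and records
 * the mapping in slot_map for the other stage to look up.
 */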
2479 static void
2480 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2481 {
2482    unsigned slot = var->data.location;
2483    switch (slot) {
2484    case -1:
2485    case VARYING_SLOT_POS:
2486    case VARYING_SLOT_PSIZ:
2487    case VARYING_SLOT_LAYER:
2488    case VARYING_SLOT_PRIMITIVE_ID:
2489    case VARYING_SLOT_CLIP_DIST0:
2490    case VARYING_SLOT_CULL_DIST0:
2491    case VARYING_SLOT_VIEWPORT:
2492    case VARYING_SLOT_FACE:
2493    case VARYING_SLOT_TESS_LEVEL_OUTER:
2494    case VARYING_SLOT_TESS_LEVEL_INNER:
2495       /* use a sentinel value to avoid counting later */
2496       var->data.driver_location = UINT_MAX;
2497       break;
2498 
2499    default:
2500       if (var->data.patch) {
2501          assert(slot >= VARYING_SLOT_PATCH0);
2502          slot -= VARYING_SLOT_PATCH0;
2503       }
2504       if (slot_map[slot] == 0xff) {
2505          assert(*reserved < MAX_VARYING);
2506          unsigned num_slots;
2507          if (nir_is_arrayed_io(var, stage))
2508             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2509          else
2510             num_slots = glsl_count_vec4_slots(var->type, false, false);
2511          assert(*reserved + num_slots <= MAX_VARYING);
2512          for (unsigned i = 0; i < num_slots; i++)
2513             slot_map[slot + i] = (*reserved)++;
2514       }
2515       slot = slot_map[slot];
2516       assert(slot < MAX_VARYING);
2517       var->data.driver_location = slot;
2518    }
2519 }
2520 
2521 ALWAYS_INLINE static bool
2522 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2523 {
2524    if (stage != MESA_SHADER_FRAGMENT)
2525       return false;
2526    return var->data.location >= VARYING_SLOT_TEX0 &&
2527           var->data.location <= VARYING_SLOT_TEX7;
2528 }
2529 
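/* Match a variable against the already-populated slot map. Returns false when the slot
 * was never assigned on the other side (dead IO); fragment texcoords are kept anyway
 * for GL_COORD_REPLACE, and tess control reserves the slot here since the workgroup
 * may read those outputs back.
 */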
2530 static bool
2531 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2532 {
2533    unsigned slot = var->data.location;
2534    switch (slot) {
2535    case VARYING_SLOT_POS:
2536    case VARYING_SLOT_PSIZ:
2537    case VARYING_SLOT_LAYER:
2538    case VARYING_SLOT_PRIMITIVE_ID:
2539    case VARYING_SLOT_CLIP_DIST0:
2540    case VARYING_SLOT_CULL_DIST0:
2541    case VARYING_SLOT_VIEWPORT:
2542    case VARYING_SLOT_FACE:
2543    case VARYING_SLOT_TESS_LEVEL_OUTER:
2544    case VARYING_SLOT_TESS_LEVEL_INNER:
2545       /* use a sentinel value to avoid counting later */
2546       var->data.driver_location = UINT_MAX;
2547       break;
2548    default:
2549       if (var->data.patch) {
2550          assert(slot >= VARYING_SLOT_PATCH0);
2551          slot -= VARYING_SLOT_PATCH0;
2552       }
2553       if (slot_map[slot] == (unsigned char)-1) {
2554          /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2555           * so keep for now and eliminate later
2556           */
2557          if (is_texcoord(stage, var)) {
2558             var->data.driver_location = -1;
2559             return true;
2560          }
2561          /* patch variables may be read in the workgroup */
2562          if (stage != MESA_SHADER_TESS_CTRL)
2563             /* dead io */
2564             return false;
2565          unsigned num_slots;
2566          if (nir_is_arrayed_io(var, stage))
2567             num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2568          else
2569             num_slots = glsl_count_vec4_slots(var->type, false, false);
2570          assert(*reserved + num_slots <= MAX_VARYING);
2571          for (unsigned i = 0; i < num_slots; i++)
2572             slot_map[slot + i] = (*reserved)++;
2573       }
2574       var->data.driver_location = slot_map[slot];
2575    }
2576    return true;
2577 }
2578 
2579 
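/* Rewrite loads of a dead input (one the producer never writes) to a constant:
 * zero, or (0,0,0,1) for fragment color inputs to match the GL default.
 */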
2580 static bool
2581 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2582 {
2583    nir_variable *var = data;
2584    if (instr->type != nir_instr_type_intrinsic)
2585       return false;
2586 
2587    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2588    bool is_load = false;
2589    bool is_input = false;
2590    bool is_interp = false;
2591    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2592       return false;
2593    if (!is_load)
2594       return false;
2595    unsigned location = nir_intrinsic_io_semantics(intr).location;
2596    if (location != var->data.location)
2597       return false;
2598    b->cursor = nir_before_instr(instr);
2599    nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2600                                 intr->def.bit_size);
2601    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2602       switch (location) {
2603       case VARYING_SLOT_COL0:
2604       case VARYING_SLOT_COL1:
2605       case VARYING_SLOT_BFC0:
2606       case VARYING_SLOT_BFC1:
2607          /* default color is 0,0,0,1 */
2608          if (intr->def.num_components == 4)
2609             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2610          break;
2611       default:
2612          break;
2613       }
2614    }
2615    nir_def_rewrite_uses(&intr->def, zero);
2616    nir_instr_remove(instr);
2617    return true;
2618 }
2619 
2620 
2621 
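/* Delete gl_PointSize stores: with 'one' set, only stores of the constant 1.0 are
 * removed (the maintenance5 default); otherwise every PSIZ store is removed.
 */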
2622 static bool
2623 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2624 {
2625    switch (intr->intrinsic) {
2626    case nir_intrinsic_store_output:
2627    case nir_intrinsic_store_per_primitive_output:
2628    case nir_intrinsic_store_per_vertex_output:
2629       break;
2630    default:
2631       return false;
2632    }
2633    if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2634       return false;
2635    if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2636       nir_instr_remove(&intr->instr);
2637       return true;
2638    }
2639    return false;
2640 }
2641 
2642 static bool
2643 delete_psiz_store(nir_shader *nir, bool one)
2644 {
2645    bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2646                                               nir_metadata_dominance, one ? nir : NULL);
2647    if (progress)
2648       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2649    return progress;
2650 }
2651 
2652 struct write_components {
2653    unsigned slot;
2654    uint32_t component_mask;
2655 };
2656 
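/* Given the mask of components the producer actually writes for a slot, pad the
 * consumer's loads so any unwritten component reads back 0 (or 1.0 for the alpha
 * channel of color inputs).
 */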
2657 static bool
2658 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2659 {
2660    struct write_components *wc = data;
2661    bool is_load = false;
2662    bool is_input = false;
2663    bool is_interp = false;
2664    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2665       return false;
2666    if (!is_input)
2667       return false;
2668    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2669    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2670       return false;
2671    unsigned num_components = intr->num_components;
2672    unsigned c = nir_intrinsic_component(intr);
2673    if (intr->def.bit_size == 64)
2674       num_components *= 2;
2675    nir_src *src_offset = nir_get_io_offset_src(intr);
2676    if (nir_src_is_const(*src_offset)) {
2677       unsigned slot_offset = nir_src_as_uint(*src_offset);
2678       if (s.location + slot_offset != wc->slot)
2679          return false;
2680    } else if (s.location > wc->slot || s.location + s.num_slots <= wc->slot) {
2681       return false;
2682    }
2683    uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2684    if (intr->def.bit_size == 64)
2685       readmask |= readmask << (intr->num_components + c);
2686    /* handle dvec3/dvec4 */
2687    if (num_components + c > 4)
2688       readmask >>= 4;
2689    if ((wc->component_mask & readmask) == readmask)
2690       return false;
2691    uint32_t rewrite_mask = readmask & ~wc->component_mask;
2692    if (!rewrite_mask)
2693       return false;
2694    b->cursor = nir_after_instr(&intr->instr);
2695    nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2696    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2697       switch (wc->slot) {
2698       case VARYING_SLOT_COL0:
2699       case VARYING_SLOT_COL1:
2700       case VARYING_SLOT_BFC0:
2701       case VARYING_SLOT_BFC1:
2702          /* default color is 0,0,0,1 */
2703          if (intr->def.num_components == 4)
2704             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2705          break;
2706       default:
2707          break;
2708       }
2709    }
2710    rewrite_mask >>= c;
2711    nir_def *dest = &intr->def;
2712    u_foreach_bit(component, rewrite_mask)
2713       dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2714    nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2715    return true;
2716 }
2717 
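/* Accumulate into wc->component_mask which components the producer writes for
 * wc->slot, counting 64-bit stores as two components each.
 */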
2718 static bool
2719 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2720 {
2721    struct write_components *wc = data;
2722    bool is_load = false;
2723    bool is_input = false;
2724    bool is_interp = false;
2725    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2726       return false;
2727    if (is_input || is_load)
2728       return false;
2729    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2730    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2731       return false;
2732    unsigned location = s.location;
2733    unsigned c = nir_intrinsic_component(intr);
2734    uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2735    if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2736       unsigned num_components = intr->num_components * 2;
2737       nir_src *src_offset = nir_get_io_offset_src(intr);
2738       if (nir_src_is_const(*src_offset)) {
2739          if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2740             return false;
2741       }
2742       wrmask |= wrmask << intr->num_components;
2743       /* handle dvec3/dvec4 */
2744       if (num_components + c > 4)
2745          wrmask >>= 4;
2746    }
2747    wc->component_mask |= wrmask;
2748    return false;
2749 }
2750 
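/* Link IO between two adjacent stages: drop an unneeded PointSize output, strip xfb
 * info when the consumer isn't the fragment shader, assign matching driver_locations
 * on both sides, rewrite reads of unwritten inputs, and zero-fill partially written
 * slots for lowered-IO shaders.
 */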
2751 void
2752 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2753 {
2754    unsigned reserved = 0;
2755    unsigned char slot_map[VARYING_SLOT_MAX];
2756    memset(slot_map, -1, sizeof(slot_map));
2757    bool do_fixup = false;
2758    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2759    nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2760    if (var) {
2761       bool can_remove = false;
2762       if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2763          /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2764          if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2765             can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2766          else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2767             can_remove = !var->data.explicit_location;
2768       }
2769       /* remove injected pointsize from all but the last vertex stage */
2770       if (can_remove) {
2771          var->data.mode = nir_var_shader_temp;
2772          nir_fixup_deref_modes(producer);
2773          delete_psiz_store(producer, false);
2774          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2775          optimize_nir(producer, NULL, true);
2776       }
2777    }
2778    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2779       producer->info.has_transform_feedback_varyings = false;
2780       nir_foreach_shader_out_variable(var_out, producer)
2781          var_out->data.explicit_xfb_buffer = false;
2782    }
2783    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2784       /* never assign from tcs -> tes, always invert */
2785       nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2786          assign_producer_var_io(consumer->info.stage, var_in, &reserved, slot_map);
2787       nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2788          if (!assign_consumer_var_io(producer->info.stage, var_out, &reserved, slot_map))
2789             /* this is an output, nothing more needs to be done for it to be dropped */
2790             do_fixup = true;
2791       }
2792    } else {
2793       nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2794          assign_producer_var_io(producer->info.stage, var_out, &reserved, slot_map);
2795       nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2796          if (!assign_consumer_var_io(consumer->info.stage, var_in, &reserved, slot_map)) {
2797             do_fixup = true;
2798             /* input needs to be rewritten */
2799             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
2800          }
2801       }
2802       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2803          do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2804    }
2805    nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
2806    if (producer->info.io_lowered && consumer->info.io_lowered) {
2807       u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
2808          struct write_components wc = {slot, 0};
2809          nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
2810          assert(wc.component_mask);
2811          if (wc.component_mask != BITFIELD_MASK(4))
2812             do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
2813       }
2814    }
2815    if (!do_fixup)
2816       return;
2817    nir_fixup_deref_modes(nir);
2818    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2819    optimize_nir(nir, NULL, true);
2820 }
2821 
2822 /* all types that hit this function contain something that is 64bit */
2823 static const struct glsl_type *
2824 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2825 {
2826    if (glsl_type_is_array(type)) {
2827       const struct glsl_type *child = glsl_get_array_element(type);
2828       unsigned elements = glsl_array_size(type);
2829       unsigned stride = glsl_get_explicit_stride(type);
2830       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2831    }
2832    /* rewrite structs recursively */
2833    if (glsl_type_is_struct_or_ifc(type)) {
2834       unsigned nmembers = glsl_get_length(type);
2835       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2836       unsigned xfb_offset = 0;
2837       for (unsigned i = 0; i < nmembers; i++) {
2838          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2839          fields[i] = *f;
2840          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2841          if (i < nmembers - 1 && xfb_offset % 8 &&
2842              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2843               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2844             var->data.is_xfb = true;
2845          }
2846          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2847       }
2848       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2849    }
2850    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2851       return type;
2852    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2853       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2854    enum glsl_base_type base_type;
2855    switch (glsl_get_base_type(type)) {
2856    case GLSL_TYPE_UINT64:
2857       base_type = GLSL_TYPE_UINT;
2858       break;
2859    case GLSL_TYPE_INT64:
2860       base_type = GLSL_TYPE_INT;
2861       break;
2862    case GLSL_TYPE_DOUBLE:
2863       base_type = GLSL_TYPE_FLOAT;
2864       break;
2865    default:
2866       unreachable("unknown 64-bit vertex attribute format!");
2867    }
2868    if (glsl_type_is_scalar(type))
2869       return glsl_vector_type(base_type, 2);
2870    unsigned num_components;
2871    if (glsl_type_is_matrix(type)) {
2872       /* align to vec4 size: dvec3 columns are padded out to dvec4 */
2873       unsigned vec_components = glsl_get_vector_elements(type);
2874       if (vec_components == 3)
2875          vec_components = 4;
2876       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2877    } else {
2878       num_components = glsl_get_vector_elements(type) * 2;
2879       if (num_components <= 4)
2880          return glsl_vector_type(base_type, num_components);
2881    }
2882    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2883    struct glsl_struct_field fields[8] = {0};
2884    unsigned remaining = num_components;
2885    unsigned nfields = 0;
2886    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2887       assert(i < ARRAY_SIZE(fields));
2888       fields[i].name = "";
2889       fields[i].offset = i * 16;
2890       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2891    }
2892    char buf[64];
2893    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2894    return glsl_struct_type(fields, nfields, buf, true);
2895 }
2896 
2897 static const struct glsl_type *
2898 deref_is_matrix(nir_deref_instr *deref)
2899 {
2900    if (glsl_type_is_matrix(deref->type))
2901       return deref->type;
2902    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2903    if (parent)
2904       return deref_is_matrix(parent);
2905    return NULL;
2906 }
2907 
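/* After a variable's type has been rewritten to 32-bit, fix up every deref, load and
 * store of it in this function: stores unpack 64-bit sources into pairs of 32-bit
 * components, loads repack them, and matrix rows accessed through (possibly indirect)
 * array indices are expanded into per-column if/phi chains.
 */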
2908 static bool
2909 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
2910                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
2911 {
2912    bool func_progress = false;
2913    nir_builder b = nir_builder_create(impl);
2914    nir_foreach_block(block, impl) {
2915       nir_foreach_instr_safe(instr, block) {
2916          switch (instr->type) {
2917          case nir_instr_type_deref: {
2918             nir_deref_instr *deref = nir_instr_as_deref(instr);
2919             if (!(deref->modes & var->data.mode))
2920                continue;
2921             if (nir_deref_instr_get_variable(deref) != var)
2922                continue;
2923 
2924             /* matrix types are special: store the original deref type for later use */
2925             const struct glsl_type *matrix = deref_is_matrix(deref);
2926             nir_deref_instr *parent = nir_deref_instr_parent(deref);
2927             if (!matrix) {
2928                /* if this isn't a direct matrix deref, it may be a matrix row deref */
2929                hash_table_foreach(derefs, he) {
2930                   /* propagate parent matrix type to row deref */
2931                   if (he->key == parent)
2932                      matrix = he->data;
2933                }
2934             }
2935             if (matrix)
2936                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2937             if (deref->deref_type == nir_deref_type_var)
2938                deref->type = var->type;
2939             else
2940                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2941          }
2942          break;
2943          case nir_instr_type_intrinsic: {
2944             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2945             if (intr->intrinsic != nir_intrinsic_store_deref &&
2946                   intr->intrinsic != nir_intrinsic_load_deref)
2947                break;
2948             if (nir_intrinsic_get_var(intr, 0) != var)
2949                break;
2950             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2951                   (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
2952                break;
2953             b.cursor = nir_before_instr(instr);
2954             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2955             unsigned num_components = intr->num_components * 2;
2956             nir_def *comp[NIR_MAX_VEC_COMPONENTS];
2957             /* this is the stored matrix type from the deref */
2958             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2959             const struct glsl_type *matrix = he ? he->data : NULL;
2960             if (doubles_only && !matrix)
2961                break;
2962             func_progress = true;
2963             if (intr->intrinsic == nir_intrinsic_store_deref) {
2964                /* first, unpack the src data to 32bit vec2 components */
2965                for (unsigned i = 0; i < intr->num_components; i++) {
2966                   nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2967                   comp[i * 2] = nir_channel(&b, ssa, 0);
2968                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2969                }
2970                unsigned wrmask = nir_intrinsic_write_mask(intr);
2971                unsigned mask = 0;
2972                /* expand writemask for doubled components */
2973                for (unsigned i = 0; i < intr->num_components; i++) {
2974                   if (wrmask & BITFIELD_BIT(i))
2975                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2976                }
2977                if (matrix) {
2978                   /* matrix types always come from array (row) derefs */
2979                   assert(deref->deref_type == nir_deref_type_array);
2980                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2981                   /* let optimization clean up consts later */
2982                   nir_def *index = deref->arr.index.ssa;
2983                   /* this might be an indirect array index:
2984                    * - iterate over matrix columns
2985                    * - add if blocks for each column
2986                    * - perform the store in the block
2987                    */
2988                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2989                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2990                      unsigned vec_components = glsl_get_vector_elements(matrix);
2991                      /* always clamp dvec3 to 4 components */
2992                      if (vec_components == 3)
2993                         vec_components = 4;
2994                      unsigned start_component = idx * vec_components * 2;
2995                      /* struct member */
2996                      unsigned member = start_component / 4;
2997                      /* number of components remaining */
2998                      unsigned remaining = num_components;
2999                      for (unsigned i = 0; i < num_components; member++) {
3000                         if (!(mask & BITFIELD_BIT(i)))
3001                            continue;
3002                         assert(member < glsl_get_length(var_deref->type));
3003                         /* deref the rewritten struct to the appropriate vec4/vec2 */
3004                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3005                         unsigned incr = MIN2(remaining, 4);
3006                         /* assemble the write component vec */
3007                         nir_def *val = nir_vec(&b, &comp[i], incr);
3008                         /* use the number of components being written as the writemask */
3009                         if (glsl_get_vector_elements(strct->type) > val->num_components)
3010                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3011                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3012                         remaining -= incr;
3013                         i += incr;
3014                      }
3015                      nir_pop_if(&b, NULL);
3016                   }
3017                   _mesa_set_add(deletes, &deref->instr);
3018                } else if (num_components <= 4) {
3019                   /* simple store case: just write out the components */
3020                   nir_def *dest = nir_vec(&b, comp, num_components);
3021                   nir_store_deref(&b, deref, dest, mask);
3022                } else {
3023                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3024                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3025                      if (!(mask & BITFIELD_MASK(4)))
3026                         continue;
3027                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3028                      nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3029                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
3030                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3031                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3032                      mask >>= 4;
3033                   }
3034                }
3035             } else {
3036                nir_def *dest = NULL;
3037                if (matrix) {
3038                   /* matrix types always come from array (row) derefs */
3039                   assert(deref->deref_type == nir_deref_type_array);
3040                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3041                   /* let optimization clean up consts later */
3042                   nir_def *index = deref->arr.index.ssa;
3043                   /* this might be an indirect array index:
3044                    * - iterate over matrix columns
3045                    * - add if blocks for each column
3046                    * - phi the loads using the array index
3047                    */
3048                   unsigned cols = glsl_get_matrix_columns(matrix);
3049                   nir_def *dests[4];
3050                   for (unsigned idx = 0; idx < cols; idx++) {
3051                      /* don't add an if for the final row: this will be handled in the else */
3052                      if (idx < cols - 1)
3053                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3054                      unsigned vec_components = glsl_get_vector_elements(matrix);
3055                      /* always clamp dvec3 to 4 components */
3056                      if (vec_components == 3)
3057                         vec_components = 4;
3058                      unsigned start_component = idx * vec_components * 2;
3059                      /* struct member */
3060                      unsigned member = start_component / 4;
3061                      /* number of components remaining */
3062                      unsigned remaining = num_components;
3063                      /* component index */
3064                      unsigned comp_idx = 0;
3065                      for (unsigned i = 0; i < num_components; member++) {
3066                         assert(member < glsl_get_length(var_deref->type));
3067                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3068                         nir_def *load = nir_load_deref(&b, strct);
3069                         unsigned incr = MIN2(remaining, 4);
3070                         /* repack the loads to 64bit */
3071                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3072                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3073                         remaining -= incr;
3074                         i += incr;
3075                      }
3076                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3077                      if (idx < cols - 1)
3078                         nir_push_else(&b, NULL);
3079                   }
3080                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3081                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
3082                      nir_pop_if(&b, NULL);
3083                      dest = nir_if_phi(&b, dests[idx - 1], dest);
3084                   }
3085                   _mesa_set_add(deletes, &deref->instr);
3086                } else if (num_components <= 4) {
3087                   /* simple load case */
3088                   nir_def *load = nir_load_deref(&b, deref);
3089                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3090                   for (unsigned i = 0; i < intr->num_components; i++) {
3091                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3092                   }
3093                   dest = nir_vec(&b, comp, intr->num_components);
3094                } else {
3095                   /* reading > 4 components: access the struct and load the appropriate vec4 members */
3096                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3097                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3098                      nir_def *load = nir_load_deref(&b, strct);
3099                      comp[i * 2] = nir_pack_64_2x32(&b,
3100                                                     nir_trim_vector(&b, load, 2));
3101                      if (num_components > 2)
3102                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3103                   }
3104                   dest = nir_vec(&b, comp, intr->num_components);
3105                }
3106                nir_def_rewrite_uses_after(&intr->def, dest, instr);
3107             }
3108             _mesa_set_add(deletes, instr);
3109             break;
3110          }
3111          break;
3112          default: break;
3113          }
3114       }
3115    }
3116    if (func_progress)
3117       nir_metadata_preserve(impl, nir_metadata_none);
3118    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3119    set_foreach_remove(deletes, he)
3120       nir_instr_remove((void*)he->key);
3121    return func_progress;
3122 }
3123 
3124 static bool
3125 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3126                       struct set *deletes, bool doubles_only)
3127 {
3128    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3129       return false;
3130    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3131    /* once type is rewritten, rewrite all loads and stores */
3132    nir_foreach_function_impl(impl, shader)
3133       lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3134    return true;
3135 }
3136 
3137 /* rewrite all input/output variables using 32bit types and load/stores */
3138 static bool
3139 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3140 {
3141    bool progress = false;
3142    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3143    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3144    nir_foreach_function_impl(impl, shader) {
3145       nir_foreach_function_temp_variable(var, impl) {
3146          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3147             continue;
3148          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3149          progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3150       }
3151    }
3152    ralloc_free(deletes);
3153    ralloc_free(derefs);
3154    if (progress) {
3155       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3156       nir_lower_phis_to_scalar(shader, false);
3157       optimize_nir(shader, NULL, true);
3158    }
3159    return progress;
3160 }
3161 
3162 static void
3163 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3164 {
3165    FILE *fp = fopen(file, "wb");
3166    if (fp) {
3167       fwrite(words, 1, size, fp);
3168       fclose(fp);
3169       fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3170    }
3171 }
3172 
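/* Which stages may follow 'stage' in a graphics pipeline; used to fill
 * VkShaderCreateInfoEXT::nextStage for EXT_shader_object compiles.
 */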
3173 static VkShaderStageFlagBits
3174 zink_get_next_stage(gl_shader_stage stage)
3175 {
3176    switch (stage) {
3177    case MESA_SHADER_VERTEX:
3178       return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3179              VK_SHADER_STAGE_GEOMETRY_BIT |
3180              VK_SHADER_STAGE_FRAGMENT_BIT;
3181    case MESA_SHADER_TESS_CTRL:
3182       return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3183    case MESA_SHADER_TESS_EVAL:
3184       return VK_SHADER_STAGE_GEOMETRY_BIT |
3185              VK_SHADER_STAGE_FRAGMENT_BIT;
3186    case MESA_SHADER_GEOMETRY:
3187       return VK_SHADER_STAGE_FRAGMENT_BIT;
3188    case MESA_SHADER_FRAGMENT:
3189    case MESA_SHADER_COMPUTE:
3190    case MESA_SHADER_KERNEL:
3191       return 0;
3192    default:
3193       unreachable("invalid shader stage");
3194    }
3195 }
3196 
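/* Create the Vulkan object for a compiled shader: a VkShaderEXT when shader objects
 * can be used, otherwise a VkShaderModule. With ZINK_DEBUG_SPIRV the binary is dumped
 * to disk, and debug builds with ZINK_DEBUG_VALIDATION round-trip the words through
 * spirv_to_nir as a sanity check.
 */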
3197 struct zink_shader_object
3198 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3199 {
3200    VkShaderModuleCreateInfo smci = {0};
3201    VkShaderCreateInfoEXT sci = {0};
3202 
3203    if (!spirv)
3204       spirv = zs->spirv;
3205 
3206    if (zink_debug & ZINK_DEBUG_SPIRV) {
3207       char buf[256];
3208       static int i;
3209       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3210       zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3211    }
3212 
3213    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3214    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3215    sci.nextStage = zink_get_next_stage(zs->info.stage);
3216    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3217    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3218    sci.pCode = spirv->words;
3219    sci.pName = "main";
3220    VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3221    if (pg) {
3222       sci.setLayoutCount = pg->num_dsl;
3223       sci.pSetLayouts = pg->dsl;
3224    } else {
3225       sci.setLayoutCount = zs->info.stage + 1;
3226       dsl[zs->info.stage] = zs->precompile.dsl;
3227       sci.pSetLayouts = dsl;
3228    }
3229    VkPushConstantRange pcr;
3230    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3231    pcr.offset = 0;
3232    pcr.size = sizeof(struct zink_gfx_push_constant);
3233    sci.pushConstantRangeCount = 1;
3234    sci.pPushConstantRanges = &pcr;
3235 
3236    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3237    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3238    smci.pCode = spirv->words;
3239 
3240 #ifndef NDEBUG
3241    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3242       static const struct spirv_to_nir_options spirv_options = {
3243          .environment = NIR_SPIRV_VULKAN,
3244          .caps = {
3245             .float64 = true,
3246             .int16 = true,
3247             .int64 = true,
3248             .tessellation = true,
3249             .float_controls = true,
3250             .image_ms_array = true,
3251             .image_read_without_format = true,
3252             .image_write_without_format = true,
3253             .storage_image_ms = true,
3254             .geometry_streams = true,
3255             .storage_8bit = true,
3256             .storage_16bit = true,
3257             .variable_pointers = true,
3258             .stencil_export = true,
3259             .post_depth_coverage = true,
3260             .transform_feedback = true,
3261             .device_group = true,
3262             .draw_parameters = true,
3263             .shader_viewport_index_layer = true,
3264             .multiview = true,
3265             .physical_storage_buffer_address = true,
3266             .int64_atomics = true,
3267             .subgroup_arithmetic = true,
3268             .subgroup_basic = true,
3269             .subgroup_ballot = true,
3270             .subgroup_quad = true,
3271             .subgroup_shuffle = true,
3272             .subgroup_vote = true,
3273             .vk_memory_model = true,
3274             .vk_memory_model_device_scope = true,
3275             .int8 = true,
3276             .float16 = true,
3277             .demote_to_helper_invocation = true,
3278             .sparse_residency = true,
3279             .min_lod = true,
3280             .workgroup_memory_explicit_layout = true,
3281          },
3282          .ubo_addr_format = nir_address_format_32bit_index_offset,
3283          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3284          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3285          .push_const_addr_format = nir_address_format_logical,
3286          .shared_addr_format = nir_address_format_32bit_offset,
3287       };
3288       uint32_t num_spec_entries = 0;
3289       struct nir_spirv_specialization *spec_entries = NULL;
3290       VkSpecializationInfo sinfo = {0};
3291       VkSpecializationMapEntry me[3];
3292       uint32_t size[3] = {1,1,1};
3293       if (!zs->info.workgroup_size[0]) {
3294          sinfo.mapEntryCount = 3;
3295          sinfo.pMapEntries = &me[0];
3296          sinfo.dataSize = sizeof(uint32_t) * 3;
3297          sinfo.pData = size;
3298          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3299          for (int i = 0; i < 3; i++) {
3300             me[i].size = sizeof(uint32_t);
3301             me[i].constantID = ids[i];
3302             me[i].offset = i * sizeof(uint32_t);
3303          }
3304          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3305       }
3306       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3307                          spec_entries, num_spec_entries,
3308                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3309       assert(nir);
3310       ralloc_free(nir);
3311       free(spec_entries);
3312    }
3313 #endif
3314 
3315    VkResult ret;
3316    struct zink_shader_object obj = {0};
3317    if (!can_shobj || !screen->info.have_EXT_shader_object)
3318       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3319    else
3320       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3321    ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3322    assert(success);
3323    return obj;
3324 }
3325 
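/* Demote shader in/out variables that have no remaining derefs or lowered-IO accesses
 * to temporaries and delete them.
 */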
3326 static void
3327 prune_io(nir_shader *nir)
3328 {
3329    nir_foreach_shader_in_variable_safe(var, nir) {
3330       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3331          var->data.mode = nir_var_shader_temp;
3332    }
3333    nir_foreach_shader_out_variable_safe(var, nir) {
3334       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3335          var->data.mode = nir_var_shader_temp;
3336    }
3337    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3338 }
3339 
3340 static void
3341 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3342 {
3343    /* unconvert from zink_binding() */
3344    uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3345    assert(sampler_id < 32); /* bitfield size for tracking */
3346    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3347 }
3348 
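/* Convert a texture result to the sampler's declared return type (bit size included).
 * For legacy (vec4-result) shadow sampling, switch to new-style scalar shadow when only
 * .x is read; otherwise flag the fragment shader for a recompile that applies the
 * GL_DEPTH_TEXTURE_MODE swizzle.
 */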
3349 static nir_def *
3350 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3351 {
3352    assert(var);
3353    const struct glsl_type *type = glsl_without_array(var->type);
3354    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3355    bool is_int = glsl_base_type_is_integer(ret_type);
3356    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3357    unsigned dest_size = tex->def.bit_size;
3358    b->cursor = nir_after_instr(&tex->instr);
3359    unsigned num_components = tex->def.num_components;
3360    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3361    if (bit_size == dest_size && !rewrite_depth)
3362       return NULL;
3363    nir_def *dest = &tex->def;
3364    if (rewrite_depth && zs) {
3365       if (nir_def_components_read(dest) & ~1) {
3366          /* this needs recompiles */
3367          if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3368             flag_shadow_tex(var, zs);
3369          else
3370             mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3371          return NULL;
3372       }
3373       /* If only .x is used in the NIR, then it's effectively not a legacy depth
3374        * sample anyway and we don't want to ask for shader recompiles.  This is
3375        * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3376        * LUMINANCE, so apps just use the first channel.
3377        */
3378       tex->def.num_components = 1;
3379       tex->is_new_style_shadow = true;
3380    }
3381    if (bit_size != dest_size) {
3382       tex->def.bit_size = bit_size;
3383       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3384 
3385       if (is_int) {
3386          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3387             dest = nir_u2uN(b, &tex->def, dest_size);
3388          else
3389             dest = nir_i2iN(b, &tex->def, dest_size);
3390       } else {
3391          dest = nir_f2fN(b, &tex->def, dest_size);
3392       }
3393       if (rewrite_depth)
3394          return dest;
3395       nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3396    } else if (rewrite_depth) {
3397       return dest;
3398    }
3399    return dest;
3400 }
3401 
3402 struct lower_zs_swizzle_state {
3403    bool shadow_only;
3404    unsigned base_sampler_id;
3405    const struct zink_zs_swizzle_key *swizzle;
3406 };
3407 
3408 static bool
3409 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3410 {
3411    struct lower_zs_swizzle_state *state = data;
3412    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3413    assert(state->shadow_only || swizzle_key);
3414    if (instr->type != nir_instr_type_tex)
3415       return false;
3416    nir_tex_instr *tex = nir_instr_as_tex(instr);
3417    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3418        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3419       return false;
3420    if (tex->is_shadow && tex->op == nir_texop_tg4)
3421       /* Will not even try to emulate the shadow comparison */
3422       return false;
3423    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3424    nir_variable *var = NULL;
3425    if (handle != -1)
3426       /* bail on bindless handles: depth texture mode emulation isn't attempted for them */
3427       return false;
3428    nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3429       if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3430          unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3431          if (tex->texture_index >= img->data.driver_location &&
3432                tex->texture_index < img->data.driver_location + size) {
3433             var = img;
3434             break;
3435          }
3436       }
3437    }
3438    assert(var);
3439    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3440    const struct glsl_type *type = glsl_without_array(var->type);
3441    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3442    bool is_int = glsl_base_type_is_integer(ret_type);
3443    unsigned num_components = tex->def.num_components;
3444    if (tex->is_shadow)
3445       tex->is_new_style_shadow = true;
3446    nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3447    assert(dest || !state->shadow_only);
3448    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3449       return false;
3450    else if (!dest)
3451       dest = &tex->def;
3452    else
3453       tex->def.num_components = 1;
3454    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3455       /* these require manual swizzles */
3456       if (tex->op == nir_texop_tg4) {
3457          assert(!tex->is_shadow);
3458          nir_def *swizzle;
3459          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3460          case PIPE_SWIZZLE_0:
3461             swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3462             break;
3463          case PIPE_SWIZZLE_1:
3464             if (is_int)
3465                swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3466             else
3467                swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3468             break;
3469          default:
3470             if (!tex->component)
3471                return false;
3472             tex->component = 0;
3473             return true;
3474          }
3475          nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3476          return true;
3477       }
3478       nir_def *vec[4];
3479       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3480          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3481          case PIPE_SWIZZLE_0:
3482             vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3483             break;
3484          case PIPE_SWIZZLE_1:
3485             if (is_int)
3486                vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3487             else
3488                vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3489             break;
3490          default:
3491             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3492             break;
3493          }
3494       }
3495       nir_def *swizzle = nir_vec(b, vec, num_components);
3496       nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3497    } else {
3498       assert(tex->is_shadow);
3499       nir_def *vec[4] = {dest, dest, dest, dest};
3500       nir_def *splat = nir_vec(b, vec, num_components);
3501       nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3502    }
3503    return true;
3504 }
3505 
3506 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3507  *
3508  * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3509  * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3510  * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3511  * shader to expand out to vec4.  Since this depends on sampler state, it's a
3512  * draw-time shader recompile to do so.
3513  *
3514  * We may also need to apply shader swizzles for
3515  * driver_workarounds.needs_zs_shader_swizzle.
3516  */
3517 static bool
3518 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3519 {
3520    /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3521    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3522    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3523    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3524 }
3525 
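/* Rewrite gl_PointCoord loads as (x, 1.0 - y), flipping the sprite's Y origin. */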
3526 static bool
3527 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3528                          void *data)
3529 {
3530    if (intr->intrinsic != nir_intrinsic_load_point_coord)
3531       return false;
3532    b->cursor = nir_after_instr(&intr->instr);
3533    nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3534                                   nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3535    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3536    return true;
3537 }
3538 
3539 static bool
3540 invert_point_coord(nir_shader *nir)
3541 {
3542    if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3543       return false;
3544    return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3545                                      nir_metadata_dominance, NULL);
3546 }
3547 
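/* Walk up through ALU ops to determine whether this value is a raw residency code
 * straight from a sparse texture op (true) or already the boolean result of
 * is_sparse_texels_resident (false).
 */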
3548 static bool
3549 is_residency_code(nir_def *src)
3550 {
3551    nir_instr *parent = src->parent_instr;
3552    while (1) {
3553       if (parent->type == nir_instr_type_intrinsic) {
3554          ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
3555          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
3556          return false;
3557       }
3558       if (parent->type == nir_instr_type_tex)
3559          return true;
3560       assert(parent->type == nir_instr_type_alu);
3561       nir_alu_instr *alu = nir_instr_as_alu(parent);
3562       parent = alu->src[0].src.ssa->parent_instr;
3563    }
3564 }
3565 
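/* sparse_residency_code_and may receive raw residency codes or booleans; wrap raw
 * codes in is_sparse_texels_resident so the AND operates on booleans.
 */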
3566 static bool
3567 lower_sparse_and_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
3568 {
3569    if (instr->intrinsic != nir_intrinsic_sparse_residency_code_and)
3570       return false;
3571 
3572    b->cursor = nir_before_instr(&instr->instr);
3573    nir_def *src0;
3574    if (is_residency_code(instr->src[0].ssa))
3575       src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
3576    else
3577       src0 = instr->src[0].ssa;
3578    nir_def *src1;
3579    if (is_residency_code(instr->src[1].ssa))
3580       src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
3581    else
3582       src1 = instr->src[1].ssa;
3583    nir_def *def = nir_iand(b, src0, src1);
3584    nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
3585    nir_instr_remove(&instr->instr);
3586    return true;
3587 }
3588 
3589 static bool
3590 lower_sparse_and(nir_shader *shader)
3591 {
3592    return nir_shader_intrinsics_pass(shader, lower_sparse_and_instr,
3593                                      nir_metadata_dominance, NULL);
3594 }
3595 
3596 static bool
3597 lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
3598 {
3599    if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
3600       return false;
3601 
3602    /* a Vulkan vector can only be a vec4, but this may be a vec5, so rewrite it as
3603     * just the first component; ntv stores the residency value through a different
3604     * mechanism anyway
3605     */
3606    b->cursor = nir_before_instr(&instr->instr);
3607    nir_instr *parent = instr->src[0].ssa->parent_instr;
3608    if (is_residency_code(instr->src[0].ssa)) {
3609       assert(parent->type == nir_instr_type_alu);
3610       nir_alu_instr *alu = nir_instr_as_alu(parent);
3611       nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
3612       nir_instr_remove(parent);
3613    } else {
3614       nir_def *src;
3615       if (parent->type == nir_instr_type_intrinsic) {
3616          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
3617          assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
3618          src = intr->src[0].ssa;
3619       } else {
3620          assert(parent->type == nir_instr_type_alu);
3621          nir_alu_instr *alu = nir_instr_as_alu(parent);
3622          src = alu->src[0].src.ssa;
3623       }
3624       if (instr->def.bit_size != 32) {
3625          if (instr->def.bit_size == 1)
3626             src = nir_ieq_imm(b, src, 1);
3627          else
3628             src = nir_u2uN(b, src, instr->def.bit_size);
3629       }
3630       nir_def_rewrite_uses(&instr->def, src);
3631       nir_instr_remove(&instr->instr);
3632    }
3633    return true;
3634 }
3635 
3636 static bool
3637 lower_sparse(nir_shader *shader)
3638 {
3639    return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
3640                                      nir_metadata_dominance, NULL);
3641 }
3642 
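/* Map a lowered IO intrinsic back onto variable derefs: find the variable covering the
 * accessed location/component and rebuild the deref chain (arrayed IO, array elements,
 * struct members) that the load/store should use.
 */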
3643 static bool
3644 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3645 {
3646    bool is_load = false;
3647    bool is_input = false;
3648    bool is_interp = false;
3649    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3650       return false;
3651    unsigned loc = nir_intrinsic_io_semantics(intr).location;
3652    nir_src *src_offset = nir_get_io_offset_src(intr);
3653    const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3654    unsigned location = loc + slot_offset;
3655    unsigned frac = nir_intrinsic_component(intr);
3656    unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3657    /* set c aligned/rounded down to dword */
3658    unsigned c = frac;
3659    if (frac && bit_size < 32)
3660       c = frac * bit_size / 32;
3661    /* loop over all the variables and rewrite corresponding access */
3662    nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3663       const struct glsl_type *type = var->type;
3664       if (nir_is_arrayed_io(var, b->shader->info.stage))
3665          type = glsl_get_array_element(type);
3666       unsigned slot_count = get_var_slot_count(b->shader, var);
3667       /* filter access that isn't specific to this variable */
3668       if (var->data.location > location || var->data.location + slot_count <= location)
3669          continue;
3670       if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3671          continue;
3672       if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3673          continue;
3674 
3675       unsigned size = 0;
3676       bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3677       if (is_struct)
3678          size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3679       else if ((var->data.mode == nir_var_shader_out && var->data.location < VARYING_SLOT_VAR0) ||
3680           (var->data.mode == nir_var_shader_in && var->data.location < (b->shader->info.stage == MESA_SHADER_VERTEX ? VERT_ATTRIB_GENERIC0 : VARYING_SLOT_VAR0)))
3681          size = glsl_type_is_array(type) ? glsl_get_aoa_size(type) : glsl_get_vector_elements(type);
3682       else
3683          size = glsl_get_vector_elements(glsl_without_array(type));
3684       assert(size);
3685       if (glsl_type_is_64bit(glsl_without_array(var->type)))
3686          size *= 2;
3687       if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3688          /* adjust for dvec3-type slot overflow */
3689          assert(location > var->data.location);
3690          size -= (location - var->data.location) * 4;
3691       }
3692       assert(size);
3693       if (var->data.location_frac + size <= c || var->data.location_frac > c)
3694          continue;
3695 
3696       b->cursor = nir_before_instr(&intr->instr);
3697       nir_deref_instr *deref = nir_build_deref_var(b, var);
3698       if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3699          assert(intr->intrinsic != nir_intrinsic_store_output);
3700          deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3701       }
3702       if (glsl_type_is_array(type)) {
3703          /* unroll array derefs */
3704          unsigned idx = frac - var->data.location_frac;
3705          assert(src_offset);
3706          if (var->data.location < VARYING_SLOT_VAR0) {
3707             if (src_offset) {
3708                /* clip/cull dist and tess levels use different array offset semantics */
3709                bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3710                                   var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1;
3711                bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3712                                     (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3713                bool is_builtin_array = is_clipdist || is_tess_level;
3714                /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3715                if (nir_src_is_const(*src_offset)) {
3716                   unsigned offset = slot_offset;
3717                   if (is_builtin_array)
3718                      offset *= 4;
3719                   deref = nir_build_deref_array_imm(b, deref, offset + idx);
3720                } else {
3721                   nir_def *offset = src_offset->ssa;
3722                   if (is_builtin_array)
3723                      offset = nir_imul_imm(b, offset, 4);
3724                   deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
3725                }
3726             } else {
3727                deref = nir_build_deref_array_imm(b, deref, idx);
3728             }
3729             type = glsl_get_array_element(type);
3730          } else {
3731             /* need to convert possible N*M to [N][M] */
3732             nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3733             while (glsl_type_is_array(type)) {
3734                const struct glsl_type *elem = glsl_get_array_element(type);
3735                unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3736                nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3737                if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3738                   n = nir_udiv_imm(b, n, 2);
3739                deref = nir_build_deref_array(b, deref, n);
3740                nm = nir_umod_imm(b, nm, type_size);
3741                type = glsl_get_array_element(type);
3742             }
3743          }
3744       } else if (glsl_type_is_struct(type)) {
3745          deref = nir_build_deref_struct(b, deref, slot_offset);
3746       }
3747       if (is_load) {
3748          nir_def *load;
3749          if (is_interp) {
3750             nir_def *interp = intr->src[0].ssa;
3751             nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3752             assert(interp_intr);
3753             var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3754             switch (interp_intr->intrinsic) {
3755             case nir_intrinsic_load_barycentric_centroid:
3756                load = nir_interp_deref_at_centroid(b, intr->num_components, bit_size, &deref->def);
3757                break;
3758             case nir_intrinsic_load_barycentric_sample:
3759                var->data.sample = 1;
3760                load = nir_load_deref(b, deref);
3761                break;
3762             case nir_intrinsic_load_barycentric_pixel:
3763                load = nir_load_deref(b, deref);
3764                break;
3765             case nir_intrinsic_load_barycentric_at_sample:
3766                load = nir_interp_deref_at_sample(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3767                break;
3768             case nir_intrinsic_load_barycentric_at_offset:
3769                load = nir_interp_deref_at_offset(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3770                break;
3771             default:
3772                unreachable("unhandled interp!");
3773             }
3774          } else {
3775             load = nir_load_deref(b, deref);
3776          }
3777          /* filter needed components */
3778          if (intr->num_components < load->num_components)
3779             load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3780          nir_def_rewrite_uses(&intr->def, load);
3781       } else {
3782          nir_def *store = intr->src[0].ssa;
3783          assert(!glsl_type_is_array(type));
3784          unsigned num_components = glsl_get_vector_elements(type);
3785          /* pad/filter components to match deref type */
3786          if (intr->num_components < num_components) {
3787             nir_def *zero = nir_imm_zero(b, 1, bit_size);
3788             nir_def *vec[4] = {zero, zero, zero, zero};
3789             u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3790                vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3791             store = nir_vec(b, vec, num_components);
3792          } if (store->num_components > num_components) {
3793             store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3794          }
3795          if (store->bit_size != glsl_get_bit_size(type)) {
3796             /* this should be some weird bindless io conversion */
3797             assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3798             assert(num_components != store->num_components);
3799             store = nir_unpack_64_2x32(b, store);
3800          }
3801          nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3802       }
3803       nir_instr_remove(&intr->instr);
3804       return true;
3805    }
3806    unreachable("failed to find variable for explicit io!");
3807    return true;
3808 }
3809 
3810 static bool
3811 add_derefs(nir_shader *nir)
3812 {
3813    return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3814                                      nir_metadata_dominance, NULL);
3815 }
3816 
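/* final compile step: prune io, convert out of SSA, optionally dump the NIR,
 * then translate to SPIR-V and build the shader object
 */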
3817 static struct zink_shader_object
3818 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3819 {
3820    struct zink_shader_info *sinfo = &zs->sinfo;
3821    prune_io(nir);
3822 
3823    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3824 
3825    if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3826       nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3827    if (zink_debug & ZINK_DEBUG_NIR) {
3828       fprintf(stderr, "NIR shader:\n---8<---\n");
3829       nir_print_shader(nir, stderr);
3830       fprintf(stderr, "---8<---\n");
3831    }
3832 
3833    struct zink_shader_object obj = {0};
3834    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3835    if (spirv)
3836       obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3837 
3838    /* TODO: determine if there's any reason to cache spirv output? */
3839    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3840       zs->spirv = spirv;
3841    else
3842       obj.spirv = spirv;
3843    return obj;
3844 }
3845 
3846 struct zink_shader_object
3847 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
3848                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3849 {
3850    bool need_optimize = true;
3851    bool inlined_uniforms = false;
3852 
3853    NIR_PASS_V(nir, add_derefs);
3854    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
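   /* the shader key describes this variant; apply key-driven lowering passes */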
3855    if (key) {
3856       if (key->inline_uniforms) {
3857          NIR_PASS_V(nir, nir_inline_uniforms,
3858                     nir->info.num_inlinable_uniforms,
3859                     key->base.inlined_uniform_values,
3860                     nir->info.inlinable_uniform_dw_offsets);
3861 
3862          inlined_uniforms = true;
3863       }
3864 
3865       /* TODO: use a separate mem ctx here for ralloc */
3866 
3867       if (!screen->optimal_keys) {
3868          switch (zs->info.stage) {
3869          case MESA_SHADER_VERTEX: {
3870             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3871             const struct zink_vs_key *vs_key = zink_vs_key(key);
3872             switch (vs_key->size) {
3873             case 4:
3874                decomposed_attrs = vs_key->u32.decomposed_attrs;
3875                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3876                break;
3877             case 2:
3878                decomposed_attrs = vs_key->u16.decomposed_attrs;
3879                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3880                break;
3881             case 1:
3882                decomposed_attrs = vs_key->u8.decomposed_attrs;
3883                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3884                break;
3885             default: break;
3886             }
3887             if (decomposed_attrs || decomposed_attrs_without_w)
3888                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3889             break;
3890          }
3891 
3892          case MESA_SHADER_GEOMETRY:
3893             if (zink_gs_key(key)->lower_line_stipple) {
3894                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3895                NIR_PASS_V(nir, nir_lower_var_copies);
3896                need_optimize = true;
3897             }
3898 
3899             if (zink_gs_key(key)->lower_line_smooth) {
3900                NIR_PASS_V(nir, lower_line_smooth_gs);
3901                NIR_PASS_V(nir, nir_lower_var_copies);
3902                need_optimize = true;
3903             }
3904 
3905             if (zink_gs_key(key)->lower_gl_point) {
3906                NIR_PASS_V(nir, lower_gl_point_gs);
3907                need_optimize = true;
3908             }
3909 
3910             if (zink_gs_key(key)->lower_pv_mode) {
3911                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3912                need_optimize = true; // TODO: verify that this is required
3913             }
3914             break;
3915 
3916          default:
3917             break;
3918          }
3919       }
3920 
3921       switch (zs->info.stage) {
3922       case MESA_SHADER_VERTEX:
3923       case MESA_SHADER_TESS_EVAL:
3924       case MESA_SHADER_GEOMETRY:
3925          if (zink_vs_key_base(key)->last_vertex_stage) {
3926             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3927                NIR_PASS_V(nir, nir_lower_clip_halfz);
3928             }
3929             if (zink_vs_key_base(key)->push_drawid) {
3930                NIR_PASS_V(nir, lower_drawid);
3931             }
3932          } else {
3933             nir->xfb_info = NULL;
3934          }
3935          if (zink_vs_key_base(key)->robust_access)
3936             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3937          break;
3938       case MESA_SHADER_FRAGMENT:
3939          if (zink_fs_key(key)->lower_line_smooth) {
3940             NIR_PASS_V(nir, lower_line_smooth_fs,
3941                        zink_fs_key(key)->lower_line_stipple);
3942             need_optimize = true;
3943          } else if (zink_fs_key(key)->lower_line_stipple)
3944                NIR_PASS_V(nir, lower_line_stipple_fs);
3945 
3946          if (zink_fs_key(key)->lower_point_smooth) {
3947             NIR_PASS_V(nir, nir_lower_point_smooth);
3948             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3949             nir->info.fs.uses_discard = true;
3950             need_optimize = true;
3951          }
3952 
3953          if (zink_fs_key(key)->robust_access)
3954             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3955 
3956          if (!zink_fs_key_base(key)->samples &&
3957             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3958             /* VK will always use gl_SampleMask[] values even if sample count is 0,
3959              * so we need to skip this write here to mimic GL's behavior of ignoring it
3960              */
3961             nir_foreach_shader_out_variable(var, nir) {
3962                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3963                   var->data.mode = nir_var_shader_temp;
3964             }
3965             nir_fixup_deref_modes(nir);
3966             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3967             need_optimize = true;
3968          }
3969          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3970             NIR_PASS_V(nir, lower_dual_blend);
3971          }
3972          if (zink_fs_key_base(key)->coord_replace_bits)
3973             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
3974          if (zink_fs_key_base(key)->point_coord_yinvert)
3975             NIR_PASS_V(nir, invert_point_coord);
3976          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3977             nir_foreach_shader_in_variable(var, nir)
3978                var->data.sample = true;
3979             nir->info.fs.uses_sample_qualifier = true;
3980             nir->info.fs.uses_sample_shading = true;
3981          }
3982          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3983             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3984          if (nir->info.fs.uses_fbfetch_output) {
3985             nir_variable *fbfetch = NULL;
3986             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3987             /* old variable must be deleted to avoid spirv errors */
3988             fbfetch->data.mode = nir_var_shader_temp;
3989             nir_fixup_deref_modes(nir);
3990             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3991             need_optimize = true;
3992          }
3993          nir_foreach_shader_in_variable_safe(var, nir) {
3994             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3995                continue;
3996             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3997             var->data.mode = nir_var_shader_temp;
3998             nir_fixup_deref_modes(nir);
3999             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4000             need_optimize = true;
4001          }
4002          break;
4003       case MESA_SHADER_COMPUTE:
4004          if (zink_cs_key(key)->robust_access)
4005             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4006          break;
4007       default: break;
4008       }
4009       if (key->base.needs_zs_shader_swizzle) {
4010          assert(extra_data);
4011          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4012       }
4013       if (key->base.nonseamless_cube_mask) {
4014          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4015          need_optimize = true;
4016       }
4017    }
4018    if (screen->driconf.inline_uniforms) {
4019       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4020       NIR_PASS_V(nir, rewrite_bo_access, screen);
4021       NIR_PASS_V(nir, remove_bo_access, zs);
4022       need_optimize = true;
4023    }
4024    if (inlined_uniforms) {
4025       optimize_nir(nir, zs, true);
4026 
4027       /* This must be done again, since inlining and optimization can create new constant io offsets. */
4028       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4029                                                        nir_var_shader_out);
4030 
4031       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4032       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4033          zs->can_inline = false;
4034    } else if (need_optimize)
4035       optimize_nir(nir, zs, true);
4036    bool has_sparse = false;
4037    NIR_PASS(has_sparse, nir, lower_sparse);
4038    if (has_sparse)
4039       optimize_nir(nir, zs, false);
4040 
4041    struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4042    ralloc_free(nir);
4043    return obj;
4044 }
4045 
4046 struct zink_shader_object
4047 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4048 {
4049    nir_shader *nir = zink_shader_deserialize(screen, zs);
4050    /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4051    int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4052    if (screen->info.have_EXT_shader_object)
4053       set = zs->info.stage;
4054    unsigned offsets[4];
4055    zink_descriptor_shader_get_binding_offsets(zs, offsets);
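   /* rebase every resource variable onto this shader's separate-shader descriptor set,
    * offsetting bindings by descriptor type
    */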
4056    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4057       if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4058          continue;
4059       var->data.descriptor_set = set;
4060       switch (var->data.mode) {
4061       case nir_var_mem_ubo:
4062          var->data.binding = !!var->data.driver_location;
4063          break;
4064       case nir_var_uniform:
4065          if (glsl_type_is_sampler(glsl_without_array(var->type)))
4066             var->data.binding += offsets[1];
4067          break;
4068       case nir_var_mem_ssbo:
4069          var->data.binding += offsets[2];
4070          break;
4071       case nir_var_image:
4072          var->data.binding += offsets[3];
4073          break;
4074       default: break;
4075       }
4076    }
4077    NIR_PASS_V(nir, add_derefs);
4078    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4079    if (screen->driconf.inline_uniforms) {
4080       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4081       NIR_PASS_V(nir, rewrite_bo_access, screen);
4082       NIR_PASS_V(nir, remove_bo_access, zs);
4083    }
4084    optimize_nir(nir, zs, true);
4085    zink_descriptor_shader_init(screen, zs);
4086    nir_shader *nir_clone = NULL;
4087    if (screen->info.have_EXT_shader_object)
4088       nir_clone = nir_shader_clone(nir, nir);
4089    struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4090    if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4091       /* always try to pre-generate a tcs in case it's needed */
4092       if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4093          nir_shader *nir_tcs = NULL;
4094          /* use max patch control points (pcp) for compat */
4095          zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
4096          nir_tcs->info.separate_shader = true;
4097          zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4098          ralloc_free(nir_tcs);
4099       }
4100    }
4101    ralloc_free(nir);
4102    spirv_shader_delete(obj.spirv);
4103    obj.spirv = NULL;
4104    return obj;
4105 }
4106 
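/* Vulkan's InstanceIndex includes the base instance while GL's gl_InstanceID does not,
 * so subtract load_base_instance from every load_instance_id
 */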
4107 static bool
4108 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4109                          void *data)
4110 {
4111    if (intr->intrinsic != nir_intrinsic_load_instance_id)
4112       return false;
4113    b->cursor = nir_after_instr(&intr->instr);
4114    nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4115    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4116    return true;
4117 }
4118 
4119 static bool
4120 lower_baseinstance(nir_shader *shader)
4121 {
4122    if (shader->info.stage != MESA_SHADER_VERTEX)
4123       return false;
4124    return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4125                                      nir_metadata_dominance, NULL);
4126 }
4127 
4128 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4129  * so instead we delete all those broken variables and just make new ones
4130  */
4131 static bool
4132 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4133 {
4134    uint64_t max_ssbo_size = 0;
4135    uint64_t max_ubo_size = 0;
4136    uint64_t max_uniform_size = 0;
4137 
4138    if (!shader->info.num_ssbos && !shader->info.num_ubos)
4139       return false;
4140 
4141    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4142       const struct glsl_type *type = glsl_without_array(var->type);
4143       if (type_is_counter(type))
4144          continue;
4145       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4146       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4147       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4148       if (interface_type) {
4149          unsigned block_size = glsl_get_explicit_size(interface_type, true);
4150          if (glsl_get_length(interface_type) == 1) {
4151             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4152             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4153             if (glsl_type_is_array(f) && !glsl_array_size(f))
4154                block_size = 0;
4155          }
4156          if (block_size) {
4157             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4158             size = MAX2(size, block_size);
4159          }
4160       }
4161       if (var->data.mode == nir_var_mem_ubo) {
4162          if (var->data.driver_location)
4163             max_ubo_size = MAX2(max_ubo_size, size);
4164          else
4165             max_uniform_size = MAX2(max_uniform_size, size);
4166       } else {
4167          max_ssbo_size = MAX2(max_ssbo_size, size);
4168          if (interface_type) {
4169             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4170                needs_size = true;
4171          }
4172       }
4173       var->data.mode = nir_var_shader_temp;
4174    }
4175    nir_fixup_deref_modes(shader);
4176    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4177    optimize_nir(shader, NULL, true);
4178 
4179    struct glsl_struct_field field = {0};
4180    field.name = ralloc_strdup(shader, "base");
4181    if (shader->info.num_ubos) {
4182       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4183          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4184          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4185                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4186                                                  "uniform_0@32");
4187          var->interface_type = var->type;
4188          var->data.mode = nir_var_mem_ubo;
4189          var->data.driver_location = 0;
4190       }
4191 
4192       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4193       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4194       if (num_ubos && ubos_used) {
4195          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4196          /* shrink array as much as possible */
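         /* ffs() is 1-based and bit 0 (the default ubo) was masked off above, giving the index among non-default ubos */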
4197          unsigned first_ubo = ffs(ubos_used) - 2;
4198          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4199          num_ubos -= first_ubo;
4200          assert(num_ubos);
4201          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4202                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4203                                    "ubos@32");
4204          var->interface_type = var->type;
4205          var->data.mode = nir_var_mem_ubo;
4206          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4207       }
4208    }
4209    if (shader->info.num_ssbos && zs->ssbos_used) {
4210       /* shrink array as much as possible */
4211       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4212       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4213       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4214       assert(num_ssbos);
4215       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4216       field.type = ssbo_type;
4217       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4218                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4219                                               "ssbos@32");
4220       var->interface_type = var->type;
4221       var->data.mode = nir_var_mem_ssbo;
4222       var->data.driver_location = first_ssbo;
4223    }
4224    return true;
4225 }
4226 
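/* conservative usage masks for buffer access: a single bit for constant indices,
 * otherwise every slot (minus the default ubo in the ubo case)
 */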
4227 static uint32_t
4228 get_src_mask_ssbo(unsigned total, nir_src src)
4229 {
4230    if (nir_src_is_const(src))
4231       return BITFIELD_BIT(nir_src_as_uint(src));
4232    return BITFIELD_MASK(total);
4233 }
4234 
4235 static uint32_t
4236 get_src_mask_ubo(unsigned total, nir_src src)
4237 {
4238    if (nir_src_is_const(src))
4239       return BITFIELD_BIT(nir_src_as_uint(src));
4240    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4241 }
4242 
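/* record which ubo/ssbo slots and textures the shader uses; returns true if any
 * get_ssbo_size is seen, i.e. runtime buffer sizes are required
 */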
4243 static bool
4244 analyze_io(struct zink_shader *zs, nir_shader *shader)
4245 {
4246    bool ret = false;
4247    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4248    nir_foreach_block(block, impl) {
4249       nir_foreach_instr(instr, block) {
4250          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4251             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4252             nir_tex_instr *tex = nir_instr_as_tex(instr);
4253             nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
4254                if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4255                   unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4256                   if (tex->texture_index >= img->data.driver_location &&
4257                      tex->texture_index < img->data.driver_location + size) {
4258                      BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4259                      break;
4260                   }
4261                }
4262             }
4263             continue;
4264          }
4265          if (instr->type != nir_instr_type_intrinsic)
4266             continue;
4267 
4268          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4269          switch (intrin->intrinsic) {
4270          case nir_intrinsic_store_ssbo:
4271             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4272             break;
4273 
4274          case nir_intrinsic_get_ssbo_size: {
4275             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4276             ret = true;
4277             break;
4278          }
4279          case nir_intrinsic_ssbo_atomic:
4280          case nir_intrinsic_ssbo_atomic_swap:
4281          case nir_intrinsic_load_ssbo:
4282             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4283             break;
4284          case nir_intrinsic_load_ubo:
4285          case nir_intrinsic_load_ubo_vec4:
4286             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4287             break;
4288          default:
4289             break;
4290          }
4291       }
4292    }
4293    return ret;
4294 }
4295 
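/* bindless lowering state: [0]=sampled image, [1]=uniform texel buffer,
 * [2]=storage image, [3]=storage texel buffer
 */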
4296 struct zink_bindless_info {
4297    nir_variable *bindless[4];
4298    unsigned bindless_set;
4299 };
4300 
4301 /* this is a "default" bindless texture used if the shader has no texture variables */
4302 static nir_variable *
4303 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4304 {
4305    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4306    nir_variable *var;
4307 
4308    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4309    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4310    var->data.descriptor_set = descriptor_set;
4311    var->data.driver_location = var->data.binding = binding;
4312    return var;
4313 }
4314 
4315 /* this is a "default" bindless image used if the shader has no image variables */
4316 static nir_variable *
4317 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4318 {
4319    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4320    nir_variable *var;
4321 
4322    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4323    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4324    var->data.descriptor_set = descriptor_set;
4325    var->data.driver_location = var->data.binding = binding;
4326    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4327    return var;
4328 }
4329 
4330 /* rewrite bindless instructions as array deref instructions */
4331 static bool
4332 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4333 {
4334    struct zink_bindless_info *bindless = data;
4335 
4336    if (in->type == nir_instr_type_tex) {
4337       nir_tex_instr *tex = nir_instr_as_tex(in);
4338       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4339       if (idx == -1)
4340          return false;
4341 
4342       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4343       if (!var) {
4344          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4345          if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4346             bindless->bindless[1] = var;
4347          else
4348             bindless->bindless[0] = var;
4349       }
4350       b->cursor = nir_before_instr(in);
4351       nir_deref_instr *deref = nir_build_deref_var(b, var);
4352       if (glsl_type_is_array(var->type))
4353          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4354       nir_src_rewrite(&tex->src[idx].src, &deref->def);
4355 
4356       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4357        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4358        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4359        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4360        *
4361        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4362        * - Warhammer 40k: Dawn of War III
4363        */
4364       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4365       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4366       unsigned coord_components = nir_src_num_components(tex->src[c].src);
4367       if (coord_components < needed_components) {
4368          nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4369          nir_src_rewrite(&tex->src[c].src, def);
4370          tex->coord_components = needed_components;
4371       }
4372       return true;
4373    }
4374    if (in->type != nir_instr_type_intrinsic)
4375       return false;
4376    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4377 
4378    nir_intrinsic_op op;
4379 #define OP_SWAP(OP) \
4380    case nir_intrinsic_bindless_image_##OP: \
4381       op = nir_intrinsic_image_deref_##OP; \
4382       break;
4383 
4384 
4385    /* convert bindless intrinsics to deref intrinsics */
4386    switch (instr->intrinsic) {
4387    OP_SWAP(atomic)
4388    OP_SWAP(atomic_swap)
4389    OP_SWAP(format)
4390    OP_SWAP(load)
4391    OP_SWAP(order)
4392    OP_SWAP(samples)
4393    OP_SWAP(size)
4394    OP_SWAP(store)
4395    default:
4396       return false;
4397    }
4398 
4399    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4400    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4401    if (!var)
4402       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4403    instr->intrinsic = op;
4404    b->cursor = nir_before_instr(in);
4405    nir_deref_instr *deref = nir_build_deref_var(b, var);
4406    if (glsl_type_is_array(var->type))
4407       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4408    nir_src_rewrite(&instr->src[0], &deref->def);
4409    return true;
4410 }
4411 
4412 static bool
4413 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4414 {
4415    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4416       return false;
4417    nir_fixup_deref_modes(shader);
4418    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4419    optimize_nir(shader, NULL, true);
4420    return true;
4421 }
4422 
4423 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4424 static bool
4425 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4426                         void *data)
4427 {
4428    bool is_load = false;
4429    bool is_input = false;
4430    bool is_interp = false;
4431    if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4432       return false;
4433 
4434    nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4435    if (var->data.bindless)
4436       return false;
4437    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4438       return false;
4439    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4440       return false;
4441 
4442    var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4443    var->data.bindless = 1;
4444    return true;
4445 }
4446 
4447 static bool
4448 lower_bindless_io(nir_shader *shader)
4449 {
4450    return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4451                                      nir_metadata_dominance, NULL);
4452 }
4453 
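/* map a (stage, descriptor type, index) triple to a flat binding slot in zink's
 * descriptor layout; compute-like stages are clamped to a base of 0
 */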
4454 static uint32_t
4455 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4456 {
4457    if (stage == MESA_SHADER_NONE) {
4458       unreachable("not supported");
4459    } else {
4460       unsigned base = stage;
4461       /* clamp compute bindings for better driver efficiency */
4462       if (gl_shader_stage_is_compute(stage))
4463          base = 0;
4464       switch (type) {
4465       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4466       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4467          return base * 2 + !!index;
4468 
4469       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4470          assert(stage == MESA_SHADER_KERNEL);
4471          FALLTHROUGH;
4472       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4473          if (stage == MESA_SHADER_KERNEL) {
4474             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4475             return index + PIPE_MAX_SAMPLERS;
4476          }
4477          FALLTHROUGH;
4478       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4479          assert(index < PIPE_MAX_SAMPLERS);
4480          assert(stage != MESA_SHADER_KERNEL);
4481          return (base * PIPE_MAX_SAMPLERS) + index;
4482 
4483       case VK_DESCRIPTOR_TYPE_SAMPLER:
4484          assert(index < PIPE_MAX_SAMPLERS);
4485          assert(stage == MESA_SHADER_KERNEL);
4486          return index;
4487 
4488       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4489          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4490 
4491       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4492       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4493          assert(index < ZINK_MAX_SHADER_IMAGES);
4494          if (stage == MESA_SHADER_KERNEL)
4495             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4496          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4497 
4498       default:
4499          unreachable("unexpected type");
4500       }
4501    }
4502 }
4503 
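/* fold a bindless image/sampler variable into the shared per-descriptor-type bindless
 * array (created on first use) and turn the original variable into a dead temp
 */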
4504 static void
4505 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4506 {
4507    if (glsl_type_is_struct(type)) {
4508       for (unsigned i = 0; i < glsl_get_length(type); i++)
4509          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4510       return;
4511    }
4512 
4513    /* just a random scalar in a struct */
4514    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4515       return;
4516 
4517    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4518    unsigned binding;
4519    switch (vktype) {
4520       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4521          binding = 0;
4522          break;
4523       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4524          binding = 1;
4525          break;
4526       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4527          binding = 2;
4528          break;
4529       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4530          binding = 3;
4531          break;
4532       default:
4533          unreachable("unknown");
4534    }
4535    if (!bindless->bindless[binding]) {
4536       bindless->bindless[binding] = nir_variable_clone(var, nir);
4537       bindless->bindless[binding]->data.bindless = 0;
4538       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4539       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4540       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4541       if (!bindless->bindless[binding]->data.image.format)
4542          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4543       nir_shader_add_variable(nir, bindless->bindless[binding]);
4544    } else {
4545       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4546    }
4547    var->data.mode = nir_var_shader_temp;
4548 }
4549 
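/* promote a 1D shadow tex op to 2D by zero-padding its coord/offset/derivative srcs,
 * while existing uses keep only the original dest components
 */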
4550 static bool
4551 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4552 {
4553    struct zink_screen *screen = data;
4554    if (instr->type != nir_instr_type_tex)
4555       return false;
4556    nir_tex_instr *tex = nir_instr_as_tex(instr);
4557    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4558       return false;
4559    if (tex->is_sparse && screen->need_2D_sparse) {
4560       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4561       mesa_loge("unhandled/unsupported 1D sparse texture!");
4562       abort();
4563    }
4564    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4565    b->cursor = nir_before_instr(instr);
4566    tex->coord_components++;
4567    unsigned srcs[] = {
4568       nir_tex_src_coord,
4569       nir_tex_src_offset,
4570       nir_tex_src_ddx,
4571       nir_tex_src_ddy,
4572    };
4573    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4574       unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4575       if (c == -1)
4576          continue;
4577       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4578          continue;
4579       nir_def *def;
4580       nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4581       if (tex->src[c].src.ssa->num_components == 1)
4582          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4583       else
4584          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4585       nir_src_rewrite(&tex->src[c].src, def);
4586    }
4587    b->cursor = nir_after_instr(instr);
4588    unsigned needed_components = nir_tex_instr_dest_size(tex);
4589    unsigned num_components = tex->def.num_components;
4590    if (needed_components > num_components) {
4591       tex->def.num_components = needed_components;
4592       assert(num_components < 3);
4593       /* take either xz or just x since this is promoted to 2D from 1D */
4594       uint32_t mask = num_components == 2 ? (1|4) : 1;
4595       nir_def *dst = nir_channels(b, &tex->def, mask);
4596       nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4597    }
4598    return true;
4599 }
4600 
4601 static bool
4602 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4603 {
4604    bool found = false;
4605    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4606       const struct glsl_type *type = glsl_without_array(var->type);
4607       unsigned length = glsl_get_length(var->type);
4608       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4609          continue;
4610       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4611       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4612 
4613       found = true;
4614    }
4615    if (found)
4616       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4617    return found;
4618 }
4619 
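/* gather shader info zink needs up front: sparse residency usage, image binding ranges,
 * and a warning for float image atomics when EXT_shader_atomic_float is missing
 */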
4620 static void
4621 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4622 {
4623    nir_foreach_function_impl(impl, shader) {
4624       nir_foreach_block_safe(block, impl) {
4625          nir_foreach_instr_safe(instr, block) {
4626             if (instr->type == nir_instr_type_tex) {
4627                nir_tex_instr *tex = nir_instr_as_tex(instr);
4628                zs->sinfo.have_sparse |= tex->is_sparse;
4629             }
4630             if (instr->type != nir_instr_type_intrinsic)
4631                continue;
4632             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4633             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4634                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4635                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4636                 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4637                 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4638                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4639                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4640                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4641                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4642 
4643                 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4644 
4645                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4646                 const unsigned size =
4647                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4648                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4649                                  var->data.binding + (MAX2(size, 1) - 1));
4650             }
4651             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4652                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4653                zs->sinfo.have_sparse = true;
4654 
4655             static bool warned = false;
4656             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4657                switch (intr->intrinsic) {
4658                case nir_intrinsic_image_deref_atomic: {
4659                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4660                   if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4661                       util_format_is_float(var->data.image.format))
4662                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4663                   break;
4664                }
4665                default:
4666                   break;
4667                }
4668             }
4669          }
4670       }
4671    }
4672 }
4673 
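/* find the sampler variable backing each tex instruction (via its handle deref or
 * texture index) and let rewrite_tex_dest reconcile the dest type with that variable
 */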
4674 static bool
4675 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4676 {
4677    if (in->type != nir_instr_type_tex)
4678       return false;
4679    nir_tex_instr *tex = nir_instr_as_tex(in);
4680    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4681       return false;
4682    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4683    nir_variable *var = NULL;
4684    if (handle != -1) {
4685       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4686    } else {
4687       nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4688          if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4689             unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4690             if (tex->texture_index >= img->data.driver_location &&
4691                 tex->texture_index < img->data.driver_location + size) {
4692                var = img;
4693                break;
4694             }
4695          }
4696       }
4697    }
4698    return !!rewrite_tex_dest(b, tex, var, data);
4699 }
4700 
4701 static bool
4702 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4703 {
4704    return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4705 }
4706 
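/* split vector bitfield_extract/bitfield_insert ops into per-component scalar ops
 * and rebuild the vector result
 */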
4707 static bool
4708 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4709 {
4710    if (in->type != nir_instr_type_alu)
4711       return false;
4712    nir_alu_instr *alu = nir_instr_as_alu(in);
4713    switch (alu->op) {
4714    case nir_op_ubitfield_extract:
4715    case nir_op_ibitfield_extract:
4716    case nir_op_bitfield_insert:
4717       break;
4718    default:
4719       return false;
4720    }
4721    unsigned num_components = alu->def.num_components;
4722    if (num_components == 1)
4723       return false;
4724    b->cursor = nir_before_instr(in);
4725    nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4726    for (unsigned i = 0; i < num_components; i++) {
4727       if (alu->op == nir_op_bitfield_insert)
4728          dests[i] = nir_bitfield_insert(b,
4729                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4730                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4731                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4732                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4733       else if (alu->op == nir_op_ubitfield_extract)
4734          dests[i] = nir_ubitfield_extract(b,
4735                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4736                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4737                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4738       else
4739          dests[i] = nir_ibitfield_extract(b,
4740                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4741                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4742                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4743    }
4744    nir_def *dest = nir_vec(b, dests, num_components);
4745    nir_def_rewrite_uses_after(&alu->def, dest, in);
4746    nir_instr_remove(in);
4747    return true;
4748 }
4749 
4750 
4751 static bool
4752 split_bitfields(nir_shader *shader)
4753 {
4754    return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4755 }
4756 
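/* after an image variable's type has changed, retype all derefs that reference it */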
4757 static void
4758 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4759 {
4760    nir_foreach_function_impl(impl, nir) {
4761       nir_foreach_block(block, impl) {
4762          nir_foreach_instr_safe(instr, block) {
4763             if (instr->type != nir_instr_type_deref)
4764                continue;
4765             nir_deref_instr *deref = nir_instr_as_deref(instr);
4766             nir_variable *img = nir_deref_instr_get_variable(deref);
4767             if (img != var)
4768                continue;
4769             if (glsl_type_is_array(var->type)) {
4770                if (deref->deref_type == nir_deref_type_array)
4771                   deref->type = glsl_without_array(var->type);
4772                else
4773                   deref->type = var->type;
4774             } else {
4775                deref->type = var->type;
4776             }
4777          }
4778       }
4779    }
4780 }
4781 
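/* infer a concrete sampled type for a typeless (GLSL_TYPE_VOID) image variable from the
 * alu type of its load/store/atomic uses, then fix up its derefs; images with no typed
 * uses are demoted to shader temps
 */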
4782 static void
4783 type_image(nir_shader *nir, nir_variable *var)
4784 {
4785    nir_foreach_function_impl(impl, nir) {
4786       nir_foreach_block(block, impl) {
4787          nir_foreach_instr_safe(instr, block) {
4788             if (instr->type != nir_instr_type_intrinsic)
4789                continue;
4790             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4791             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4792                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4793                intr->intrinsic == nir_intrinsic_image_deref_store ||
4794                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4795                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4796                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4797                intr->intrinsic == nir_intrinsic_image_deref_format ||
4798                intr->intrinsic == nir_intrinsic_image_deref_order) {
4799                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4800                nir_variable *img = nir_deref_instr_get_variable(deref);
4801                if (img != var)
4802                   continue;
4803                nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4804                const struct glsl_type *type = glsl_without_array(var->type);
4805                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4806                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4807                   continue;
4808                }
4809                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4810                if (glsl_type_is_array(var->type))
4811                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4812                var->type = img_type;
4813                rewrite_cl_derefs(nir, var);
4814                return;
4815             }
4816          }
4817       }
4818    }
4819    nir_foreach_function_impl(impl, nir) {
4820       nir_foreach_block(block, impl) {
4821          nir_foreach_instr_safe(instr, block) {
4822             if (instr->type != nir_instr_type_intrinsic)
4823                continue;
4824             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4825             if (intr->intrinsic != nir_intrinsic_image_deref_size)
4826                continue;
4827             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4828             nir_variable *img = nir_deref_instr_get_variable(deref);
4829             if (img != var)
4830                continue;
4831             nir_alu_type alu_type = nir_type_uint32;
4832             const struct glsl_type *type = glsl_without_array(var->type);
4833             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4834                continue;
4835             }
4836             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4837             if (glsl_type_is_array(var->type))
4838                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4839             var->type = img_type;
4840             rewrite_cl_derefs(nir, var);
4841             return;
4842          }
4843       }
4844    }
4845    var->data.mode = nir_var_shader_temp;
4846 }
4847 
4848 static bool
4849 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4850 {
4851    bool progress = false;
4852    nir_foreach_function_impl(impl, nir) {
4853       nir_foreach_block(block, impl) {
4854          nir_foreach_instr(instr, block) {
4855             if (instr->type != nir_instr_type_tex)
4856                continue;
4857             nir_tex_instr *tex = nir_instr_as_tex(instr);
4858             if (nir_tex_instr_need_sampler(tex))
4859                *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4860             nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
4861             assert(var);
4862             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
4863                 nir_tex_instr_is_query(tex))
4864                continue;
4865             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4866             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4867             if (size > 1)
4868                img_type = glsl_array_type(img_type, size, 0);
4869             var->type = img_type;
4870             progress = true;
4871          }
4872       }
4873    }
4874    return progress;
4875 }
4876 
4877 static bool
4878 delete_samplers(nir_shader *nir)
4879 {
4880    bool progress = false;
4881    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4882       if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4883          var->data.mode = nir_var_shader_temp;
4884          progress = true;
4885       }
4886    }
4887    return progress;
4888 }
4889 
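/* Entry point for typing CL kernel images and samplers: drop pure sampler uniforms, retype
 * sampler views from the tex instructions that use them, and give each image variable a
 * concrete sampled type via type_image() above; *sampler_mask collects which sampler slots
 * the kernel actually needs.
 */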
4890 static bool
4891 type_images(nir_shader *nir, unsigned *sampler_mask)
4892 {
4893    bool progress = false;
4894    progress |= delete_samplers(nir);
4895    progress |= type_sampler_vars(nir, sampler_mask);
4896    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4897       type_image(nir, var);
4898       progress = true;
4899    }
4900    return progress;
4901 }
4902 
4903 /* attempt to assign io for separate shaders */
4904 static bool
4905 fixup_io_locations(nir_shader *nir)
4906 {
4907    nir_variable_mode modes;
4908    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
4909       modes = nir_var_shader_in | nir_var_shader_out;
4910    else
4911       modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4912    u_foreach_bit(mode, modes) {
4913       nir_variable_mode m = BITFIELD_BIT(mode);
4914       if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
4915           (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
4916          /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
4917           * - i/o interface blocks don't need to match
4918           * - any location can be present or not
4919           * - it just has to work
4920           *
4921           * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
4922           * since it's a builtin and yolo it with all the other legacy crap
4923           */
4924          nir_foreach_variable_with_modes(var, nir, m) {
4925             if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
4926                continue;
4927             if (var->data.location == VARYING_SLOT_VAR0)
4928                var->data.driver_location = 0;
4929             else if (var->data.patch)
4930                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
4931             else
4932                var->data.driver_location = var->data.location;
4933          }
4934          return true;
4935       }
4936       /* i/o interface blocks are required to be EXACT matches between stages:
4937        * iterate over all locations and set locations incrementally
4938        */
4939       unsigned slot = 0;
4940       for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4941          if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4942             continue;
4943          bool found = false;
4944          unsigned size = 0;
4945          nir_foreach_variable_with_modes(var, nir, m) {
4946             if (var->data.location != i)
4947                continue;
4948             /* only add slots for non-component vars or first-time component vars */
4949             if (!var->data.location_frac || !size) {
4950                /* ensure variable is given enough slots */
4951                if (nir_is_arrayed_io(var, nir->info.stage))
4952                   size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4953                else
4954                   size += glsl_count_vec4_slots(var->type, false, false);
4955             }
4956             if (var->data.patch)
4957                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
4958             else
4959                var->data.driver_location = slot;
4960             found = true;
4961          }
4962          slot += size;
4963          if (found) {
4964             /* ensure the consumed slots aren't double iterated */
4965             i += size - 1;
4966          } else {
4967             /* locations used between stages are not required to be contiguous */
4968             if (i >= VARYING_SLOT_VAR0)
4969                slot++;
4970          }
4971       }
4972    }
4973    return true;
4974 }
4975 
4976 static uint64_t
4977 zink_flat_flags(struct nir_shader *shader)
4978 {
4979    uint64_t flat_flags = 0;
4980    nir_foreach_shader_in_variable(var, shader) {
4981       if (var->data.interpolation == INTERP_MODE_FLAT)
4982          flat_flags |= BITFIELD64_BIT(var->data.location);
4983    }
4984 
4985    return flat_flags;
4986 }
4987 
4988 static nir_variable *
4989 find_io_var_with_semantics(nir_shader *nir, nir_variable_mode mode, nir_variable_mode realmode, nir_io_semantics s, unsigned location, unsigned c, bool is_load)
4990 {
4991    nir_foreach_variable_with_modes(var, nir, mode) {
4992       const struct glsl_type *type = var->type;
4993       nir_variable_mode m = var->data.mode;
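      /* temporarily restore the variable's real i/o mode so nir_is_arrayed_io() sees the
       * right stage semantics (callers may pass vars stashed in nir_var_mem_shared) */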
4994       var->data.mode = realmode;
4995       if (nir_is_arrayed_io(var, nir->info.stage))
4996          type = glsl_get_array_element(type);
4997       var->data.mode = m;
4998       if (var->data.fb_fetch_output != s.fb_fetch_output)
4999          continue;
5000       if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && s.dual_source_blend_index != var->data.index)
5001          continue;
5002       unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5003       if (var->data.location > location || var->data.location + num_slots <= location)
5004          continue;
5005       unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5006       if (glsl_type_contains_64bit(type)) {
5007          num_components *= 2;
5008          if (location > var->data.location) {
5009             unsigned sub_components = (location - var->data.location) * 4;
5010             if (sub_components > num_components)
5011                continue;
5012             num_components -= sub_components;
5013          }
5014       }
5015       if (var->data.location_frac > c || var->data.location_frac + num_components <= c)
5016          continue;
5017       return var;
5018    }
5019    return NULL;
5020 }
5021 
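/* Rebuild shader_in/shader_out variables from the lowered i/o intrinsics. Existing vars are
 * stashed as nir_var_mem_shared, every load/store's io semantics are mapped to a
 * (location, component) pair, and a matching variable is created (or an existing one is
 * widened): a plain vecN for most slots, a compact float array for clip/cull distances and
 * tess levels, and a full array when the location is accessed with a non-constant offset.
 * Any stashed vars left over afterwards are turned into shader_temp and removed.
 */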
5022 static void
5023 rework_io_vars(nir_shader *nir, nir_variable_mode mode)
5024 {
5025    assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5026    assert(util_bitcount(mode) == 1);
5027    bool found = false;
5028    /* store old vars */
5029    nir_foreach_variable_with_modes(var, nir, mode) {
5030       if (nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out)
5031          var->data.compact |= var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
5032       /* stash vars in this mode for now */
5033       var->data.mode = nir_var_mem_shared;
5034       found = true;
5035    }
5036    if (!found) {
5037       if (mode == nir_var_shader_out)
5038          found = nir->info.outputs_written || nir->info.outputs_read;
5039       else
5040          found = nir->info.inputs_read;
5041       if (!found)
5042          return;
5043    }
5044    /* scan for vars using indirect array access */
5045    BITSET_DECLARE(indirect_access, 128);
5046    BITSET_ZERO(indirect_access);
5047    nir_foreach_function_impl(impl, nir) {
5048       nir_foreach_block(block, impl) {
5049          nir_foreach_instr(instr, block) {
5050             if (instr->type != nir_instr_type_intrinsic)
5051                continue;
5052             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5053             bool is_load = false;
5054             bool is_input = false;
5055             bool is_interp = false;
5056             if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5057                continue;
5058             nir_src *src_offset = nir_get_io_offset_src(intr);
5059             if (!is_input && !src_offset)
5060                continue;
5061             if (mode == nir_var_shader_in && !is_input)
5062                continue;
5063             if (mode == nir_var_shader_out && is_input)
5064                continue;
5065             nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5066             if (!nir_src_is_const(*src_offset))
5067                BITSET_SET(indirect_access, s.location);
5068          }
5069       }
5070    }
5071    /* loop and create vars */
5072    nir_foreach_function_impl(impl, nir) {
5073       nir_foreach_block(block, impl) {
5074          nir_foreach_instr(instr, block) {
5075             if (instr->type != nir_instr_type_intrinsic)
5076                continue;
5077             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5078             bool is_load = false;
5079             bool is_input = false;
5080             bool is_interp = false;
5081             if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5082                continue;
5083             if (mode == nir_var_shader_in && !is_input)
5084                continue;
5085             if (mode == nir_var_shader_out && is_input)
5086                continue;
5087             nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5088             unsigned slot_offset = 0;
5089             bool is_indirect = BITSET_TEST(indirect_access, s.location);
5090             nir_src *src_offset = nir_get_io_offset_src(intr);
5091             if (src_offset && !is_indirect) {
5092                assert(nir_src_is_const(*src_offset));
5093                slot_offset = nir_src_as_uint(*src_offset);
5094             }
5095             unsigned location = s.location + slot_offset;
5096             unsigned frac = nir_intrinsic_component(intr);
5097             unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5098             /* set c aligned/rounded down to dword */
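            /* e.g. a 16-bit value written at component 2 packs into dword component
             * 2 * 16 / 32 = 1 (illustrative numbers) */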
5099             unsigned c = nir_slot_is_sysval_output(location, MESA_SHADER_NONE) ? 0 : frac;
5100             if (frac && bit_size < 32)
5101                c = frac * bit_size / 32;
5102             nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5103             /* ensure dword is filled with like-sized components */
5104             unsigned max_components = intr->num_components;
5105             if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5106                switch (s.location) {
5107                case FRAG_RESULT_DEPTH:
5108                case FRAG_RESULT_STENCIL:
5109                case FRAG_RESULT_SAMPLE_MASK:
5110                   max_components = 1;
5111                   break;
5112                default:
5113                   break;
5114                }
5115             } else if ((nir->info.stage != MESA_SHADER_VERTEX || mode != nir_var_shader_in) && s.location < VARYING_SLOT_VAR0) {
5116                switch (s.location) {
5117                case VARYING_SLOT_FOGC:
5118                   /* use intr components */
5119                   break;
5120                case VARYING_SLOT_POS:
5121                case VARYING_SLOT_COL0:
5122                case VARYING_SLOT_COL1:
5123                case VARYING_SLOT_TEX0:
5124                case VARYING_SLOT_TEX1:
5125                case VARYING_SLOT_TEX2:
5126                case VARYING_SLOT_TEX3:
5127                case VARYING_SLOT_TEX4:
5128                case VARYING_SLOT_TEX5:
5129                case VARYING_SLOT_TEX6:
5130                case VARYING_SLOT_TEX7:
5131                case VARYING_SLOT_BFC0:
5132                case VARYING_SLOT_BFC1:
5133                case VARYING_SLOT_EDGE:
5134                case VARYING_SLOT_CLIP_VERTEX:
5135                case VARYING_SLOT_PNTC:
5136                case VARYING_SLOT_BOUNDING_BOX0:
5137                case VARYING_SLOT_BOUNDING_BOX1:
5138                   max_components = 4;
5139                   break;
5140                case VARYING_SLOT_CLIP_DIST0:
5141                case VARYING_SLOT_CLIP_DIST1:
5142                   max_components = s.num_slots;
5143                   break;
5144                case VARYING_SLOT_CULL_DIST0:
5145                case VARYING_SLOT_CULL_DIST1:
5146                   max_components = s.num_slots;
5147                   break;
5148                case VARYING_SLOT_TESS_LEVEL_OUTER:
5149                   max_components = 4;
5150                   break;
5151                case VARYING_SLOT_TESS_LEVEL_INNER:
5152                   max_components = 2;
5153                   break;
5154                case VARYING_SLOT_PRIMITIVE_ID:
5155                case VARYING_SLOT_LAYER:
5156                case VARYING_SLOT_VIEWPORT:
5157                case VARYING_SLOT_FACE:
5158                case VARYING_SLOT_PSIZ:
5159                case VARYING_SLOT_VIEW_INDEX:
5160                case VARYING_SLOT_VIEWPORT_MASK:
5161                   max_components = 1;
5162                   break;
5163                default:
5164                   unreachable("???");
5165                }
5166             } else if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5167                if (s.location == VERT_ATTRIB_POINT_SIZE)
5168                   max_components = 1;
5169                else if (s.location < VERT_ATTRIB_GENERIC0)
5170                   max_components = 4;
5171                else
5172                   max_components = frac + max_components;
5173             } else if (bit_size == 16)
5174                max_components = align(max_components, 2);
5175             else if (bit_size == 8)
5176                max_components = align(max_components, 4);
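            /* 64-bit components occupy two 32-bit slots each, so e.g. a dvec2 store at
             * component 1 would overflow the vec4 slot and restarts at c = 0
             * (illustrative case) */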
5177             if (c + (bit_size == 64 ? max_components * 2 : max_components) > 4)
5178                c = 0;
5179             const struct glsl_type *vec_type;
5180             bool is_compact = false;
5181             if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5182                vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
5183             } else {
5184                switch (s.location) {
5185                case VARYING_SLOT_CLIP_DIST0:
5186                case VARYING_SLOT_CLIP_DIST1:
5187                case VARYING_SLOT_CULL_DIST0:
5188                case VARYING_SLOT_CULL_DIST1:
5189                case VARYING_SLOT_TESS_LEVEL_OUTER:
5190                case VARYING_SLOT_TESS_LEVEL_INNER:
5191                   vec_type = glsl_array_type(glsl_float_type(), max_components, sizeof(uint32_t));
5192                   is_compact = true;
5193                   break;
5194                default:
5195                   vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
5196                   break;
5197                }
5198             }
5199             /* reset the mode for nir_is_arrayed_io to work */
5200             bool is_arrayed = io_instr_is_arrayed(intr);
5201             if (is_indirect && s.location != VARYING_SLOT_TESS_LEVEL_INNER && s.location != VARYING_SLOT_TESS_LEVEL_OUTER) {
5202                /* indirect array access requires the full array in a single variable */
5203                unsigned slot_count = s.num_slots;
5204                if (bit_size == 64 && slot_count > 1)
5205                   slot_count /= 2;
5206                if (slot_count > 1)
5207                   vec_type = glsl_array_type(vec_type, slot_count, glsl_get_explicit_stride(vec_type));
5208             }
5209             if (is_arrayed)
5210                vec_type = glsl_array_type(vec_type, 32 /* MAX_PATCH_VERTICES */, glsl_get_explicit_stride(vec_type));
5211             nir_variable *found_var = find_io_var_with_semantics(nir, mode, mode, s, location, c, is_load);
5212             if (found_var) {
5213                if (glsl_get_vector_elements(glsl_without_array(found_var->type)) < glsl_get_vector_elements(glsl_without_array(vec_type))) {
5214                   /* enlarge existing vars if necessary */
5215                   found_var->type = vec_type;
5216                }
5217                continue;
5218             }
5219 
5220             char name[1024];
5221             if (c)
5222                snprintf(name, sizeof(name), "slot_%u_c%u", location, c);
5223             else
5224                snprintf(name, sizeof(name), "slot_%u", location);
5225             nir_variable *old_var = find_io_var_with_semantics(nir, nir_var_mem_shared, mode, s, location, c, is_load);
5226             nir_variable *var = nir_variable_create(nir, mode, vec_type, old_var ? old_var->name : name);
5227             var->data.mode = mode;
5228             var->type = vec_type;
5229             var->data.driver_location = nir_intrinsic_base(intr) + slot_offset;
5230             var->data.location_frac = c;
5231             var->data.location = location;
5232             var->data.patch = location >= VARYING_SLOT_PATCH0 ||
5233                               ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5234                                (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER));
5235             /* set flat by default */
5236             if (nir->info.stage == MESA_SHADER_FRAGMENT && mode == nir_var_shader_in)
5237                var->data.interpolation = INTERP_MODE_FLAT;
5238             var->data.fb_fetch_output = s.fb_fetch_output;
5239             var->data.index = s.dual_source_blend_index;
5240             var->data.precision = s.medium_precision;
5241             var->data.compact = is_compact;
5242          }
5243       }
5244    }
5245    nir_foreach_variable_with_modes(var, nir, nir_var_mem_shared)
5246       var->data.mode = nir_var_shader_temp;
5247    nir_fixup_deref_modes(nir);
5248    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5249 }
5250 
5251 
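/* Filter for nir_lower_io_to_scalar(): return true to scalarize an output store whose write
 * mask doesn't cover all source components, or whose xfb outputs don't consume full,
 * contiguous components; scalarizing such stores lets the write mask be dropped entirely.
 */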
5252 static bool
5253 eliminate_io_wrmasks_instr(const nir_instr *instr, const void *data)
5254 {
5255    const nir_shader *nir = data;
5256    if (instr->type != nir_instr_type_intrinsic)
5257       return false;
5258 
5259    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5260    switch (intr->intrinsic) {
5261    case nir_intrinsic_store_output:
5262    case nir_intrinsic_store_per_primitive_output:
5263    case nir_intrinsic_store_per_vertex_output:
5264       break;
5265    default:
5266       return false;
5267    }
5268    unsigned src_components = nir_intrinsic_src_components(intr, 0);
5269    unsigned wrmask = nir_intrinsic_write_mask(intr);
5270    unsigned num_components = util_bitcount(wrmask);
5271    if (num_components != src_components)
5272       return true;
5273    if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64)
5274       num_components *= 2;
5275    if (nir->xfb_info) {
5276       nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5277       nir_src *src_offset = nir_get_io_offset_src(intr);
5278       if (nir_src_is_const(*src_offset)) {
5279          unsigned slot_offset = nir_src_as_uint(*src_offset);
5280          for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
5281             if (nir->xfb_info->outputs[i].location == s.location + slot_offset) {
5282                unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
5283                if (xfb_components != MIN2(4, num_components))
5284                   return true;
5285                num_components -= xfb_components;
5286                if (!num_components)
5287                   break;
5288             }
5289          }
5290       } else {
5291          for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
5292             if (nir->xfb_info->outputs[i].location >= s.location &&
5293                nir->xfb_info->outputs[i].location < s.location + s.num_slots) {
5294                unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
5295                if (xfb_components < MIN2(num_components, 4))
5296                   return true;
5297                num_components -= xfb_components;
5298                if (!num_components)
5299                   break;
5300             }
5301          }
5302       }
5303    }
5304    return false;
5305 }
5306 
5307 static int
5308 zink_type_size(const struct glsl_type *type, bool bindless)
5309 {
5310    return glsl_count_attribute_slots(type, false);
5311 }
5312 
5313 static nir_mem_access_size_align
5314 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5315                          uint8_t bit_size, uint32_t align,
5316                          uint32_t align_offset, bool offset_is_const,
5317                          const void *cb_data)
5318 {
5319    align = nir_combined_align(align, align_offset);
5320 
5321    assert(util_is_power_of_two_nonzero(align));
5322 
5323    /* simply drop the bit_size for unaligned load/stores */
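   /* e.g. a 12-byte, 32-bit load with only 2-byte alignment (illustrative numbers) becomes
    * MIN2(12 / 2, 4) = 4 components of 2 * 8 = 16 bits each */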
5324    if (align < (bit_size / 8)) {
5325       return (nir_mem_access_size_align){
5326          .num_components = MIN2(bytes / align, 4),
5327          .bit_size = align * 8,
5328          .align = align,
5329       };
5330    } else {
5331       return (nir_mem_access_size_align){
5332          .num_components = MIN2(bytes / (bit_size / 8), 4),
5333          .bit_size = bit_size,
5334          .align = bit_size / 8,
5335       };
5336    }
5337 }
5338 
5339 static nir_mem_access_size_align
5340 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5341                                  uint8_t bit_size, uint32_t align,
5342                                  uint32_t align_offset, bool offset_is_const,
5343                                  const void *cb_data)
5344 {
5345    bit_size = *(const uint8_t *)cb_data;
5346    align = nir_combined_align(align, align_offset);
5347 
5348    assert(util_is_power_of_two_nonzero(align));
5349 
5350    return (nir_mem_access_size_align){
5351       .num_components = MIN2(bytes / (bit_size / 8), 4),
5352       .bit_size = bit_size,
5353       .align = bit_size / 8,
5354    };
5355 }
5356 
5357 static bool
5358 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5359 {
5360    uint8_t *bit_size = data;
5361    switch (instr->intrinsic) {
5362    case nir_intrinsic_load_scratch:
5363       *bit_size = MIN2(*bit_size, instr->def.bit_size);
5364       return false;
5365    case nir_intrinsic_store_scratch:
5366       *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5367       return false;
5368    default:
5369       return false;
5370    }
5371 }
5372 
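/* Kernels may access the same scratch bytes through differently sized types, so find the
 * smallest bit size used by any scratch load/store and re-lower all scratch accesses to
 * that size via nir_lower_mem_access_bit_sizes().
 */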
5373 static bool
5374 alias_scratch_memory(nir_shader *nir)
5375 {
5376    uint8_t bit_size = 64;
5377 
5378    nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5379    nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5380       .modes = nir_var_function_temp,
5381       .may_lower_unaligned_stores_to_atomics = true,
5382       .callback = mem_access_scratch_size_align_cb,
5383       .cb_data = &bit_size,
5384    };
5385    return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5386 }
5387 
5388 static uint8_t
5389 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5390 {
5391    return 4;
5392 }
5393 
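/* Top-level NIR -> zink_shader conversion: lower i/o to explicit intrinsics, rebuild the i/o
 * variables and assign descriptor set/binding indices for the Vulkan descriptor layout, run
 * the common optimization loop, and serialize the resulting NIR into ret->blob so
 * per-pipeline variants can be compiled from it later.
 */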
5394 struct zink_shader *
5395 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
5396 {
5397    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5398    bool have_psiz = false;
5399 
5400    ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
5401                         nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
5402 
5403    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
5404    ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
5405    if (screen->info.have_KHR_shader_float_controls) {
5406       if (screen->info.props12.shaderDenormFlushToZeroFloat16)
5407          ret->sinfo.float_controls.flush_denorms |= 0x1;
5408       if (screen->info.props12.shaderDenormFlushToZeroFloat32)
5409          ret->sinfo.float_controls.flush_denorms |= 0x2;
5410       if (screen->info.props12.shaderDenormFlushToZeroFloat64)
5411          ret->sinfo.float_controls.flush_denorms |= 0x4;
5412 
5413       if (screen->info.props12.shaderDenormPreserveFloat16)
5414          ret->sinfo.float_controls.preserve_denorms |= 0x1;
5415       if (screen->info.props12.shaderDenormPreserveFloat32)
5416          ret->sinfo.float_controls.preserve_denorms |= 0x2;
5417       if (screen->info.props12.shaderDenormPreserveFloat64)
5418          ret->sinfo.float_controls.preserve_denorms |= 0x4;
5419 
5420       ret->sinfo.float_controls.denorms_all_independence =
5421          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
5422 
5423       ret->sinfo.float_controls.denorms_32_bit_independence =
5424          ret->sinfo.float_controls.denorms_all_independence ||
5425          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
5426    }
5427    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
5428 
5429    util_queue_fence_init(&ret->precompile.fence);
5430    util_dynarray_init(&ret->pipeline_libs, ret);
5431    ret->hash = _mesa_hash_pointer(ret);
5432 
5433    ret->programs = _mesa_pointer_set_create(NULL);
5434    simple_mtx_init(&ret->lock, mtx_plain);
5435 
5436    nir_lower_io_options lower_io_flags = 0;
5437    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5438       lower_io_flags = nir_lower_io_lower_64bit_to_32;
5439    else if (!screen->info.feats.features.shaderFloat64)
5440       lower_io_flags = nir_lower_io_lower_64bit_float_to_32;
5441    bool temp_inputs = nir->info.stage != MESA_SHADER_VERTEX && nir->info.inputs_read & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
5442    bool temp_outputs = nir->info.stage != MESA_SHADER_FRAGMENT && (nir->info.outputs_read | nir->info.outputs_written) & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
5443    if (temp_inputs || temp_outputs) {
5444       NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), temp_outputs, temp_inputs);
5445       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
5446       NIR_PASS_V(nir, nir_split_var_copies);
5447       NIR_PASS_V(nir, nir_lower_var_copies);
5448    }
5449    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
5450    if (nir->info.stage == MESA_SHADER_VERTEX)
5451       lower_io_flags |= nir_lower_io_lower_64bit_to_32;
5452    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);
5453    nir->info.io_lowered = true;
5454 
5455    if (nir->info.stage == MESA_SHADER_KERNEL) {
5456       nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
5457          .modes = nir_var_all ^ nir_var_function_temp,
5458          .may_lower_unaligned_stores_to_atomics = true,
5459          .callback = mem_access_size_align_cb,
5460          .cb_data = screen,
5461       };
5462       NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
5463       NIR_PASS_V(nir, alias_scratch_memory);
5464       NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
5465       NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
5466    }
5467 
5468    optimize_nir(nir, NULL, true);
5469    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
5470       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
5471          NIR_PASS_V(nir, lower_bindless_io);
5472          break;
5473       }
5474    }
5475    nir_gather_xfb_info_from_intrinsics(nir);
5476    NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, eliminate_io_wrmasks_instr, nir);
5477    /* clean up io to improve direct access */
5478    optimize_nir(nir, NULL, true);
5479    rework_io_vars(nir, nir_var_shader_in);
5480    rework_io_vars(nir, nir_var_shader_out);
5481 
5482    if (nir->info.stage < MESA_SHADER_COMPUTE)
5483       create_gfx_pushconst(nir);
5484 
5485    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
5486             nir->info.stage == MESA_SHADER_TESS_EVAL)
5487       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
5488 
5489    if (nir->info.stage < MESA_SHADER_FRAGMENT)
5490       have_psiz = check_psiz(nir);
5491    if (nir->info.stage == MESA_SHADER_FRAGMENT)
5492       ret->flat_flags = zink_flat_flags(nir);
5493 
5494    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
5495       NIR_PASS_V(nir, fixup_io_locations);
5496 
5497    NIR_PASS_V(nir, lower_basevertex);
5498    NIR_PASS_V(nir, lower_baseinstance);
5499    NIR_PASS_V(nir, lower_sparse_and);
5500    NIR_PASS_V(nir, split_bitfields);
5501    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
5502 
5503    if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
5504       NIR_PASS_V(nir, nir_lower_discard_or_demote, true);
5505    }
5506 
5507    if (screen->need_2D_zs)
5508       NIR_PASS_V(nir, lower_1d_shadow, screen);
5509 
5510    {
5511       nir_lower_subgroups_options subgroup_options = {0};
5512       subgroup_options.lower_to_scalar = true;
5513       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
5514       subgroup_options.ballot_bit_size = 32;
5515       subgroup_options.ballot_components = 4;
5516       subgroup_options.lower_subgroup_masks = true;
5517       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
5518          subgroup_options.subgroup_size = 1;
5519          subgroup_options.lower_vote_trivial = true;
5520       }
5521       subgroup_options.lower_inverse_ballot = true;
5522       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
5523    }
5524 
5525    optimize_nir(nir, NULL, true);
5526    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5527    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
5528                                           nir_lower_demote_if_to_cf |
5529                                           nir_lower_terminate_if_to_cf));
5530 
5531    bool needs_size = analyze_io(ret, nir);
5532    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
5533    /* run in compile if there could be inlined uniforms */
5534    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
5535       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
5536       NIR_PASS_V(nir, rewrite_bo_access, screen);
5537       NIR_PASS_V(nir, remove_bo_access, ret);
5538    }
5539 
5540    struct zink_bindless_info bindless = {0};
5541    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
5542    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
5543       var->data.is_xfb = false;
5544 
5545    optimize_nir(nir, NULL, true);
5546    prune_io(nir);
5547 
5548    scan_nir(screen, nir, ret);
5549    unsigned sampler_mask = 0;
5550    if (nir->info.stage == MESA_SHADER_KERNEL) {
5551       NIR_PASS_V(nir, type_images, &sampler_mask);
5552       enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
5553       VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
5554       u_foreach_bit(s, sampler_mask) {
5555          ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
5556          ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
5557          ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5558          ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
5559          ret->num_bindings[ztype]++;
5560       }
5561       ret->sinfo.sampler_mask = sampler_mask;
5562    }
5563 
5564    unsigned ubo_binding_mask = 0;
5565    unsigned ssbo_binding_mask = 0;
5566    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
5567       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
5568                                         nir_var_image |
5569                                         nir_var_mem_ubo |
5570                                         nir_var_mem_ssbo)) {
5571          enum zink_descriptor_type ztype;
5572          const struct glsl_type *type = glsl_without_array(var->type);
5573          if (var->data.mode == nir_var_mem_ubo) {
5574             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
5575             /* buffer 0 is a push descriptor */
5576             var->data.descriptor_set = !!var->data.driver_location;
5577             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
5578                                 zink_binding(nir->info.stage,
5579                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
5580                                              var->data.driver_location,
5581                                              screen->compact_descriptors);
5582             assert(var->data.driver_location || var->data.binding < 10);
5583             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
5584             int binding = var->data.binding;
5585 
5586             if (!var->data.driver_location) {
5587                ret->has_uniforms = true;
5588             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
5589                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5590                ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
5591                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5592                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
5593                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
5594                ret->num_bindings[ztype]++;
5595                ubo_binding_mask |= BITFIELD_BIT(binding);
5596             }
5597          } else if (var->data.mode == nir_var_mem_ssbo) {
5598             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
5599             var->data.descriptor_set = screen->desc_set_id[ztype];
5600             var->data.binding = zink_binding(clamp_stage(&nir->info),
5601                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
5602                                              var->data.driver_location,
5603                                              screen->compact_descriptors);
5604             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
5605                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5606                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
5607                ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
5608                ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
5609                assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
5610                ret->num_bindings[ztype]++;
5611                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
5612             }
5613          } else {
5614             assert(var->data.mode == nir_var_uniform ||
5615                    var->data.mode == nir_var_image);
5616             if (var->data.bindless) {
5617                ret->bindless = true;
5618                handle_bindless_var(nir, var, type, &bindless);
5619             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
5620                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
5621                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
5622                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
5623                ztype = zink_desc_type_from_vktype(vktype);
5624                if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
5625                   ret->num_texel_buffers++;
5626                var->data.driver_location = var->data.binding;
5627                var->data.descriptor_set = screen->desc_set_id[ztype];
5628                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
5629                ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5630                ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
5631                ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5632                if (glsl_type_is_array(var->type))
5633                   ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
5634                else
5635                   ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
5636                ret->num_bindings[ztype]++;
5637             } else if (var->data.mode == nir_var_uniform) {
5638                /* this is a dead uniform */
5639                var->data.mode = 0;
5640                exec_node_remove(&var->node);
5641             }
5642          }
5643       }
5644    }
5645    bool bindless_lowered = false;
5646    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
5647    ret->bindless |= bindless_lowered;
5648 
5649    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5650       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
5651    if (nir->info.stage != MESA_SHADER_KERNEL)
5652       NIR_PASS_V(nir, match_tex_dests, ret);
5653 
5654    if (!nir->info.internal)
5655       nir_foreach_shader_out_variable(var, nir)
5656          var->data.explicit_xfb_buffer = 0;
5657    if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
5658       update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
5659    else if (have_psiz) {
5660       bool have_fake_psiz = false;
5661       nir_variable *psiz = NULL;
5662       nir_foreach_shader_out_variable(var, nir) {
5663          if (var->data.location == VARYING_SLOT_PSIZ) {
5664             if (!var->data.explicit_location)
5665                have_fake_psiz = true;
5666             else
5667                psiz = var;
5668          }
5669       }
5670       /* maintenance5 allows injected psiz deletion */
5671       if (have_fake_psiz && (psiz || screen->info.have_KHR_maintenance5)) {
              /* guard the deref: with maintenance5 this branch can be reached without an explicit psiz var */
5672          if (psiz)
                 psiz->data.mode = nir_var_shader_temp;
5673          nir_fixup_deref_modes(nir);
5674          delete_psiz_store(nir, true);
5675          NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5676       }
5677    }
5678    zink_shader_serialize_blob(nir, &ret->blob);
5679    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5680 
5681    ret->can_inline = true;
5682 
5683    return ret;
5684 }
5685 
5686 char *
5687 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
5688 {
5689    struct zink_screen *screen = zink_screen(pscreen);
5690    nir_shader *nir = nirptr;
5691 
5692    nir_lower_tex_options tex_opts = {
5693       .lower_invalid_implicit_lod = true,
5694    };
5695    /*
5696       Sampled Image must be an object whose type is OpTypeSampledImage.
5697       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
5698       or Rect, and the Arrayed and MS operands must be 0.
5699       - SPIRV, OpImageSampleProj* opcodes
5700     */
5701    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
5702                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
5703    tex_opts.lower_txp_array = true;
5704    if (!screen->info.feats.features.shaderImageGatherExtended)
5705       tex_opts.lower_tg4_offsets = true;
5706    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
5707    optimize_nir(nir, NULL, false);
5708    if (nir->info.stage == MESA_SHADER_VERTEX)
5709       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
5710    if (screen->driconf.inline_uniforms)
5711       nir_find_inlinable_uniforms(nir);
5712 
5713    return NULL;
5714 }
5715 
5716 void
5717 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5718 {
5719    _mesa_set_destroy(shader->programs, NULL);
5720    util_queue_fence_wait(&shader->precompile.fence);
5721    util_queue_fence_destroy(&shader->precompile.fence);
5722    zink_descriptor_shader_deinit(screen, shader);
5723    if (screen->info.have_EXT_shader_object) {
5724       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
5725    } else {
5726       if (shader->precompile.obj.mod)
5727          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
5728       if (shader->precompile.gpl)
5729          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
5730    }
5731    blob_finish(&shader->blob);
5732    ralloc_free(shader->spirv);
5733    free(shader->precompile.bindings);
5734    ralloc_free(shader);
5735 }
5736 
5737 void
5738 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
5739 {
5740    assert(shader->info.stage != MESA_SHADER_COMPUTE);
5741    util_queue_fence_wait(&shader->precompile.fence);
5742    set_foreach(shader->programs, entry) {
5743       struct zink_gfx_program *prog = (void*)entry->key;
5744       gl_shader_stage stage = shader->info.stage;
5745       assert(stage < ZINK_GFX_SHADER_COUNT);
5746       unsigned stages_present = prog->stages_present;
5747       if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
5748             prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
5749          stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
5750       unsigned idx = zink_program_cache_stages(stages_present);
5751       if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
5752           (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
5753          struct hash_table *ht = &prog->ctx->program_cache[idx];
5754          simple_mtx_lock(&prog->ctx->program_lock[idx]);
5755          struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
5756          assert(he && he->data == prog);
5757          _mesa_hash_table_remove(ht, he);
5758          prog->base.removed = true;
5759          simple_mtx_unlock(&prog->ctx->program_lock[idx]);
5760          util_queue_fence_wait(&prog->base.cache_fence);
5761 
5762          for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
5763             for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
5764                hash_table_foreach(&prog->pipelines[r][i], table_entry) {
5765                   struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
5766 
5767                   util_queue_fence_wait(&pc_entry->fence);
5768                }
5769             }
5770          }
5771 
5772       }
5773       if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
5774          prog->shaders[stage] = NULL;
5775          prog->stages_remaining &= ~BITFIELD_BIT(stage);
5776       }
5777       /* only remove generated tcs during parent tes destruction */
5778       if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
5779          prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
5780       if (stage != MESA_SHADER_FRAGMENT &&
5781           prog->shaders[MESA_SHADER_GEOMETRY] &&
5782           prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
5783           shader) {
5784          prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
5785       }
5786       zink_gfx_program_reference(screen, &prog, NULL);
5787    }
5788    while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
5789       struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
5790       if (!libs->removed) {
5791          libs->removed = true;
5792          unsigned idx = zink_program_cache_stages(libs->stages_present);
5793          simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
5794          _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
5795          simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
5796       }
5797       zink_gfx_lib_cache_unref(screen, libs);
5798    }
5799    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
5800        shader->non_fs.generated_tcs) {
5801       /* automatically destroy generated tcs shaders when tes is destroyed */
5802       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
5803       shader->non_fs.generated_tcs = NULL;
5804    }
5805    if (shader->info.stage != MESA_SHADER_FRAGMENT) {
5806       for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
5807          for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
5808             if (shader->non_fs.generated_gs[i][j]) {
5809                /* automatically destroy generated gs shaders when owner is destroyed */
5810                zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
5811                shader->non_fs.generated_gs[i][j] = NULL;
5812             }
5813          }
5814       }
5815    }
5816    zink_shader_free(screen, shader);
5817 }
5818 
5819 
5820 struct zink_shader_object
5821 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
5822 {
5823    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
5824    /* shortcut all the nir passes since we just have to change this one word */
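   /* tcs_vertices_out_word was recorded when the SPIR-V was emitted; it presumably indexes
    * the OutputVertices execution-mode operand in the binary */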
5825    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
5826    return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
5827 }
5828 
5829 /* creating a passthrough tcs shader that's roughly:
5830 
5831 #version 150
5832 #extension GL_ARB_tessellation_shader : require
5833 
5834 in vec4 some_var[gl_MaxPatchVertices];
5835 out vec4 some_var_out;
5836 
5837 layout(push_constant) uniform tcsPushConstants {
5838     layout(offset = 0) float TessLevelInner[2];
5839     layout(offset = 8) float TessLevelOuter[4];
5840 } u_tcsPushConstants;
5841 layout(vertices = $vertices_per_patch) out;
5842 void main()
5843 {
5844   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
5845   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
5846   some_var_out = some_var[gl_InvocationID];
5847 }
5848 
5849 */
5850 struct zink_shader *
5851 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
5852 {
5853    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5854    util_queue_fence_init(&ret->precompile.fence);
5855    ret->hash = _mesa_hash_pointer(ret);
5856    ret->programs = _mesa_pointer_set_create(NULL);
5857    simple_mtx_init(&ret->lock, mtx_plain);
5858 
5859    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
5860    nir_function *fn = nir_function_create(nir, "main");
5861    fn->is_entrypoint = true;
5862    nir_function_impl *impl = nir_function_impl_create(fn);
5863 
5864    nir_builder b = nir_builder_at(nir_before_impl(impl));
5865 
5866    nir_def *invocation_id = nir_load_invocation_id(&b);
5867 
5868    nir_foreach_shader_in_variable(var, tes) {
5869       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
5870          continue;
5871       const struct glsl_type *in_type = var->type;
5872       const struct glsl_type *out_type = var->type;
5873       char buf[1024];
5874       snprintf(buf, sizeof(buf), "%s_out", var->name);
5875       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
5876          const struct glsl_type *type = var->type;
5877          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
5878          out_type = glsl_array_type(type, vertices_per_patch, 0);
5879       }
5880 
5881       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
5882       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
5883       out->data.location = in->data.location = var->data.location;
5884       out->data.location_frac = in->data.location_frac = var->data.location_frac;
5885 
5886       /* gl_in[] receives values from equivalent built-in output
5887          variables written by the vertex shader (section 2.14.7).  Each array
5888          element of gl_in[] is a structure holding values for a specific vertex of
5889          the input patch.  The length of gl_in[] is equal to the
5890          implementation-dependent maximum patch size (gl_MaxPatchVertices).
5891          - ARB_tessellation_shader
5892        */
5893       /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
5894       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
5895       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
5896       copy_vars(&b, out_value, in_value);
5897    }
5898    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
5899    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
5900    gl_TessLevelInner->data.patch = 1;
5901    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
5902    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
5903    gl_TessLevelOuter->data.patch = 1;
5904 
5905    create_gfx_pushconst(nir);
5906 
5907    nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
5908                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
5909    nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
5910                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
5911 
5912    for (unsigned i = 0; i < 2; i++) {
5913       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
5914       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
5915    }
5916    for (unsigned i = 0; i < 4; i++) {
5917       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
5918       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
5919    }
5920 
5921    nir->info.tess.tcs_vertices_out = vertices_per_patch;
5922    nir_validate_shader(nir, "created");
5923 
5924    optimize_nir(nir, NULL, true);
5925    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5926    NIR_PASS_V(nir, nir_convert_from_ssa, true);
5927 
5928    *nir_ret = nir;
5929    zink_shader_serialize_blob(nir, &ret->blob);
5930    memcpy(&ret->info, &nir->info, sizeof(nir->info));
5931    ret->non_fs.is_generated = true;
5932    return ret;
5933 }
5934 
5935 bool
5936 zink_shader_has_cubes(nir_shader *nir)
5937 {
5938    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
5939       const struct glsl_type *type = glsl_without_array(var->type);
5940       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
5941          return true;
5942    }
5943    return false;
5944 }
5945 
5946 nir_shader *
5947 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
5948 {
5949    struct blob_reader blob_reader;
5950    blob_reader_init(&blob_reader, blob->data, blob->size);
5951    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
5952 }
5953 
5954 nir_shader *
5955 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
5956 {
5957    return zink_shader_blob_deserialize(screen, &zs->blob);
5958 }
5959 
5960 void
5961 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
5962 {
5963    blob_init(blob);
5964 #ifndef NDEBUG
5965    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
5966 #else
5967    bool strip = false;
5968 #endif
5969    nir_serialize(blob, nir, strip);
5970 }
5971 
5972 void
5973 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
5974 {
5975    nir_shader *nir = zink_shader_deserialize(screen, zs);
5976    nir_print_shader(nir, fp);
5977    ralloc_free(nir);
5978 }
5979