1 /*
2  * Copyright 2018 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir_opcodes.h"
25 #include "shader_enums.h"
26 #include "zink_context.h"
27 #include "zink_compiler.h"
28 #include "zink_descriptors.h"
29 #include "zink_program.h"
30 #include "zink_screen.h"
31 #include "nir_to_spirv/nir_to_spirv.h"
32 
33 #include "pipe/p_state.h"
34 
35 #include "nir.h"
36 #include "nir_xfb_info.h"
37 #include "nir/nir_draw_helpers.h"
38 #include "compiler/nir/nir_builder.h"
39 #include "compiler/nir/nir_serialize.h"
40 #include "compiler/nir/nir_builtin_builder.h"
41 
42 #include "nir/tgsi_to_nir.h"
43 #include "tgsi/tgsi_dump.h"
44 
45 #include "util/u_memory.h"
46 
47 #include "compiler/spirv/nir_spirv.h"
48 #include "compiler/spirv/spirv_info.h"
49 #include "vk_util.h"
50 
51 bool
52 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
53 
54 
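/* Recursively copy the value behind one deref chain to another: structs,
 * arrays and matrices are walked element-wise, and leaves are copied with a
 * plain load_deref/store_deref pair.
 */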
55 static void
56 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
57 {
58    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
59    if (glsl_type_is_struct_or_ifc(dst->type)) {
60       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
61          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
62       }
63    } else if (glsl_type_is_array_or_matrix(dst->type)) {
64       unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
65       for (unsigned i = 0; i < count; i++) {
66          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
67       }
68    } else {
69       nir_def *load = nir_load_deref(b, src);
70       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
71    }
72 }
73 
74 static bool
75 is_clipcull_dist(int location)
76 {
77    switch (location) {
78    case VARYING_SLOT_CLIP_DIST0:
79    case VARYING_SLOT_CLIP_DIST1:
80    case VARYING_SLOT_CULL_DIST0:
81    case VARYING_SLOT_CULL_DIST1:
82       return true;
83    default: break;
84    }
85    return false;
86 }
87 
88 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
89 
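/* Create a push-constant block whose members mirror struct zink_gfx_push_constant
 * (one uint array per field, matching its size and offset) so the nir-to-spirv
 * push-constant loader can address each member by its ZINK_GFX_PUSHCONST_* index.
 */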
90 static void
91 create_gfx_pushconst(nir_shader *nir)
92 {
93 #define PUSHCONST_MEMBER(member_idx, field)                                                                     \
94 fields[member_idx].type =                                                                                       \
95    glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
96 fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
97 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
98 
99    nir_variable *pushconst;
100    /* create compatible layout for the ntv push constant loader */
101    struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
102    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
103    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
104    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
105    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
106    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
107    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
108    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
109    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
110 
111    pushconst = nir_variable_create(nir, nir_var_mem_push_const,
112                                    glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
113                                    "gfx_pushconst");
114    pushconst->data.location = INT_MAX; //doesn't really matter
115 
116 #undef PUSHCONST_MEMBER
117 }
118 
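/* GL requires gl_BaseVertex to read as 0 for non-indexed draws, so wrap every
 * load_base_vertex in a bcsel on the draw_mode_is_indexed push constant: keep
 * the real value when the draw is indexed, otherwise return 0.
 */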
119 static bool
120 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
121 {
122    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
123       return false;
124 
125    b->cursor = nir_after_instr(&instr->instr);
126    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
127    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
128    load->num_components = 1;
129    nir_def_init(&load->instr, &load->def, 1, 32);
130    nir_builder_instr_insert(b, &load->instr);
131 
132    nir_def *composite = nir_build_alu(b, nir_op_bcsel,
133                                           nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
134                                           &instr->def,
135                                           nir_imm_int(b, 0),
136                                           NULL);
137 
138    nir_def_rewrite_uses_after(&instr->def, composite,
139                                   composite->parent_instr);
140    return true;
141 }
142 
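/* Replace gl_DrawID reads with a load of the draw_id push constant
 * (ZINK_GFX_PUSHCONST_DRAW_ID).
 */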
143 static bool
144 lower_basevertex(nir_shader *shader)
145 {
146    if (shader->info.stage != MESA_SHADER_VERTEX)
147       return false;
148 
149    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
150       return false;
151 
152    return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
153                                      nir_metadata_dominance, NULL);
154 }
155 
156 
157 static bool
158 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
159 {
160    if (instr->intrinsic != nir_intrinsic_load_draw_id)
161       return false;
162 
163    b->cursor = nir_before_instr(&instr->instr);
164    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
165    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
166    load->num_components = 1;
167    nir_def_init(&load->instr, &load->def, 1, 32);
168    nir_builder_instr_insert(b, &load->instr);
169 
170    nir_def_rewrite_uses(&instr->def, &load->def);
171 
172    return true;
173 }
174 
175 static bool
176 lower_drawid(nir_shader *shader)
177 {
178    if (shader->info.stage != MESA_SHADER_VERTEX)
179       return false;
180 
181    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
182       return false;
183 
184    return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
185                                      nir_metadata_dominance, NULL);
186 }
187 
188 struct lower_gl_point_state {
189    nir_variable *gl_pos_out;
190    nir_variable *gl_point_size;
191 };
192 
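/* Point-to-quad lowering for the geometry stage: every emitted point is
 * replaced by a 4-vertex triangle strip centered on gl_Position, with the
 * half-extents derived from gl_PointSize and the viewport scale push constant.
 */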
193 static bool
194 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
195 {
196    struct lower_gl_point_state *state = data;
197    nir_def *vp_scale, *pos;
198 
199    if (instr->type != nir_instr_type_intrinsic)
200       return false;
201 
202    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
203    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
204        intrin->intrinsic != nir_intrinsic_emit_vertex)
205       return false;
206 
207    if (nir_intrinsic_stream_id(intrin) != 0)
208       return false;
209 
210    if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
211          intrin->intrinsic == nir_intrinsic_end_primitive) {
212       nir_instr_remove(&intrin->instr);
213       return true;
214    }
215 
216    b->cursor = nir_before_instr(instr);
217 
218    // viewport-map endpoints
219    nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
220    vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
221 
222    // Load point info values
223    nir_def *point_size = nir_load_var(b, state->gl_point_size);
224    nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
225 
226    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
227    nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
228    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
229    // half_w_delta = w_delta / 2
230    nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
231 
232    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
233    nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
234    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
235    // half_h_delta = h_delta / 2
236    nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
237 
238    nir_def *point_dir[4][2] = {
239       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
240       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
241       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
242       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
243    };
244 
245    nir_def *point_pos_x = nir_channel(b, point_pos, 0);
246    nir_def *point_pos_y = nir_channel(b, point_pos, 1);
247 
248    for (size_t i = 0; i < 4; i++) {
249       pos = nir_vec4(b,
250                      nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
251                      nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
252                      nir_channel(b, point_pos, 2),
253                      nir_channel(b, point_pos, 3));
254 
255       nir_store_var(b, state->gl_pos_out, pos, 0xf);
256 
257       nir_emit_vertex(b);
258    }
259 
260    nir_end_primitive(b);
261 
262    nir_instr_remove(&intrin->instr);
263 
264    return true;
265 }
266 
267 static bool
268 lower_gl_point_gs(nir_shader *shader)
269 {
270    struct lower_gl_point_state state;
271 
272    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
273    shader->info.gs.vertices_out *= 4;
274 
275    // Get the gl_Position and gl_PointSize outputs
276    state.gl_pos_out =
277       nir_find_variable_with_location(shader, nir_var_shader_out,
278                                       VARYING_SLOT_POS);
279    state.gl_point_size =
280       nir_find_variable_with_location(shader, nir_var_shader_out,
281                                       VARYING_SLOT_PSIZ);
282 
283    // if gl_Position or gl_PointSize isn't written, we have nothing to do
284    if (!state.gl_pos_out || !state.gl_point_size)
285       return false;
286 
287    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
288                                        nir_metadata_dominance, &state);
289 }
290 
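/* Provoking-vertex lowering: instead of emitting vertices directly, the GS
 * writes all outputs into per-varying ring buffers (sized by vertices_out)
 * and, on end_primitive, re-emits each primitive with its vertices rotated so
 * the desired provoking vertex comes first.
 */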
291 struct lower_pv_mode_state {
292    nir_variable *varyings[VARYING_SLOT_MAX][4];
293    nir_variable *pos_counter;
294    nir_variable *out_pos_counter;
295    nir_variable *ring_offset;
296    unsigned ring_size;
297    unsigned primitive_vert_count;
298    unsigned prim;
299 };
300 
301 static nir_def*
302 lower_pv_mode_gs_ring_index(nir_builder *b,
303                             struct lower_pv_mode_state *state,
304                             nir_def *index)
305 {
306    nir_def *ring_offset = nir_load_var(b, state->ring_offset);
307    return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
308                           state->ring_size);
309 }
310 
311 /* Given the final deref of a chain of derefs, this function will walk up the chain
312  * until it finds a var deref.
313  *
314  * It will then recreate an identical chain that ends with the provided deref.
315  */
316 static nir_deref_instr*
317 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
318 {
319    nir_deref_instr *parent = nir_deref_instr_parent(old);
320    if (!parent)
321       return new;
322    switch(old->deref_type) {
323    case nir_deref_type_var:
324       return new;
325    case nir_deref_type_array:
326       return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
327    case nir_deref_type_struct:
328       return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
329    case nir_deref_type_array_wildcard:
330    case nir_deref_type_ptr_as_array:
331    case nir_deref_type_cast:
332       unreachable("unexpected deref type");
333    }
334    unreachable("impossible deref type");
335 }
336 
337 static bool
338 lower_pv_mode_gs_store(nir_builder *b,
339                        nir_intrinsic_instr *intrin,
340                        struct lower_pv_mode_state *state)
341 {
342    b->cursor = nir_before_instr(&intrin->instr);
343    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
344    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
345       nir_variable *var = nir_deref_instr_get_variable(deref);
346 
347       gl_varying_slot location = var->data.location;
348       unsigned location_frac = var->data.location_frac;
349       assert(state->varyings[location][location_frac]);
350       nir_def *pos_counter = nir_load_var(b, state->pos_counter);
351       nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
352       nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
353       nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
354       // recreate the chain of derefs that led to the store.
355       nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
356       nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
357       nir_instr_remove(&intrin->instr);
358       return true;
359    }
360 
361    return false;
362 }
363 
364 static void
365 lower_pv_mode_emit_rotated_prim(nir_builder *b,
366                                 struct lower_pv_mode_state *state,
367                                 nir_def *current_vertex)
368 {
369    nir_def *two = nir_imm_int(b, 2);
370    nir_def *three = nir_imm_int(b, 3);
371    bool is_triangle = state->primitive_vert_count == 3;
372    /* This shader will always see the last three vertices emitted by the user gs.
373     * The following table is used to rotate primitives within a strip generated
374     * by the user gs such that the last vertex becomes the first.
375     *
376     * [lines, tris][even/odd index][vertex mod 3]
377     */
378    static const unsigned vert_maps[2][2][3] = {
379       {{1, 0, 0}, {1, 0, 0}},
380       {{2, 0, 1}, {2, 1, 0}}
381    };
382    /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
383     * is either the last or the second, depending on whether the triangle is at an odd
384     * or even position within the strip.
385     *
386     * odd or even primitive within draw
387     */
388    nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
389    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
390       /* odd or even triangle within strip emitted by user GS
391        * this is handled using the table
392        */
393       nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
394       unsigned offset_even = vert_maps[is_triangle][0][i];
395       unsigned offset_odd = vert_maps[is_triangle][1][i];
396       nir_def *offset_even_value = nir_imm_int(b, offset_even);
397       nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
398       nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
399                                             offset_odd_value, offset_even_value);
400       /* Here we account for how triangles are provided to the gs from a strip.
401        * For even primitives we rotate by 3, meaning we do nothing.
402        * For odd primitives we rotate by 2, combined with the previous rotation this
403        * means the second vertex becomes the last.
404        */
405       if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
406         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
407                                             nir_isub(b, three,
408                                                         odd_prim)),
409                                             three);
410       /* Triangles that come from fans are provided to the gs the same way as
411        * odd triangles from a strip so always rotate by 2.
412        */
413       else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
414         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
415                                 three);
416       rotated_i = nir_iadd(b, rotated_i, current_vertex);
417       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
418          gl_varying_slot location = var->data.location;
419          unsigned location_frac = var->data.location_frac;
420          if (state->varyings[location][location_frac]) {
421             nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
422             nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
423             copy_vars(b, nir_build_deref_var(b, var), value);
424          }
425       }
426       nir_emit_vertex(b);
427    }
428 }
429 
430 static bool
431 lower_pv_mode_gs_emit_vertex(nir_builder *b,
432                              nir_intrinsic_instr *intrin,
433                              struct lower_pv_mode_state *state)
434 {
435    b->cursor = nir_before_instr(&intrin->instr);
436 
437    // increment pos_counter
438    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
439    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
440 
441    nir_instr_remove(&intrin->instr);
442    return true;
443 }
444 
445 static bool
446 lower_pv_mode_gs_end_primitive(nir_builder *b,
447                                nir_intrinsic_instr *intrin,
448                                struct lower_pv_mode_state *state)
449 {
450    b->cursor = nir_before_instr(&intrin->instr);
451 
452    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
453    nir_push_loop(b);
454    {
455       nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
456       nir_break_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
457                                  nir_imm_int(b, state->primitive_vert_count)));
458 
459       lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
460       nir_end_primitive(b);
461 
462       nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
463    }
464    nir_pop_loop(b, NULL);
465    /* Set the ring offset such that when position 0 is
466     * read we get the last value written
467     */
468    nir_store_var(b, state->ring_offset, pos_counter, 1);
469    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
470    nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
471 
472    nir_instr_remove(&intrin->instr);
473    return true;
474 }
475 
476 static bool
477 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
478 {
479    if (instr->type != nir_instr_type_intrinsic)
480       return false;
481 
482    struct lower_pv_mode_state *state = data;
483    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
484 
485    switch (intrin->intrinsic) {
486    case nir_intrinsic_store_deref:
487       return lower_pv_mode_gs_store(b, intrin, state);
488    case nir_intrinsic_copy_deref:
489       unreachable("should be lowered");
490    case nir_intrinsic_emit_vertex_with_counter:
491    case nir_intrinsic_emit_vertex:
492       return lower_pv_mode_gs_emit_vertex(b, intrin, state);
493    case nir_intrinsic_end_primitive:
494    case nir_intrinsic_end_primitive_with_counter:
495       return lower_pv_mode_gs_end_primitive(b, intrin, state);
496    default:
497       return false;
498    }
499 }
500 
501 static bool
502 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
503 {
504    nir_builder b;
505    struct lower_pv_mode_state state;
506    memset(state.varyings, 0, sizeof(state.varyings));
507 
508    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
509    b = nir_builder_at(nir_before_impl(entry));
510 
511    state.primitive_vert_count =
512       mesa_vertices_per_prim(shader->info.gs.output_primitive);
513    state.ring_size = shader->info.gs.vertices_out;
514 
515    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
516       gl_varying_slot location = var->data.location;
517       unsigned location_frac = var->data.location_frac;
518 
519       char name[100];
520       snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
521       state.varyings[location][location_frac] =
522          nir_local_variable_create(entry,
523                                    glsl_array_type(var->type,
524                                                    state.ring_size,
525                                                    false),
526                                    name);
527    }
528 
529    state.pos_counter = nir_local_variable_create(entry,
530                                                  glsl_uint_type(),
531                                                  "__pos_counter");
532 
533    state.out_pos_counter = nir_local_variable_create(entry,
534                                                      glsl_uint_type(),
535                                                      "__out_pos_counter");
536 
537    state.ring_offset = nir_local_variable_create(entry,
538                                                  glsl_uint_type(),
539                                                  "__ring_offset");
540 
541    state.prim = prim;
542 
543    // initialize pos_counter and out_pos_counter
544    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
545    nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
546    nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
547 
548    shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
549                                    (state.primitive_vert_count - 1)) *
550                                   state.primitive_vert_count;
551    return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
552                                        nir_metadata_dominance, &state);
553 }
554 
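/* Line-stipple lowering: the GS accumulates the screen-space length of the
 * line strip into a noperspective "__stipple" varying, and the matching FS
 * pass below combines that distance with the stipple pattern/factor push
 * constant to kill samples via gl_SampleMask.
 */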
555 struct lower_line_stipple_state {
556    nir_variable *pos_out;
557    nir_variable *stipple_out;
558    nir_variable *prev_pos;
559    nir_variable *pos_counter;
560    nir_variable *stipple_counter;
561    bool line_rectangular;
562 };
563 
564 static nir_def *
565 viewport_map(nir_builder *b, nir_def *vert,
566              nir_def *scale)
567 {
568    nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
569    nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
570                                         w_recip);
571    return nir_fmul(b, ndc_point, scale);
572 }
573 
574 static bool
575 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
576 {
577    struct lower_line_stipple_state *state = data;
578    if (instr->type != nir_instr_type_intrinsic)
579       return false;
580 
581    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
582    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
583        intrin->intrinsic != nir_intrinsic_emit_vertex)
584       return false;
585 
586    b->cursor = nir_before_instr(instr);
587 
588    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
589    // viewport-map endpoints
590    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
591                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
592    nir_def *prev = nir_load_var(b, state->prev_pos);
593    nir_def *curr = nir_load_var(b, state->pos_out);
594    prev = viewport_map(b, prev, vp_scale);
595    curr = viewport_map(b, curr, vp_scale);
596 
597    // calculate length of line
598    nir_def *len;
599    if (state->line_rectangular)
600       len = nir_fast_distance(b, prev, curr);
601    else {
602       nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
603       len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
604    }
605    // update stipple_counter
606    nir_store_var(b, state->stipple_counter,
607                     nir_fadd(b, nir_load_var(b, state->stipple_counter),
608                                 len), 1);
609    nir_pop_if(b, NULL);
610    // emit stipple out
611    nir_copy_var(b, state->stipple_out, state->stipple_counter);
612    nir_copy_var(b, state->prev_pos, state->pos_out);
613 
614    // update prev_pos and pos_counter for next vertex
615    b->cursor = nir_after_instr(instr);
616    nir_store_var(b, state->pos_counter,
617                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
618                                     1), 1);
619 
620    return true;
621 }
622 
623 static bool
624 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
625 {
626    nir_builder b;
627    struct lower_line_stipple_state state;
628 
629    state.pos_out =
630       nir_find_variable_with_location(shader, nir_var_shader_out,
631                                       VARYING_SLOT_POS);
632 
633    // if position isn't written, we have nothing to do
634    if (!state.pos_out)
635       return false;
636 
637    state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
638                                            glsl_float_type(),
639                                            "__stipple");
640    state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
641    state.stipple_out->data.driver_location = shader->num_outputs++;
642    state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
643    shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
644 
645    // create temp variables
646    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
647                                         glsl_vec4_type(),
648                                         "__prev_pos");
649    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
650                                            glsl_uint_type(),
651                                            "__pos_counter");
652    state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
653                                                glsl_float_type(),
654                                                "__stipple_counter");
655 
656    state.line_rectangular = line_rectangular;
657    // initialize pos_counter and stipple_counter
658    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
659    b = nir_builder_at(nir_before_impl(entry));
660    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
661    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
662 
663    return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
664                                        nir_metadata_dominance, &state);
665 }
666 
667 static bool
668 lower_line_stipple_fs(nir_shader *shader)
669 {
670    nir_builder b;
671    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
672    b = nir_builder_at(nir_after_impl(entry));
673 
674    // create stipple counter
675    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
676                                                glsl_float_type(),
677                                                "__stipple");
678    stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
679    stipple->data.driver_location = shader->num_inputs++;
680    stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
681    shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
682 
683    nir_variable *sample_mask_out =
684       nir_find_variable_with_location(shader, nir_var_shader_out,
685                                       FRAG_RESULT_SAMPLE_MASK);
686    if (!sample_mask_out) {
687       sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
688                                         glsl_uint_type(), "sample_mask");
689       sample_mask_out->data.driver_location = shader->num_outputs++;
690       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
691    }
692 
693    nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
694                                                       nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
695    nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
696    pattern = nir_iand_imm(&b, pattern, 0xffff);
697 
698    nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
699    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
700    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
701    nir_store_var(&b, v, sample_mask_in, 1);
702    nir_store_var(&b, sample_mask, sample_mask_in, 1);
703    nir_push_loop(&b);
704    {
705       nir_def *value = nir_load_var(&b, v);
706       nir_def *index = nir_ufind_msb(&b, value);
707       nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
708       nir_def *new_value = nir_ixor(&b, value, index_mask);
709       nir_store_var(&b, v, new_value,  1);
710       nir_push_if(&b, nir_ieq_imm(&b, value, 0));
711       nir_jump(&b, nir_jump_break);
712       nir_pop_if(&b, NULL);
713 
714       nir_def *stipple_pos =
715          nir_interp_deref_at_sample(&b, 1, 32,
716             &nir_build_deref_var(&b, stipple)->def, index);
717       stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
718                                  nir_imm_float(&b, 16.0));
719       stipple_pos = nir_f2i32(&b, stipple_pos);
720       nir_def *bit =
721          nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
722       nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
723       {
724          nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
725          sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
726          nir_store_var(&b, sample_mask, sample_mask_value, 1);
727       }
728       nir_pop_if(&b, NULL);
729    }
730    nir_pop_loop(&b, NULL);
731    nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
732 
733    return true;
734 }
735 
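/* Smooth-line lowering: the GS expands each line segment into a widened quad
 * plus two end caps (8 vertices total) carrying a noperspective "__line_coord"
 * varying; the fragment-shader half of the lowering is handled by
 * nir_lower_aaline_fs in lower_line_smooth_fs below.
 */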
736 struct lower_line_smooth_state {
737    nir_variable *pos_out;
738    nir_variable *line_coord_out;
739    nir_variable *prev_pos;
740    nir_variable *pos_counter;
741    nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
742                 *varyings[VARYING_SLOT_MAX][4]; // location_frac
743 };
744 
745 static bool
746 lower_line_smooth_gs_store(nir_builder *b,
747                            nir_intrinsic_instr *intrin,
748                            struct lower_line_smooth_state *state)
749 {
750    b->cursor = nir_before_instr(&intrin->instr);
751    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
752    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
753       nir_variable *var = nir_deref_instr_get_variable(deref);
754 
755       // we take care of position elsewhere
756       gl_varying_slot location = var->data.location;
757       unsigned location_frac = var->data.location_frac;
758       if (location != VARYING_SLOT_POS) {
759          assert(state->varyings[location]);
760          nir_store_var(b, state->varyings[location][location_frac],
761                        intrin->src[1].ssa,
762                        nir_intrinsic_write_mask(intrin));
763          nir_instr_remove(&intrin->instr);
764          return true;
765       }
766    }
767 
768    return false;
769 }
770 
771 static bool
772 lower_line_smooth_gs_emit_vertex(nir_builder *b,
773                                  nir_intrinsic_instr *intrin,
774                                  struct lower_line_smooth_state *state)
775 {
776    b->cursor = nir_before_instr(&intrin->instr);
777 
778    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
779    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
780                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
781    nir_def *prev = nir_load_var(b, state->prev_pos);
782    nir_def *curr = nir_load_var(b, state->pos_out);
783    nir_def *prev_vp = viewport_map(b, prev, vp_scale);
784    nir_def *curr_vp = viewport_map(b, curr, vp_scale);
785 
786    nir_def *width = nir_load_push_constant_zink(b, 1, 32,
787                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
788    nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
789 
790    const unsigned yx[2] = { 1, 0 };
791    nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
792    nir_def *len = nir_fast_length(b, vec);
793    nir_def *dir = nir_normalize(b, vec);
794    nir_def *half_length = nir_fmul_imm(b, len, 0.5);
795    half_length = nir_fadd_imm(b, half_length, 0.5);
796 
797    nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
798    nir_def *tangent =
799       nir_fmul(b,
800                nir_fmul(b,
801                         nir_swizzle(b, dir, yx, 2),
802                         nir_imm_vec2(b, 1.0, -1.0)),
803                vp_scale_rcp);
804    tangent = nir_fmul(b, tangent, half_width);
805    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
806    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
807 
808    nir_def *line_offets[8] = {
809       nir_fadd(b, tangent, nir_fneg(b, dir)),
810       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
811       tangent,
812       nir_fneg(b, tangent),
813       tangent,
814       nir_fneg(b, tangent),
815       nir_fadd(b, tangent, dir),
816       nir_fadd(b, nir_fneg(b, tangent), dir),
817    };
818    nir_def *line_coord =
819       nir_vec4(b, half_width, half_width, half_length, half_length);
820    nir_def *line_coords[8] = {
821       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
822       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
823       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
824       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
825       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
826       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
827       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
828       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
829    };
830 
831    /* emit first end-cap, and start line */
832    for (int i = 0; i < 4; ++i) {
833       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
834          gl_varying_slot location = var->data.location;
835          unsigned location_frac = var->data.location_frac;
836          if (state->prev_varyings[location][location_frac])
837             nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
838       }
839       nir_store_var(b, state->pos_out,
840                     nir_fadd(b, prev, nir_fmul(b, line_offets[i],
841                              nir_channel(b, prev, 3))), 0xf);
842       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
843       nir_emit_vertex(b);
844    }
845 
846    /* finish line and emit last end-cap */
847    for (int i = 4; i < 8; ++i) {
848       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
849          gl_varying_slot location = var->data.location;
850          unsigned location_frac = var->data.location_frac;
851          if (state->varyings[location][location_frac])
852             nir_copy_var(b, var, state->varyings[location][location_frac]);
853       }
854       nir_store_var(b, state->pos_out,
855                     nir_fadd(b, curr, nir_fmul(b, line_offets[i],
856                              nir_channel(b, curr, 3))), 0xf);
857       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
858       nir_emit_vertex(b);
859    }
860    nir_end_primitive(b);
861 
862    nir_pop_if(b, NULL);
863 
864    nir_copy_var(b, state->prev_pos, state->pos_out);
865    nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
866       gl_varying_slot location = var->data.location;
867       unsigned location_frac = var->data.location_frac;
868       if (state->varyings[location][location_frac])
869          nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
870    }
871 
872    // update prev_pos and pos_counter for next vertex
873    b->cursor = nir_after_instr(&intrin->instr);
874    nir_store_var(b, state->pos_counter,
875                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
876                                     1), 1);
877 
878    nir_instr_remove(&intrin->instr);
879    return true;
880 }
881 
882 static bool
883 lower_line_smooth_gs_end_primitive(nir_builder *b,
884                                    nir_intrinsic_instr *intrin,
885                                    struct lower_line_smooth_state *state)
886 {
887    b->cursor = nir_before_instr(&intrin->instr);
888 
889    // reset line counter
890    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
891 
892    nir_instr_remove(&intrin->instr);
893    return true;
894 }
895 
896 static bool
897 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
898 {
899    if (instr->type != nir_instr_type_intrinsic)
900       return false;
901 
902    struct lower_line_smooth_state *state = data;
903    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
904 
905    switch (intrin->intrinsic) {
906    case nir_intrinsic_store_deref:
907       return lower_line_smooth_gs_store(b, intrin, state);
908    case nir_intrinsic_copy_deref:
909       unreachable("should be lowered");
910    case nir_intrinsic_emit_vertex_with_counter:
911    case nir_intrinsic_emit_vertex:
912       return lower_line_smooth_gs_emit_vertex(b, intrin, state);
913    case nir_intrinsic_end_primitive:
914    case nir_intrinsic_end_primitive_with_counter:
915       return lower_line_smooth_gs_end_primitive(b, intrin, state);
916    default:
917       return false;
918    }
919 }
920 
921 static bool
922 lower_line_smooth_gs(nir_shader *shader)
923 {
924    nir_builder b;
925    struct lower_line_smooth_state state;
926 
927    memset(state.varyings, 0, sizeof(state.varyings));
928    memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
929    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
930       gl_varying_slot location = var->data.location;
931       unsigned location_frac = var->data.location_frac;
932       if (location == VARYING_SLOT_POS)
933          continue;
934 
935       char name[100];
936       snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
937       state.varyings[location][location_frac] =
938          nir_variable_create(shader, nir_var_shader_temp,
939                               var->type, name);
940 
941       snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
942       state.prev_varyings[location][location_frac] =
943          nir_variable_create(shader, nir_var_shader_temp,
944                               var->type, name);
945    }
946 
947    state.pos_out =
948       nir_find_variable_with_location(shader, nir_var_shader_out,
949                                       VARYING_SLOT_POS);
950 
951    // if position isn't written, we have nothing to do
952    if (!state.pos_out)
953       return false;
954 
955    unsigned location = 0;
956    nir_foreach_shader_in_variable(var, shader) {
957      if (var->data.driver_location >= location)
958          location = var->data.driver_location + 1;
959    }
960 
961    state.line_coord_out =
962       nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
963                           "__line_coord");
964    state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
965    state.line_coord_out->data.driver_location = location;
966    state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
967    shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
968    shader->num_outputs++;
969 
970    // create temp variables
971    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
972                                         glsl_vec4_type(),
973                                         "__prev_pos");
974    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
975                                            glsl_uint_type(),
976                                            "__pos_counter");
977 
978    // initialize pos_counter
979    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
980    b = nir_builder_at(nir_before_impl(entry));
981    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
982 
983    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
984    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
985 
986    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
987                                        nir_metadata_dominance, &state);
988 }
989 
990 static bool
991 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
992 {
993    int dummy;
994    nir_builder b;
995 
996    nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
997    if (lower_stipple) {
998       stipple_counter = nir_variable_create(shader, nir_var_shader_in,
999                                             glsl_float_type(),
1000                                             "__stipple");
1001       stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1002       stipple_counter->data.driver_location = shader->num_inputs++;
1003       stipple_counter->data.location =
1004          MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1005       shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1006 
1007       stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1008                                             glsl_uint_type(),
1009                                             "stipple_pattern");
1010 
1011       // initialize stipple_pattern
1012       nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1013       b = nir_builder_at(nir_before_impl(entry));
1014       nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1015                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1016       nir_store_var(&b, stipple_pattern, pattern, 1);
1017    }
1018 
1019    nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1020    return true;
1021 }
1022 
1023 static bool
1024 lower_dual_blend(nir_shader *shader)
1025 {
1026    bool progress = false;
1027    nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1028    if (var) {
1029       var->data.location = FRAG_RESULT_DATA0;
1030       var->data.index = 1;
1031       progress = true;
1032    }
1033    nir_shader_preserve_all_metadata(shader);
1034    return progress;
1035 }
1036 
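/* Lower nir_op_pack_64_2x32/nir_op_unpack_64_2x32 to the explicit *_split
 * opcodes operating on the two 32-bit halves.
 */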
1037 static bool
1038 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1039 {
1040    if (instr->type != nir_instr_type_alu)
1041       return false;
1042    nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1043    if (alu_instr->op != nir_op_pack_64_2x32 &&
1044        alu_instr->op != nir_op_unpack_64_2x32)
1045       return false;
1046    b->cursor = nir_before_instr(&alu_instr->instr);
1047    nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1048    nir_def *dest;
1049    switch (alu_instr->op) {
1050    case nir_op_pack_64_2x32:
1051       dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1052       break;
1053    case nir_op_unpack_64_2x32:
1054       dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1055       break;
1056    default:
1057       unreachable("Impossible opcode");
1058    }
1059    nir_def_replace(&alu_instr->def, dest);
1060    return true;
1061 }
1062 
1063 static bool
1064 lower_64bit_pack(nir_shader *shader)
1065 {
1066    return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1067                                        nir_metadata_control_flow, NULL);
1068 }
1069 
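/* Build a passthrough GS that emulates GL_QUADS: it consumes lines_adjacency
 * (4 vertices per primitive), copies the previous stage's outputs through, and
 * emits the quad as two triangles, choosing the vertex ordering according to
 * the provoking-vertex convention.
 */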
1070 nir_shader *
1071 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1072                                const nir_shader *prev_stage)
1073 {
1074    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1075                                                   options,
1076                                                   "filled quad gs");
1077 
1078    nir_shader *nir = b.shader;
1079    nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1080    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1081    nir->info.gs.vertices_in = 4;
1082    nir->info.gs.vertices_out = 6;
1083    nir->info.gs.invocations = 1;
1084    nir->info.gs.active_stream_mask = 1;
1085 
1086    nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1087    memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1088    if (prev_stage->xfb_info) {
1089       size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1090       nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1091    }
1092 
1093    nir_variable *in_vars[VARYING_SLOT_MAX];
1094    nir_variable *out_vars[VARYING_SLOT_MAX];
1095    unsigned num_vars = 0;
1096 
1097    /* Create input/output variables. */
1098    nir_foreach_shader_out_variable(var, prev_stage) {
1099       assert(!var->data.patch);
1100       assert(var->data.location != VARYING_SLOT_PRIMITIVE_ID &&
1101             "not a VS output");
1102 
1103       /* input vars can't be created for those */
1104       if (var->data.location == VARYING_SLOT_LAYER ||
1105           var->data.location == VARYING_SLOT_VIEW_INDEX ||
1106           /* psiz not needed for quads */
1107           var->data.location == VARYING_SLOT_PSIZ)
1108          continue;
1109 
1110       char name[100];
1111       if (var->name)
1112          snprintf(name, sizeof(name), "in_%s", var->name);
1113       else
1114          snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1115 
1116       nir_variable *in = nir_variable_clone(var, nir);
1117       ralloc_free(in->name);
1118       in->name = ralloc_strdup(in, name);
1119       in->type = glsl_array_type(var->type, 4, false);
1120       in->data.mode = nir_var_shader_in;
1121       nir_shader_add_variable(nir, in);
1122 
1123       if (var->name)
1124          snprintf(name, sizeof(name), "out_%s", var->name);
1125       else
1126          snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1127 
1128       nir_variable *out = nir_variable_clone(var, nir);
1129       ralloc_free(out->name);
1130       out->name = ralloc_strdup(out, name);
1131       out->data.mode = nir_var_shader_out;
1132       nir_shader_add_variable(nir, out);
1133 
1134       in_vars[num_vars] = in;
1135       out_vars[num_vars++] = out;
1136    }
1137 
1138    /* When a geometry shader is not used, a fragment shader may read primitive
1139     * ID and get an implicit value without the vertex shader writing an ID. This
1140     * case needs to work even when we inject a GS internally.
1141     *
1142     * However, if a geometry shader precedes a fragment shader that reads
1143     * primitive ID, Vulkan requires that the geometry shader write primitive ID.
1144     * To handle this case correctly, we must write primitive ID, copying the
1145     * fixed-function gl_PrimitiveIDIn input which matches what the fragment
1146     * shader will expect.
1147     *
1148     * If the fragment shader doesn't read primitive ID, this copy will likely be
1149     * optimized out at link-time by the Vulkan driver. Unless this is
1150     * non-monolithic -- in which case we don't know whether the fragment shader
1151     * will read primitive ID either. In both cases, the right thing for Zink
1152     * to do is copy primitive ID unconditionally.
1153     */
1154    in_vars[num_vars] = nir_create_variable_with_location(
1155          nir, nir_var_shader_in, VARYING_SLOT_PRIMITIVE_ID, glsl_int_type());
1156 
1157    out_vars[num_vars] = nir_create_variable_with_location(
1158          nir, nir_var_shader_out, VARYING_SLOT_PRIMITIVE_ID, glsl_int_type());
1159 
1160    num_vars++;
1161 
1162    int mapping_first[] = {0, 1, 2, 0, 2, 3};
1163    int mapping_last[] = {0, 1, 3, 1, 2, 3};
1164    nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1165    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1166    for (unsigned i = 0; i < 6; ++i) {
1167       /* swap indices 2 and 3 */
1168       nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1169                                    nir_imm_int(&b, mapping_last[i]),
1170                                    nir_imm_int(&b, mapping_first[i]));
1171       /* Copy inputs to outputs. */
1172       for (unsigned j = 0; j < num_vars; ++j) {
1173          if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1174             continue;
1175          }
1176 
1177          /* gl_PrimitiveIDIn is not arrayed, all other inputs are */
1178          nir_deref_instr *in_value = nir_build_deref_var(&b, in_vars[j]);
1179          if (in_vars[j]->data.location != VARYING_SLOT_PRIMITIVE_ID)
1180             in_value = nir_build_deref_array(&b, in_value, idx);
1181 
1182          copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1183       }
1184       nir_emit_vertex(&b, 0);
1185       if (i == 2)
1186         nir_end_primitive(&b, 0);
1187    }
1188 
1189    nir_end_primitive(&b, 0);
1190    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1191    nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1192    return nir;
1193 }
1194 
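/* Rewrite load_flat_mask/load_provoking_last system values as UBO loads from
 * fixed offsets in UBO 0 so that nir_inline_uniforms can later fold them to
 * constants; 64-bit values are loaded as two 32-bit dwords and repacked.
 */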
1195 static bool
1196 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1197                                               nir_intrinsic_instr *intrin,
1198                                               void *data)
1199 {
1200    int inlined_uniform_offset;
1201    switch (intrin->intrinsic) {
1202    case nir_intrinsic_load_flat_mask:
1203       inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1204       break;
1205    case nir_intrinsic_load_provoking_last:
1206       inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1207       break;
1208    default:
1209       return false;
1210    }
1211 
1212    b->cursor = nir_before_instr(&intrin->instr);
1213    assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1214    /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1215     * anything with a different bit_size) so we need to split the load. */
1216    int num_dwords = intrin->def.bit_size / 32;
1217    nir_def *dwords[2] = {NULL};
1218    for (unsigned i = 0; i < num_dwords; i++)
1219       dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1220                                    nir_imm_int(b, inlined_uniform_offset + i),
1221                                    .align_mul = intrin->def.bit_size / 8,
1222                                    .align_offset = 0,
1223                                    .range_base = 0, .range = ~0);
1224    nir_def *new_dest_def;
1225    if (intrin->def.bit_size == 32)
1226       new_dest_def = dwords[0];
1227    else
1228       new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1229    nir_def_replace(&intrin->def, new_dest_def);
1230    return true;
1231 }
1232 
1233 bool
1234 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1235 {
1236    return nir_shader_intrinsics_pass(nir,
1237                                        lower_system_values_to_inlined_uniforms_instr,
1238                                        nir_metadata_dominance, NULL);
1239 }
1240 
1241 /* from radeonsi */
1242 static unsigned
1243 amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
1244 {
1245    /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
1246 
1247    switch (consumer->info.stage) {
1248    case MESA_SHADER_TESS_CTRL: /* VS->TCS */
1249       /* Non-amplifying shaders can always have their varying expressions
1250        * moved into later shaders.
1251        */
1252       return UINT_MAX;
1253 
1254    case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
1255       return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
1256              consumer->info.gs.vertices_in == 2 ? 20 : 14;
1257 
1258    case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
1259    case MESA_SHADER_FRAGMENT:
1260       /* Up to 3 uniforms and 5 ALUs. */
1261       return 14;
1262 
1263    default:
1264       unreachable("unexpected shader stage");
1265    }
1266 }
1267 
1268 void
1269 zink_screen_init_compiler(struct zink_screen *screen)
1270 {
1271    static const struct nir_shader_compiler_options
1272    default_options = {
1273       .io_options = nir_io_has_intrinsics | nir_io_separate_clip_cull_distance_arrays,
1274       .lower_ffma16 = true,
1275       .lower_ffma32 = true,
1276       .lower_ffma64 = true,
1277       .lower_scmp = true,
1278       .lower_fdph = true,
1279       .lower_flrp32 = true,
1280       .lower_fsat = true,
1281       .lower_hadd = true,
1282       .lower_iadd_sat = true,
1283       .lower_fisnormal = true,
1284       .lower_extract_byte = true,
1285       .lower_extract_word = true,
1286       .lower_insert_byte = true,
1287       .lower_insert_word = true,
1288 
1289       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1290        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1291        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1292        * effectively unused (no instances in shader-db), it's not worth the
1293        * effort to do so.
1294        */
1295       .lower_ldexp = true,
1296 
1297       .lower_mul_high = true,
1298       .lower_to_scalar = true,
1299       .lower_uadd_carry = true,
1300       .compact_arrays = true,
1301       .lower_usub_borrow = true,
1302       .lower_uadd_sat = true,
1303       .lower_usub_sat = true,
1304       .lower_vector_cmp = true,
1305       .lower_int64_options =
1306          nir_lower_bit_count64 |
1307          nir_lower_find_lsb64 |
1308          nir_lower_ufind_msb64,
1309       .lower_doubles_options = nir_lower_dround_even,
1310       .lower_uniforms_to_ubo = true,
1311       .has_fsub = true,
1312       .has_isub = true,
1313       .lower_mul_2x32_64 = true,
1314       .support_16bit_alu = true, /* not quite what it sounds like */
1315       .support_indirect_inputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_COMPUTE),
1316       .support_indirect_outputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_COMPUTE),
1317       .max_unroll_iterations = 0,
1318    };
1319 
1320    screen->nir_options = default_options;
1321 
1322    if (!screen->info.feats.features.shaderInt64)
1323       screen->nir_options.lower_int64_options = ~0;
1324 
1325    if (!screen->info.feats.features.shaderFloat64) {
1326       screen->nir_options.lower_doubles_options = ~0;
1327       screen->nir_options.lower_flrp64 = true;
1328       screen->nir_options.lower_ffma64 = true;
1329       /* soft fp64 function inlining will blow up loop bodies and effectively
1330        * stop Vulkan drivers from unrolling the loops.
1331        */
1332       screen->nir_options.max_unroll_iterations_fp64 = 32;
1333    }
1334 
1335    if (screen->driver_compiler_workarounds.io_opt) {
1336       switch (zink_driverid(screen)) {
1337       case VK_DRIVER_ID_MESA_RADV:
1338       case VK_DRIVER_ID_AMD_OPEN_SOURCE:
1339       case VK_DRIVER_ID_AMD_PROPRIETARY:
1340          screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1341          break;
1342       default:
1343          mesa_logw("zink: instruction costs not implemented for this driver!");
1344          screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1345       }
1346    } else {
1347       screen->nir_options.io_options |= nir_io_dont_optimize;
1348    }
1349 
1350    /*
1351        The OpFRem and OpFMod instructions use cheap approximations of remainder,
1352        and the error can be large due to the discontinuity in trunc() and floor().
1353        This can produce mathematically unexpected results in some cases, such as
1354        FMod(x,x) computing x rather than 0, and can also cause the result to have
1355        a different sign than the infinitely precise result.
1356 
1357        -Table 84. Precision of core SPIR-V Instructions
1358        * for drivers that are known to have imprecise fmod for doubles, lower dmod
1359     */
1360    if (zink_driverid(screen) == VK_DRIVER_ID_MESA_RADV ||
1361        zink_driverid(screen) == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1362        zink_driverid(screen) == VK_DRIVER_ID_AMD_PROPRIETARY)
1363       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1364 
1365    if (screen->info.have_EXT_shader_demote_to_helper_invocation)
1366       screen->nir_options.discard_is_demote = true;
1367 
1368    screen->nir_options.support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES);
1369    screen->nir_options.support_indirect_outputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES);
1370 }
1371 
1372 const void *
1373 zink_get_compiler_options(struct pipe_screen *pscreen,
1374                           enum pipe_shader_ir ir,
1375                           gl_shader_stage shader)
1376 {
1377    assert(ir == PIPE_SHADER_IR_NIR);
1378    return &zink_screen(pscreen)->nir_options;
1379 }
1380 
1381 struct nir_shader *
1382 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1383 {
1384    if (zink_debug & ZINK_DEBUG_TGSI) {
1385       fprintf(stderr, "TGSI shader:\n---8<---\n");
1386       tgsi_dump_to_file(tokens, 0, stderr);
1387       fprintf(stderr, "---8<---\n\n");
1388    }
1389 
1390    return tgsi_to_nir(tokens, screen, false);
1391 }
1392 
1393 
1394 static bool
1395 def_is_64bit(nir_def *def, void *state)
1396 {
1397    bool *lower = (bool *)state;
1398    if (def && (def->bit_size == 64)) {
1399       *lower = true;
1400       return false;
1401    }
1402    return true;
1403 }
1404 
1405 static bool
1406 src_is_64bit(nir_src *src, void *state)
1407 {
1408    bool *lower = (bool *)state;
1409    if (src && (nir_src_bit_size(*src) == 64)) {
1410       *lower = true;
1411       return false;
1412    }
1413    return true;
1414 }
1415 
1416 static bool
1417 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1418 {
1419    bool lower = false;
1420    /* lower_alu_to_scalar requires a const nir_instr, but nir_foreach_*
1421     * doesn't have const variants, so do the ugly const_cast here. */
1422    nir_instr *instr = (nir_instr *)const_instr;
1423 
1424    nir_foreach_def(instr, def_is_64bit, &lower);
1425    if (lower)
1426       return true;
1427    nir_foreach_src(instr, src_is_64bit, &lower);
1428    return lower;
1429 }
1430 
1431 static bool
1432 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1433 {
1434    nir_instr *instr = (nir_instr *)const_instr;
1435    nir_alu_instr *alu = nir_instr_as_alu(instr);
1436    switch (alu->op) {
1437    case nir_op_pack_64_2x32_split:
1438    case nir_op_pack_32_2x16_split:
1439    case nir_op_unpack_32_2x16_split_x:
1440    case nir_op_unpack_32_2x16_split_y:
1441    case nir_op_unpack_64_2x32_split_x:
1442    case nir_op_unpack_64_2x32_split_y:
1443       return true;
1444    default:
1445       break;
1446    }
1447    return false;
1448 }
1449 
1450 
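/* Cached block variables for uniform_0 / UBO / SSBO access, indexed by access
 * bit size (bit_size >> 4, so 8/16/32/64 -> 0/1/2/4), plus the indices of the
 * first bound UBO and SSBO used to rebase buffer indices.
 */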
1451 struct bo_vars {
1452    nir_variable *uniforms[5];
1453    nir_variable *ubo[5];
1454    nir_variable *ssbo[5];
1455    uint32_t first_ubo;
1456    uint32_t first_ssbo;
1457 };
1458 
1459 static struct bo_vars
1460 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1461 {
1462    struct bo_vars bo;
1463    memset(&bo, 0, sizeof(bo));
1464    if (zs->ubos_used)
1465       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1466    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1467    if (zs->ssbos_used)
1468       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1469    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1470    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1471       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1472       if (var->data.mode == nir_var_mem_ssbo) {
1473          assert(!bo.ssbo[idx]);
1474          bo.ssbo[idx] = var;
1475       } else {
1476          if (var->data.driver_location) {
1477             assert(!bo.ubo[idx]);
1478             bo.ubo[idx] = var;
1479          } else {
1480             assert(!bo.uniforms[idx]);
1481             bo.uniforms[idx] = var;
1482          }
1483       }
1484    }
1485    return bo;
1486 }
1487 
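/* Constant-offset accesses that land entirely past the sized portion of a
 * bound block (and have no trailing unsized array to cover them) are removed:
 * loads are replaced with zero, stores are simply dropped.
 */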
1488 static bool
1489 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1490 {
1491    struct bo_vars *bo = data;
1492    if (instr->type != nir_instr_type_intrinsic)
1493       return false;
1494    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1495    nir_variable *var = NULL;
1496    nir_def *offset = NULL;
1497    bool is_load = true;
1498    b->cursor = nir_before_instr(instr);
1499 
1500    switch (intr->intrinsic) {
1501    case nir_intrinsic_store_ssbo:
1502       var = bo->ssbo[intr->def.bit_size >> 4];
1503       offset = intr->src[2].ssa;
1504       is_load = false;
1505       break;
1506    case nir_intrinsic_load_ssbo:
1507       var = bo->ssbo[intr->def.bit_size >> 4];
1508       offset = intr->src[1].ssa;
1509       break;
1510    case nir_intrinsic_load_ubo:
1511       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1512          var = bo->uniforms[intr->def.bit_size >> 4];
1513       else
1514          var = bo->ubo[intr->def.bit_size >> 4];
1515       offset = intr->src[1].ssa;
1516       break;
1517    default:
1518       return false;
1519    }
1520    nir_src offset_src = nir_src_for_ssa(offset);
1521    if (!nir_src_is_const(offset_src))
1522       return false;
1523 
1524    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1525    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1526    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1527    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1528    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1529       return false;
1530 
1531    unsigned rewrites = 0;
1532    nir_def *result[2];
1533    for (unsigned i = 0; i < intr->num_components; i++) {
1534       if (offset_bytes + i >= size) {
1535          rewrites++;
1536          if (is_load)
1537             result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1538       }
1539    }
1540    assert(rewrites == intr->num_components);
1541    if (is_load) {
1542       nir_def *load = nir_vec(b, result, intr->num_components);
1543       nir_def_rewrite_uses(&intr->def, load);
1544    }
1545    nir_instr_remove(instr);
1546    return true;
1547 }
1548 
1549 static bool
1550 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1551 {
1552    struct bo_vars bo = get_bo_vars(zs, shader);
1553    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1554 }
1555 
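/* Main optimization loop: lower int64 and 64-bit pack ops as needed, scalarize
 * ALU (preserving pack/unpack), then iterate copy-prop, DCE, dead CF, CSE,
 * peephole select, algebraic and constant folding until no pass makes
 * progress, followed by a late-algebraic cleanup loop.
 */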
1556 static void
1557 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1558 {
1559    bool progress;
1560    do {
1561       progress = false;
1562       if (s->options->lower_int64_options)
1563          NIR_PASS_V(s, nir_lower_int64);
1564       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1565          NIR_PASS_V(s, lower_64bit_pack);
1566       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1567       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1568       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1569       NIR_PASS(progress, s, nir_copy_prop);
1570       NIR_PASS(progress, s, nir_opt_remove_phis);
1571       if (s->options->lower_int64_options) {
1572          NIR_PASS(progress, s, nir_lower_64bit_phis);
1573          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1574       }
1575       NIR_PASS(progress, s, nir_opt_dce);
1576       NIR_PASS(progress, s, nir_opt_dead_cf);
1577       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1578       NIR_PASS(progress, s, nir_opt_cse);
1579       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1580       NIR_PASS(progress, s, nir_opt_algebraic);
1581       NIR_PASS(progress, s, nir_opt_constant_folding);
1582       NIR_PASS(progress, s, nir_opt_undef);
1583       NIR_PASS(progress, s, zink_nir_lower_b2b);
1584       if (zs)
1585          NIR_PASS(progress, s, bound_bo_access, zs);
1586       if (can_shrink)
1587          NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
1588    } while (progress);
1589 
1590    do {
1591       progress = false;
1592       NIR_PASS(progress, s, nir_opt_algebraic_late);
1593       if (progress) {
1594          NIR_PASS_V(s, nir_copy_prop);
1595          NIR_PASS_V(s, nir_opt_dce);
1596          NIR_PASS_V(s, nir_opt_cse);
1597       }
1598    } while (progress);
1599 }
1600 
1601 /* - copy the lowered fbfetch variable
1602  * - set the new one up as an input attachment for descriptor 0.6
1603  * - load it as an image
1604  * - overwrite the previous load
1605  */
1606 static bool
1607 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1608 {
1609    bool ms = data != NULL;
1610    if (instr->type != nir_instr_type_intrinsic)
1611       return false;
1612    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1613    if (intr->intrinsic != nir_intrinsic_load_deref)
1614       return false;
1615    nir_variable *var = nir_intrinsic_get_var(intr, 0);
1616    if (!var->data.fb_fetch_output)
1617       return false;
1618    b->cursor = nir_after_instr(instr);
1619    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1620    /* If Dim is SubpassData, ... Image Format must be Unknown
1621     * - SPIRV OpTypeImage specification
1622     */
1623    fbfetch->data.image.format = 0;
1624    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1625    fbfetch->data.mode = nir_var_uniform;
1626    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1628    fbfetch->data.sample = ms;
1629    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1630    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1631    nir_shader_add_variable(b->shader, fbfetch);
1632    nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1633    nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1634    nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1635    nir_def_rewrite_uses(&intr->def, load);
1636    return true;
1637 }
1638 
1639 static bool
1640 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1641 {
1642    nir_foreach_shader_out_variable(var, shader) {
1643       if (var->data.fb_fetch_output) {
1644          *fbfetch = var;
1645          break;
1646       }
1647    }
1648    assert(*fbfetch);
1649    if (!*fbfetch)
1650       return false;
1651    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1652 }
1653 
1654 /*
1655  * Add a check for out of bounds LOD for every texel fetch op
1656  * It boils down to:
1657  * - if (lod < query_levels(tex))
1658  * -    res = txf(tex)
1659  * - else
1660  * -    res = (0, 0, 0, 1)
1661  */
1662 static bool
1663 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1664 {
1665    if (in->type != nir_instr_type_tex)
1666       return false;
1667    nir_tex_instr *txf = nir_instr_as_tex(in);
1668    if (txf->op != nir_texop_txf)
1669       return false;
1670 
1671    b->cursor = nir_before_instr(in);
1672    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1673    assert(lod_idx >= 0);
1674    nir_src lod_src = txf->src[lod_idx].src;
1675    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1676       return false;
1677 
1678    nir_def *lod = lod_src.ssa;
1679 
1680    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1681    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1682    int deref_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_deref);
1683    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1684                                                 1 + !!(offset_idx >= 0) + !!(handle_idx >= 0));
1685    unsigned src_idx = 0;
1686    levels->op = nir_texop_query_levels;
1687    levels->dest_type = nir_type_int | lod->bit_size;
1688    if (deref_idx >= 0) {
1689       levels->src[src_idx].src_type = nir_tex_src_texture_deref;
1690       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[deref_idx].src.ssa);
1691    }
1692    if (offset_idx >= 0) {
1693       levels->src[src_idx].src_type = nir_tex_src_texture_offset;
1694       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1695    }
1696    if (handle_idx >= 0) {
1697       levels->src[src_idx].src_type = nir_tex_src_texture_handle;
1698       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1699    }
1700    nir_def_init(&levels->instr, &levels->def,
1701                 nir_tex_instr_dest_size(levels), 32);
1702    nir_builder_instr_insert(b, &levels->instr);
1703 
1704    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1705    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1706    nir_builder_instr_insert(b, &new_txf->instr);
1707 
1708    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1709    nir_const_value oob_values[4] = {0};
1710    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1711    oob_values[3] = (txf->dest_type & nir_type_float) ?
1712                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1713    nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1714 
1715    nir_pop_if(b, lod_oob_else);
1716    nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1717 
1718    nir_def_rewrite_uses(&txf->def, robust_txf);
1719    nir_instr_remove_v(in);
1720    return true;
1721 }
1722 
1723 /* This pass is used to workaround the lack of out of bounds LOD robustness
1724  * for texel fetch ops in VK_EXT_image_robustness.
1725  */
1726 static bool
1727 lower_txf_lod_robustness(nir_shader *shader)
1728 {
1729    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1730 }
1731 
1732 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1733 static bool
1734 check_psiz(struct nir_shader *s)
1735 {
1736    bool have_psiz = false;
1737    nir_foreach_shader_out_variable(var, s) {
1738       if (var->data.location == VARYING_SLOT_PSIZ) {
1739          /* genuine PSIZ outputs will have this set */
1740          have_psiz |= !!var->data.explicit_location;
1741       }
1742    }
1743    return have_psiz;
1744 }
1745 
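/* Find the variable covering a given location + component, skipping the
 * injected gl_PointSize when a genuine one exists and accounting for the
 * doubled component counts of 64-bit types and the array semantics of
 * clip/cull distances.
 */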
1746 static nir_variable *
1747 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1748 {
1749    assert((int)location >= 0);
1750 
1751    nir_foreach_variable_with_modes(var, nir, mode) {
1752       if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1753          unsigned num_components = glsl_get_vector_elements(var->type);
1754          if (glsl_type_is_64bit(glsl_without_array(var->type)))
1755             num_components *= 2;
1756          if (is_clipcull_dist(var->data.location))
1757             num_components = glsl_get_aoa_size(var->type);
1758          if (var->data.location_frac <= location_frac &&
1759                var->data.location_frac + num_components > location_frac)
1760             return var;
1761       }
1762    }
1763    return NULL;
1764 }
1765 
1766 static bool
1767 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1768 {
1769    unsigned num_components = util_bitcount(output->component_mask);
1770    for (unsigned i = 0; i < num_components; i++)
1771       if (!inlined[output->component_offset + i])
1772          return false;
1773    return true;
1774 }
1775 
1776 static void
1777 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1778 {
1779    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1780    if (last_output < VARYING_SLOT_VAR0)
1781       last_output = VARYING_SLOT_VAR0;
1782    else
1783       last_output++;
1784    /* this should get fixed up by slot remapping */
1785    psiz->data.location = last_output;
1786 }
1787 
1788 static const struct glsl_type *
1789 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1790 {
1791    /* could be dvec/dmat/mat: each member is the same */
1792    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1793    /* determine size of each member type */
1794    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1795    /* normalize slot idx to current type's size */
1796    slot %= slot_count;
1797    unsigned slot_components = glsl_get_components(plain);
1798    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1799       slot_components *= 2;
1800    /* create a vec4 mask of the selected slot's components out of all the components */
1801    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1802    /* return a vecN of the selected components */
1803    slot_components = util_bitcount(mask);
1804    return glsl_vec_type(slot_components);
1805 }
1806 
1807 static const struct glsl_type *
1808 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1809 {
1810    const struct glsl_type *type = slot_type;
1811    unsigned slot_count = 0;
1812    unsigned cur_slot = 0;
1813    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1814    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1815       /* use array type for slot counting but return array member type for unroll */
1816       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1817       type = glsl_without_array(arraytype);
1818       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1819    }
1820    *slot_idx -= (cur_slot - slot_count);
1821    if (!glsl_type_is_struct_or_ifc(type))
1822       /* this is a fully unrolled struct: find the number of vec components to output */
1823       type = clamp_slot_type(type, *slot_idx);
1824    return type;
1825 }
1826 
1827 static unsigned
1828 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1829 {
1830    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1831    const struct glsl_type *orig_type = var->type;
1832    const struct glsl_type *type = glsl_without_array(var->type);
1833    unsigned slot_idx = slot - so_slot;
1834    if (type != orig_type)
1835       slot_idx %= glsl_count_vec4_slots(type, false, false);
1836    /* need to find the vec4 that's being exported by this slot */
1837    while (glsl_type_is_struct_or_ifc(type))
1838       type = unroll_struct_type(type, &slot_idx);
1839 
1840    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1841    unsigned num_components = glsl_get_components(glsl_without_array(type));
1842    /* special handling: clip/cull distance are arrays with vector semantics */
1843    if (is_clipcull_dist(var->data.location)) {
1844       num_components = glsl_array_size(type);
1845       if (slot_idx)
1846          /* this is the second vec4 */
1847          num_components %= 4;
1848       else
1849          /* this is the first vec4 */
1850          num_components = MIN2(num_components, 4);
1851    }
1852    assert(num_components);
1853    /* gallium handles xfb in terms of 32bit units */
1854    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1855       num_components *= 2;
1856    return num_components;
1857 }
1858 
1859 static unsigned
1860 get_var_slot_count(nir_shader *nir, nir_variable *var)
1861 {
1862    assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1863    const struct glsl_type *type = var->type;
1864    if (nir_is_arrayed_io(var, nir->info.stage))
1865       type = glsl_get_array_element(type);
1866    unsigned slot_count = 0;
1867    if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) ||
1868        var->data.location >= VARYING_SLOT_VAR0)
1869       slot_count = glsl_count_vec4_slots(type, false, false);
1870    else if (glsl_type_is_array(type))
1871       slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1872    else
1873       slot_count = 1;
1874    return slot_count;
1875 }
1876 
1877 
1878 static const nir_xfb_output_info *
1879 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1880 {
1881    for (unsigned i = 0; i < xfb_info->output_count; i++) {
1882       const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1883       if (packed_output->location == slot)
1884          return packed_output;
1885    }
1886    return NULL;
1887 }
1888 
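/* Walk nir->xfb_info and try to attach the xfb data directly to the output
 * variables ("inlining" them); outputs that can't be matched one-to-one are
 * recorded as packed and consolidated in the second loop below when their
 * components share a stream/buffer with sequentially increasing offsets.
 */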
1889 static void
1890 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1891 {
1892    bool inlined[VARYING_SLOT_MAX][4] = {0};
1893    uint64_t packed = 0;
1894    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1895    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1896    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1897    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1898    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1899       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1900       unsigned xfb_components = util_bitcount(output->component_mask);
1901       /* always set stride to be used during draw */
1902       zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1903       for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1904          unsigned slot = output->location;
1905          if (inlined[slot][output->component_offset + c])
1906             continue;
1907          nir_variable *var = NULL;
1908          while (!var && slot < VARYING_SLOT_TESS_MAX)
1909             var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1910          slot = output->location;
1911          unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1912          if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1913             /* if no variable is found for the xfb output, no output exists */
1914             inlined[slot][c + output->component_offset] = true;
1915             continue;
1916          }
1917          if (var->data.explicit_xfb_buffer) {
1918             /* handle dvec3 where gallium splits streamout over 2 registers */
1919             for (unsigned j = 0; j < xfb_components; j++)
1920                inlined[slot][c + output->component_offset + j] = true;
1921          }
1922          if (is_inlined(inlined[slot], output))
1923             continue;
1924          assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location));
1925          assert(!glsl_type_is_struct_or_ifc(var->type));
1926          unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1927          if (glsl_type_is_64bit(glsl_without_array(var->type)))
1928             num_components *= 2;
1929          /* if this is the entire variable, try to blast it out during the initial declaration
1930          * structs must be handled later to ensure accurate analysis
1931          */
1932          if ((num_components == xfb_components ||
1933                num_components < xfb_components ||
1934                (num_components > xfb_components && xfb_components == 4))) {
1935             var->data.explicit_xfb_buffer = 1;
1936             var->data.xfb.buffer = output->buffer;
1937             var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1938             var->data.offset = (output->offset + c * sizeof(uint32_t));
1939             var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1940             for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
1941                inlined[slot][c + output->component_offset + j] = true;
1942          } else {
1943             /* otherwise store some metadata for later */
1944             packed |= BITFIELD64_BIT(slot);
1945             packed_components[slot] += xfb_components;
1946             packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
1947             packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
1948             for (unsigned j = 0; j < xfb_components; j++)
1949                packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
1950          }
1951       }
1952    }
1953 
1954    /* if this was flagged as a packed output before, and if all the components are
1955     * being output with the same stream on the same buffer with increasing offsets, this entire variable
1956     * can be consolidated into a single output to conserve locations
1957     */
1958    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1959       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1960       unsigned slot = output->location;
1961       if (is_inlined(inlined[slot], output))
1962          continue;
1963       nir_variable *var = NULL;
1964       while (!var)
1965          var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
1966       slot = output->location;
1967       unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1968       if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
1969          continue;
1970       /* this is a lowered 64bit variable that can't be exported due to packing */
1971       if (var->data.is_xfb)
1972          goto out;
1973 
1974       unsigned num_slots = is_clipcull_dist(var->data.location) ?
1975                            glsl_array_size(var->type) / 4 :
1976                            glsl_count_vec4_slots(var->type, false, false);
1977       /* for each variable, iterate over all the variable's slots and inline the outputs */
1978       for (unsigned j = 0; j < num_slots; j++) {
1979          slot = var->data.location + j;
1980          const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
1981          if (!packed_output)
1982             goto out;
1983 
1984          /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1985          if (!(packed & BITFIELD64_BIT(slot)) ||
1986                util_bitcount(packed_streams[slot]) != 1 ||
1987                util_bitcount(packed_buffers[slot]) != 1)
1988             goto out;
1989 
1990          /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1991          unsigned num_components = get_slot_components(var, slot, var->data.location);
1992          if (num_components != packed_components[slot])
1993             goto out;
1994 
1995          /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1996          uint32_t prev_offset = packed_offsets[packed_output->location][0];
1997          for (unsigned k = 1; k < num_components; k++) {
1998             /* if the offsets are not incrementing as expected, skip consolidation */
1999             if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
2000                goto out;
2001             prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
2002          }
2003       }
2004       /* this output can be consolidated: blast out all the data inlined */
2005       var->data.explicit_xfb_buffer = 1;
2006       var->data.xfb.buffer = output->buffer;
2007       var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2008       var->data.offset = output->offset;
2009       var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2010       /* mark all slot components inlined to skip subsequent loop iterations */
2011       for (unsigned j = 0; j < num_slots; j++) {
2012          slot = var->data.location + j;
2013          for (unsigned k = 0; k < packed_components[slot]; k++)
2014             inlined[slot][k] = true;
2015          packed &= ~BITFIELD64_BIT(slot);
2016       }
2017       continue;
2018 out:
2019       unreachable("xfb should be inlined by now!");
2020    }
2021 }
2022 
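/* State for decompose_attribs: split[0] is the original attribute variable and
 * split[1..] are the per-component replacements; needs_w means the w channel
 * must be re-read from the first split load to get a correct int/float value.
 */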
2023 struct decompose_state {
2024   nir_variable **split;
2025   bool needs_w;
2026 };
2027 
2028 static bool
2029 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2030 {
2031    struct decompose_state *state = data;
2032    nir_variable **split = state->split;
2033    if (instr->type != nir_instr_type_intrinsic)
2034       return false;
2035    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2036    if (intr->intrinsic != nir_intrinsic_load_deref)
2037       return false;
2038    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2039    nir_variable *var = nir_deref_instr_get_variable(deref);
2040    if (var != split[0])
2041       return false;
2042    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2043    b->cursor = nir_after_instr(instr);
2044    nir_def *loads[4];
2045    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2046       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2047    if (state->needs_w) {
2048       /* oob load w component to get correct value for int/float */
2049       loads[3] = nir_channel(b, loads[0], 3);
2050       loads[0] = nir_channel(b, loads[0], 0);
2051    }
2052    nir_def *new_load = nir_vec(b, loads, num_components);
2053    nir_def_rewrite_uses(&intr->def, new_load);
2054    nir_instr_remove_v(instr);
2055    return true;
2056 }
2057 
2058 static bool
2059 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2060 {
2061    uint32_t bits = 0;
2062    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2063       bits |= BITFIELD_BIT(var->data.driver_location);
2064    bits = ~bits;
2065    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2066       nir_variable *split[5];
2067       struct decompose_state state;
2068       state.split = split;
2069       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2070       assert(var);
2071       split[0] = var;
2072       bits |= BITFIELD_BIT(var->data.driver_location);
2073       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2074       unsigned num_components = glsl_get_vector_elements(var->type);
2075       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2076       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2077          split[i+1] = nir_variable_clone(var, nir);
2078          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2079          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2080             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2081          else
2082             split[i+1]->type = new_type;
2083          split[i+1]->data.driver_location = ffs(bits) - 1;
2084          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2085          nir_shader_add_variable(nir, split[i+1]);
2086       }
2087       var->data.mode = nir_var_shader_temp;
2088       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2089    }
2090    nir_fixup_deref_modes(nir);
2091    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2092    optimize_nir(nir, NULL, true);
2093    return true;
2094 }
2095 
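/* Convert byte offsets of UBO/SSBO/shared/scratch accesses into indices of the
 * typed uintN_t arrays used later by remove_bo_access; 64-bit accesses are
 * split into 2x32 loads/stores when shaderInt64 is unavailable (or when ubo0
 * has an unaligned 64-bit load).
 */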
2096 static bool
2097 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2098 {
2099    struct zink_screen *screen = data;
2100    const bool has_int64 = screen->info.feats.features.shaderInt64;
2101    if (instr->type != nir_instr_type_intrinsic)
2102       return false;
2103    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2104    b->cursor = nir_before_instr(instr);
2105    switch (intr->intrinsic) {
2106    case nir_intrinsic_ssbo_atomic:
2107    case nir_intrinsic_ssbo_atomic_swap: {
2108       /* convert offset to uintN_t[idx] */
2109       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2110       nir_src_rewrite(&intr->src[1], offset);
2111       return true;
2112    }
2113    case nir_intrinsic_load_ssbo:
2114    case nir_intrinsic_load_ubo: {
2115       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2116       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2117                         nir_src_is_const(intr->src[0]) &&
2118                         nir_src_as_uint(intr->src[0]) == 0 &&
2119                         intr->def.bit_size == 64 &&
2120                         nir_intrinsic_align_offset(intr) % 8 != 0;
2121       force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2122       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2123       nir_src_rewrite(&intr->src[1], offset);
2124       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2125       if (force_2x32) {
2126          /* this is always scalarized */
2127          assert(intr->def.num_components == 1);
2128          /* rewrite as 2x32 */
2129          nir_def *load[2];
2130          for (unsigned i = 0; i < 2; i++) {
2131             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2132                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2133             else
2134                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2135             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2136          }
2137          /* cast back to 64bit */
2138          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2139          nir_def_rewrite_uses(&intr->def, casted);
2140          nir_instr_remove(instr);
2141       }
2142       return true;
2143    }
2144    case nir_intrinsic_load_scratch:
2145    case nir_intrinsic_load_shared: {
2146       b->cursor = nir_before_instr(instr);
2147       bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2148       nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2149       nir_src_rewrite(&intr->src[0], offset);
2150       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2151       if (force_2x32) {
2152          /* this is always scalarized */
2153          assert(intr->def.num_components == 1);
2154          /* rewrite as 2x32 */
2155          nir_def *load[2];
2156          for (unsigned i = 0; i < 2; i++)
2157             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2158          /* cast back to 64bit */
2159          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2160          nir_def_rewrite_uses(&intr->def, casted);
2161          nir_instr_remove(instr);
2162          return true;
2163       }
2164       break;
2165    }
2166    case nir_intrinsic_store_ssbo: {
2167       b->cursor = nir_before_instr(instr);
2168       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2169       nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2170       nir_src_rewrite(&intr->src[2], offset);
2171       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2172       if (force_2x32) {
2173          /* this is always scalarized */
2174          assert(intr->src[0].ssa->num_components == 1);
2175          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2176          for (unsigned i = 0; i < 2; i++)
2177             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2178          nir_instr_remove(instr);
2179       }
2180       return true;
2181    }
2182    case nir_intrinsic_store_scratch:
2183    case nir_intrinsic_store_shared: {
2184       b->cursor = nir_before_instr(instr);
2185       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2186       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2187       nir_src_rewrite(&intr->src[1], offset);
2188       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2189       if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2190          /* this is always scalarized */
2191          assert(intr->src[0].ssa->num_components == 1);
2192          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2193          for (unsigned i = 0; i < 2; i++)
2194             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2195          nir_instr_remove(instr);
2196       }
2197       return true;
2198    }
2199    default:
2200       break;
2201    }
2202    return false;
2203 }
2204 
2205 static bool
2206 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2207 {
2208    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2209 }
2210 
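/* Return the block variable matching this access's bit size, cloning the
 * 32-bit variant on first use and retyping it as arrays of uintN_t (a sized
 * "base" member plus an "unsized" trailing array).
 */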
2211 static nir_variable *
2212 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2213 {
2214    nir_variable *var, **ptr;
2215    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2216 
2217    if (ssbo)
2218       ptr = &bo->ssbo[bit_size >> 4];
2219    else {
2220       if (!idx) {
2221          ptr = &bo->uniforms[bit_size >> 4];
2222       } else
2223          ptr = &bo->ubo[bit_size >> 4];
2224    }
2225    var = *ptr;
2226    if (!var) {
2227       if (ssbo)
2228          var = bo->ssbo[32 >> 4];
2229       else {
2230          if (!idx)
2231             var = bo->uniforms[32 >> 4];
2232          else
2233             var = bo->ubo[32 >> 4];
2234       }
2235       var = nir_variable_clone(var, shader);
2236       if (ssbo)
2237          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2238       else
2239          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2240       *ptr = var;
2241       nir_shader_add_variable(shader, var);
2242 
2243       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2244       fields[0].name = ralloc_strdup(shader, "base");
2245       fields[1].name = ralloc_strdup(shader, "unsized");
2246       unsigned array_size = glsl_get_length(var->type);
2247       const struct glsl_type *bare_type = glsl_without_array(var->type);
2248       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2249       unsigned length = glsl_get_length(array_type);
2250       const struct glsl_type *type;
2251       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2252       if (bit_size > 32) {
2253          assert(bit_size == 64);
2254          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2255       } else {
2256          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2257       }
2258       fields[0].type = type;
2259       fields[1].type = unsized;
2260       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2261       var->data.driver_location = idx;
2262    }
2263    return var;
2264 }
2265 
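/* Rewrite ssbo_atomic/ssbo_atomic_swap into deref_atomic/deref_atomic_swap on
 * the typed SSBO array, emitting one atomic per component at consecutive
 * offsets.
 */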
2266 static void
2267 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2268 {
2269    nir_intrinsic_op op;
2270    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2271    if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2272       op = nir_intrinsic_deref_atomic;
2273    else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2274       op = nir_intrinsic_deref_atomic_swap;
2275    else
2276       unreachable("unknown intrinsic");
2277    nir_def *offset = intr->src[1].ssa;
2278    nir_src *src = &intr->src[0];
2279    nir_variable *var = get_bo_var(b->shader, bo, true, src,
2280                                   intr->def.bit_size);
2281    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2282    nir_def *idx = src->ssa;
2283    if (bo->first_ssbo)
2284       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2285    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2286    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2287 
2288    /* generate new atomic deref ops for every component */
2289    nir_def *result[4];
2290    unsigned num_components = intr->def.num_components;
2291    for (unsigned i = 0; i < num_components; i++) {
2292       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2293       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2294       nir_def_init(&new_instr->instr, &new_instr->def, 1,
2295                    intr->def.bit_size);
2296       nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2297       new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2298       /* deref ops have no offset src, so copy the srcs after it */
2299       for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2300          new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2301       nir_builder_instr_insert(b, &new_instr->instr);
2302 
2303       result[i] = &new_instr->def;
2304       offset = nir_iadd_imm(b, offset, 1);
2305    }
2306 
2307    nir_def *load = nir_vec(b, result, num_components);
2308    nir_def_replace(&intr->def, load);
2309 }
2310 
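/* Replace explicit load_ubo/load_ssbo/store_ssbo and SSBO atomics with
 * deref-based access into the variables from get_bo_var, rebasing the block
 * index by first_ubo/first_ssbo.
 */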
2311 static bool
2312 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2313 {
2314    struct bo_vars *bo = data;
2315    if (instr->type != nir_instr_type_intrinsic)
2316       return false;
2317    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2318    nir_variable *var = NULL;
2319    nir_def *offset = NULL;
2320    bool is_load = true;
2321    b->cursor = nir_before_instr(instr);
2322    nir_src *src;
2323    bool ssbo = true;
2324    switch (intr->intrinsic) {
2325    case nir_intrinsic_ssbo_atomic:
2326    case nir_intrinsic_ssbo_atomic_swap:
2327       rewrite_atomic_ssbo_instr(b, instr, bo);
2328       return true;
2329    case nir_intrinsic_store_ssbo:
2330       src = &intr->src[1];
2331       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2332       offset = intr->src[2].ssa;
2333       is_load = false;
2334       break;
2335    case nir_intrinsic_load_ssbo:
2336       src = &intr->src[0];
2337       var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2338       offset = intr->src[1].ssa;
2339       break;
2340    case nir_intrinsic_load_ubo:
2341       src = &intr->src[0];
2342       var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2343       offset = intr->src[1].ssa;
2344       ssbo = false;
2345       break;
2346    default:
2347       return false;
2348    }
2349    assert(var);
2350    assert(offset);
2351    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2352    nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2353    if (!ssbo && bo->first_ubo && var->data.driver_location)
2354       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2355    else if (ssbo && bo->first_ssbo)
2356       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2357    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2358                                                         nir_i2iN(b, idx, deref_var->def.bit_size));
2359    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2360    assert(intr->num_components <= 2);
2361    if (is_load) {
2362       nir_def *result[2];
2363       for (unsigned i = 0; i < intr->num_components; i++) {
2364          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2365                                                             nir_i2iN(b, offset, deref_struct->def.bit_size));
2366          result[i] = nir_load_deref(b, deref_arr);
2367          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2368             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2369          offset = nir_iadd_imm(b, offset, 1);
2370       }
2371       nir_def *load = nir_vec(b, result, intr->num_components);
2372       nir_def_rewrite_uses(&intr->def, load);
2373    } else {
2374       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2375                                                          nir_i2iN(b, offset, deref_struct->def.bit_size));
2376       nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2377    }
2378    nir_instr_remove(instr);
2379    return true;
2380 }
2381 
2382 static bool
2383 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2384 {
2385    struct bo_vars bo = get_bo_vars(zs, shader);
2386    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2387 }
2388 
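/* Classify shader I/O intrinsics: returns false for anything that isn't I/O,
 * otherwise sets is_load/is_input/is_interp via the fallthrough chain below.
 */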
2389 static bool
2390 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2391 {
2392    switch (intr->intrinsic) {
2393    case nir_intrinsic_load_interpolated_input:
2394       *is_interp = true;
2395       FALLTHROUGH;
2396    case nir_intrinsic_load_input:
2397    case nir_intrinsic_load_per_vertex_input:
2398       *is_input = true;
2399       FALLTHROUGH;
2400    case nir_intrinsic_load_output:
2401    case nir_intrinsic_load_per_vertex_output:
2402    case nir_intrinsic_load_per_primitive_output:
2403       *is_load = true;
2404       FALLTHROUGH;
2405    case nir_intrinsic_store_output:
2406    case nir_intrinsic_store_per_primitive_output:
2407    case nir_intrinsic_store_per_vertex_output:
2408       break;
2409    default:
2410       return false;
2411    }
2412    return true;
2413 }
2414 
2415 static bool
2416 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2417 {
2418    switch (intr->intrinsic) {
2419    case nir_intrinsic_load_per_vertex_input:
2420    case nir_intrinsic_load_per_vertex_output:
2421    case nir_intrinsic_load_per_primitive_output:
2422    case nir_intrinsic_store_per_primitive_output:
2423    case nir_intrinsic_store_per_vertex_output:
2424       return true;
2425    default:
2426       break;
2427    }
2428    return false;
2429 }
2430 
2431 static bool
2432 find_var_deref(nir_shader *nir, nir_variable *var)
2433 {
2434    nir_foreach_function_impl(impl, nir) {
2435       nir_foreach_block(block, impl) {
2436          nir_foreach_instr(instr, block) {
2437             if (instr->type != nir_instr_type_deref)
2438                continue;
2439             nir_deref_instr *deref = nir_instr_as_deref(instr);
2440             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2441                return true;
2442          }
2443       }
2444    }
2445    return false;
2446 }
2447 
2448 static bool
2449 find_var_io(nir_shader *nir, nir_variable *var)
2450 {
2451    nir_foreach_function(function, nir) {
2452       if (!function->impl)
2453          continue;
2454 
2455       nir_foreach_block(block, function->impl) {
2456          nir_foreach_instr(instr, block) {
2457             if (instr->type != nir_instr_type_intrinsic)
2458                continue;
2459             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2460             bool is_load = false;
2461             bool is_input = false;
2462             bool is_interp = false;
2463             if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2464                continue;
2465             if (var->data.mode == nir_var_shader_in && !is_input)
2466                continue;
2467             if (var->data.mode == nir_var_shader_out && is_input)
2468                continue;
2469             unsigned slot_offset = 0;
2470             if (var->data.fb_fetch_output && !is_load)
2471                continue;
2472             if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2473                continue;
2474             nir_src *src_offset = nir_get_io_offset_src(intr);
2475             if (src_offset && nir_src_is_const(*src_offset))
2476                slot_offset = nir_src_as_uint(*src_offset);
2477             unsigned slot_count = get_var_slot_count(nir, var);
2478             if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2479                 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2480                 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2481                 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2482                return true;
2483          }
2484       }
2485    }
2486    return false;
2487 }
2488 
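/* gl_Layer must be written as 0 when the framebuffer isn't layered: route the
 * original layer output through a bcsel on the "framebuffer is layered" push
 * constant into a new clamped output variable.
 */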
2489 struct clamp_layer_output_state {
2490    nir_variable *original;
2491    nir_variable *clamped;
2492 };
2493 
2494 static void
2495 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2496 {
2497    nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2498                                                          nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2499    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2500    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2501    nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2502                                   nir_load_deref(b, original_deref),
2503                                   nir_imm_int(b, 0));
2504    nir_store_deref(b, clamped_deref, layer, 0);
2505 }
2506 
2507 static bool
2508 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2509 {
2510    struct clamp_layer_output_state *state = data;
2511    switch (instr->type) {
2512    case nir_instr_type_intrinsic: {
2513       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2514       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2515           intr->intrinsic != nir_intrinsic_emit_vertex)
2516          return false;
2517       b->cursor = nir_before_instr(instr);
2518       clamp_layer_output_emit(b, state);
2519       return true;
2520    }
2521    default: return false;
2522    }
2523 }
2524 
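/* Workaround for drivers that need a sanitized gl_Layer (see
 * driver_compiler_workarounds.needs_sanitised_layer): emit a second
 * "layer_clamped" output that takes the original value only when the
 * framebuffer is layered (checked via a push constant) and 0 otherwise.
 * The original output is either moved to a generic varying slot (so xfb and
 * fragment consumers keep reading the unclamped value) or demoted to a temp.
 * Returns true if the shaders were modified.
 */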
2525 static bool
2526 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2527 {
2528    switch (vs->info.stage) {
2529    case MESA_SHADER_VERTEX:
2530    case MESA_SHADER_GEOMETRY:
2531    case MESA_SHADER_TESS_EVAL:
2532       break;
2533    default:
2534       unreachable("invalid last vertex stage!");
2535    }
2536    struct clamp_layer_output_state state = {0};
2537    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2538    if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2539       return false;
2540    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2541    state.clamped->data.location = VARYING_SLOT_LAYER;
2542    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2543    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2544       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2545       state.original->data.driver_location = (*next_location)++;
2546       if (fs_var) {
2547          fs_var->data.location = state.original->data.location;
2548          fs_var->data.driver_location = state.original->data.driver_location;
2549       }
2550    } else {
2551       if (state.original->data.explicit_xfb_buffer) {
2552          /* Will xfb the clamped output but still better than nothing */
2553          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2554          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2555          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2556          state.clamped->data.offset = state.original->data.offset;
2557          state.clamped->data.stream = state.original->data.stream;
2558       }
2559       state.original->data.mode = nir_var_shader_temp;
2560       nir_fixup_deref_modes(vs);
2561    }
2562    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2563       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2564    } else {
2565       nir_builder b;
2566       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2567       b = nir_builder_at(nir_after_impl(impl));
2568       assert(impl->end_block->predecessors->entries == 1);
2569       clamp_layer_output_emit(&b, &state);
2570       nir_metadata_preserve(impl, nir_metadata_dominance);
2571    }
2572    optimize_nir(vs, NULL, true);
2573    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2574    return true;
2575 }
2576 
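/* Scratch state for zink_compiler_assign_io(): maps original varying slots to
 * compacted driver_locations, with separate maps and counters for patch
 * varyings. slot_track/patch_slot_track record, per component, which slots
 * have already been claimed so overlapping assignments trip an assertion.
 */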
2577 struct io_slot_map {
2578    uint64_t *patch_slot_track;
2579    uint64_t *slot_track;
2580    unsigned char *slot_map;
2581    unsigned reserved;
2582    unsigned char *patch_slot_map;
2583    unsigned patch_reserved;
2584 };
2585 
2586 static void
2587 assign_track_slot_mask(struct io_slot_map *io, nir_variable *var, unsigned slot, unsigned num_slots)
2588 {
2589    uint64_t *track = var->data.patch ? io->patch_slot_track : io->slot_track;
2590    uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(glsl_without_array(var->type))) << var->data.location_frac;
2591    uint64_t slot_mask = BITFIELD64_RANGE(slot, num_slots);
2592    u_foreach_bit(c, mask) {
2593       assert((track[c] & slot_mask) == 0);
2594       track[c] |= slot_mask;
2595    }
2596 }
2597 
2598 static void
2599 assign_slot_io(gl_shader_stage stage, struct io_slot_map *io, nir_variable *var, unsigned slot)
2600 {
2601    unsigned num_slots;
2602    if (nir_is_arrayed_io(var, stage))
2603       num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2604    else
2605       num_slots = glsl_count_vec4_slots(var->type, false, false);
2606    uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2607    assign_track_slot_mask(io, var, slot, num_slots);
2608    if (slot_map[slot] != 0xff)
2609       return;
2610    unsigned *reserved = var->data.patch ? &io->patch_reserved : &io->reserved;
2611    assert(*reserved + num_slots <= MAX_VARYING);
2612    assert(*reserved < MAX_VARYING);
2613    for (unsigned i = 0; i < num_slots; i++)
2614       slot_map[slot + i] = (*reserved)++;
2615 }
2616 
2617 static void
2618 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2619 {
2620    unsigned slot = var->data.location;
2621    switch (slot) {
2622    case -1:
2623       unreachable("there should be no UINT32_MAX location variables!");
2624       break;
2625    case VARYING_SLOT_POS:
2626    case VARYING_SLOT_PSIZ:
2627    case VARYING_SLOT_LAYER:
2628    case VARYING_SLOT_PRIMITIVE_ID:
2629    case VARYING_SLOT_CLIP_DIST0:
2630    case VARYING_SLOT_CULL_DIST0:
2631    case VARYING_SLOT_VIEWPORT:
2632    case VARYING_SLOT_FACE:
2633    case VARYING_SLOT_TESS_LEVEL_OUTER:
2634    case VARYING_SLOT_TESS_LEVEL_INNER:
2635       /* use a sentinel value to avoid counting later */
2636       var->data.driver_location = UINT32_MAX;
2637       return;
2638 
2639    default:
2640       break;
2641    }
2642    if (var->data.patch) {
2643       assert(slot >= VARYING_SLOT_PATCH0);
2644       slot -= VARYING_SLOT_PATCH0;
2645    }
2646    assign_slot_io(stage, io, var, slot);
2647    slot = var->data.patch ? io->patch_slot_map[slot] : io->slot_map[slot];
2648    assert(slot < MAX_VARYING);
2649    var->data.driver_location = slot;
2650 }
2651 
2652 ALWAYS_INLINE static bool
2653 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2654 {
2655    if (stage != MESA_SHADER_FRAGMENT)
2656       return false;
2657    return var->data.location >= VARYING_SLOT_TEX0 &&
2658           var->data.location <= VARYING_SLOT_TEX7;
2659 }
2660 
2661 static bool
2662 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2663 {
2664    unsigned slot = var->data.location;
2665    switch (slot) {
2666    case VARYING_SLOT_POS:
2667    case VARYING_SLOT_PSIZ:
2668    case VARYING_SLOT_LAYER:
2669    case VARYING_SLOT_PRIMITIVE_ID:
2670    case VARYING_SLOT_CLIP_DIST0:
2671    case VARYING_SLOT_CULL_DIST0:
2672    case VARYING_SLOT_VIEWPORT:
2673    case VARYING_SLOT_FACE:
2674    case VARYING_SLOT_TESS_LEVEL_OUTER:
2675    case VARYING_SLOT_TESS_LEVEL_INNER:
2676       /* use a sentinel value to avoid counting later */
2677       var->data.driver_location = UINT_MAX;
2678       return true;
2679    default:
2680       break;
2681    }
2682    if (var->data.patch) {
2683       assert(slot >= VARYING_SLOT_PATCH0);
2684       slot -= VARYING_SLOT_PATCH0;
2685    }
2686    uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2687    if (slot_map[slot] == (unsigned char)-1) {
2688       /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2689        * so keep for now and eliminate later
2690        */
2691       if (is_texcoord(stage, var)) {
2692          var->data.driver_location = UINT32_MAX;
2693          return true;
2694       }
2695       /* patch variables may be read in the workgroup */
2696       if (stage != MESA_SHADER_TESS_CTRL)
2697          /* dead io */
2698          return false;
2699       assign_slot_io(stage, io, var, slot);
2700    }
2701    var->data.driver_location = slot_map[slot];
2702    return true;
2703 }
2704 
2705 
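/* Replaces loads of an eliminated input with a constant: zero for most slots,
 * (0,0,0,1) for fragment color/back-color inputs. Run when the producer never
 * writes the slot the consumer is reading.
 */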
2706 static bool
2707 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2708 {
2709    nir_variable *var = data;
2710    if (instr->type != nir_instr_type_intrinsic)
2711       return false;
2712 
2713    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2714    bool is_load = false;
2715    bool is_input = false;
2716    bool is_interp = false;
2717    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2718       return false;
2719    if (!is_load)
2720       return false;
2721    unsigned location = nir_intrinsic_io_semantics(intr).location;
2722    if (location != var->data.location)
2723       return false;
2724    b->cursor = nir_before_instr(instr);
2725    nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2726                                 intr->def.bit_size);
2727    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2728       switch (location) {
2729       case VARYING_SLOT_COL0:
2730       case VARYING_SLOT_COL1:
2731       case VARYING_SLOT_BFC0:
2732       case VARYING_SLOT_BFC1:
2733          /* default color is 0,0,0,1 */
2734          if (intr->def.num_components == 4)
2735             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2736          break;
2737       default:
2738          break;
2739       }
2740    }
2741    nir_def_replace(&intr->def, zero);
2742    return true;
2743 }
2744 
2745 
2746 
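/* Removes gl_PointSize stores. With 'one' set, only stores of the constant 1.0
 * are deleted (safe because maintenance5 defaults PointSize to 1.0); otherwise
 * every PSIZ store is deleted, e.g. after the variable has been demoted.
 */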
2747 static bool
2748 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2749 {
2750    switch (intr->intrinsic) {
2751    case nir_intrinsic_store_output:
2752    case nir_intrinsic_store_per_primitive_output:
2753    case nir_intrinsic_store_per_vertex_output:
2754       break;
2755    default:
2756       return false;
2757    }
2758    if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2759       return false;
2760    if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2761       nir_instr_remove(&intr->instr);
2762       return true;
2763    }
2764    return false;
2765 }
2766 
2767 static bool
2768 delete_psiz_store(nir_shader *nir, bool one)
2769 {
2770    bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2771                                               nir_metadata_dominance, one ? nir : NULL);
2772    if (progress)
2773       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2774    return progress;
2775 }
2776 
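/* For lowered-IO shader pairs: find_max_write_components() collects the mask of
 * components the producer actually writes for a given slot, and
 * fill_zero_reads() then rewrites consumer reads of never-written components to
 * 0 (or alpha=1 for fragment color inputs) so no undefined data is consumed.
 */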
2777 struct write_components {
2778    unsigned slot;
2779    uint32_t component_mask;
2780 };
2781 
2782 static bool
2783 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2784 {
2785    struct write_components *wc = data;
2786    bool is_load = false;
2787    bool is_input = false;
2788    bool is_interp = false;
2789    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2790       return false;
2791    if (!is_input)
2792       return false;
2793    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2794    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2795       return false;
2796    unsigned num_components = intr->num_components;
2797    unsigned c = nir_intrinsic_component(intr);
2798    if (intr->def.bit_size == 64)
2799       num_components *= 2;
2800    nir_src *src_offset = nir_get_io_offset_src(intr);
2801    if (!nir_src_is_const(*src_offset))
2802       return false;
2803    unsigned slot_offset = nir_src_as_uint(*src_offset);
2804    if (s.location + slot_offset != wc->slot)
2805       return false;
2806    uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2807    if (intr->def.bit_size == 64)
2808       readmask |= readmask << (intr->num_components + c);
2809    /* handle dvec3/dvec4 */
2810    if (num_components + c > 4)
2811       readmask >>= 4;
2812    if ((wc->component_mask & readmask) == readmask)
2813       return false;
2814    uint32_t rewrite_mask = readmask & ~wc->component_mask;
2815    if (!rewrite_mask)
2816       return false;
2817    b->cursor = nir_after_instr(&intr->instr);
2818    nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2819    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2820       switch (wc->slot) {
2821       case VARYING_SLOT_COL0:
2822       case VARYING_SLOT_COL1:
2823       case VARYING_SLOT_BFC0:
2824       case VARYING_SLOT_BFC1:
2825          /* default color is 0,0,0,1 */
2826          if (intr->def.num_components == 4)
2827             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2828          break;
2829       default:
2830          break;
2831       }
2832    }
2833    rewrite_mask >>= c;
2834    nir_def *dest = &intr->def;
2835    u_foreach_bit(component, rewrite_mask)
2836       dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2837    nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2838    return true;
2839 }
2840 
2841 static bool
2842 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2843 {
2844    struct write_components *wc = data;
2845    bool is_load = false;
2846    bool is_input = false;
2847    bool is_interp = false;
2848    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2849       return false;
2850    if (is_input || is_load)
2851       return false;
2852    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2853    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2854       return false;
2855    unsigned location = s.location;
2856    unsigned c = nir_intrinsic_component(intr);
2857    uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2858    if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2859       unsigned num_components = intr->num_components * 2;
2860       nir_src *src_offset = nir_get_io_offset_src(intr);
2861       if (nir_src_is_const(*src_offset)) {
2862          if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2863             return false;
2864       }
2865       wrmask |= wrmask << intr->num_components;
2866       /* handle dvec3/dvec4 */
2867       if (num_components + c > 4)
2868          wrmask >>= 4;
2869    }
2870    wc->component_mask |= wrmask;
2871    return false;
2872 }
2873 
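/* Matches IO between a producer/consumer shader pair: strips unneeded
 * PointSize and xfb outputs, compacts the remaining varyings into matching
 * driver_locations on both sides, zero-fills or removes reads of outputs the
 * producer never writes, and applies the layer-clamp workaround when needed.
 */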
2874 void
2875 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2876 {
2877    uint64_t slot_track[4] = {0};
2878    uint64_t patch_slot_track[4] = {0};
2879    unsigned char slot_map[VARYING_SLOT_MAX];
2880    memset(slot_map, -1, sizeof(slot_map));
2881    unsigned char patch_slot_map[VARYING_SLOT_MAX];
2882    memset(patch_slot_map, -1, sizeof(patch_slot_map));
2883    struct io_slot_map io = {
2884       .patch_slot_track = patch_slot_track,
2885       .slot_track = slot_track,
2886       .slot_map = slot_map,
2887       .patch_slot_map = patch_slot_map,
2888       .reserved = 0,
2889       .patch_reserved = 0,
2890    };
2891    bool do_fixup = false;
2892    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2893    nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2894    if (var) {
2895       bool can_remove = false;
2896       if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2897          /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2898          if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2899             can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2900          else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2901             can_remove = !var->data.explicit_location;
2902       }
2903       /* remove injected pointsize from all but the last vertex stage */
2904       if (can_remove) {
2905          var->data.mode = nir_var_shader_temp;
2906          nir_fixup_deref_modes(producer);
2907          delete_psiz_store(producer, false);
2908          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2909          optimize_nir(producer, NULL, true);
2910       }
2911    }
2912    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2913       producer->info.has_transform_feedback_varyings = false;
2914       nir_foreach_shader_out_variable(var_out, producer)
2915          var_out->data.explicit_xfb_buffer = false;
2916    }
2917    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2918       /* never assign from tcs -> tes, always invert */
2919       nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2920          assign_producer_var_io(consumer->info.stage, var_in, &io);
2921       nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2922          if (!assign_consumer_var_io(producer->info.stage, var_out, &io))
2923             /* this is an output, nothing more needs to be done for it to be dropped */
2924             do_fixup = true;
2925       }
2926    } else {
2927       nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2928          assign_producer_var_io(producer->info.stage, var_out, &io);
2929       nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2930          if (!assign_consumer_var_io(consumer->info.stage, var_in, &io)) {
2931             do_fixup = true;
2932             /* input needs to be rewritten */
2933             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
2934          }
2935       }
2936       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_compiler_workarounds.needs_sanitised_layer)
2937          do_fixup |= clamp_layer_output(producer, consumer, &io.reserved);
2938    }
2939    nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
2940    if (producer->info.io_lowered && consumer->info.io_lowered) {
2941       u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
2942          struct write_components wc = {slot, 0};
2943          nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
2944          assert(wc.component_mask);
2945          if (wc.component_mask != BITFIELD_MASK(4))
2946             do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
2947       }
2948    }
2949    if (!do_fixup)
2950       return;
2951    nir_fixup_deref_modes(nir);
2952    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2953    optimize_nir(nir, NULL, true);
2954 }
2955 
2956 /* all types that hit this function contain something that is 64bit */
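/* Examples of the rewrite (for the !doubles_only case): double -> vec2,
 * dvec2 -> vec4, and dvec3/dvec4/matrices -> a packed struct of vec4s plus a
 * trailing vec2 when needed, e.g. dvec3 -> struct(dvec3) { vec4, vec2 }.
 * With doubles_only, double-based scalars/vectors are instead retyped as
 * uint64 vectors of the same width.
 */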
2957 static const struct glsl_type *
2958 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2959 {
2960    if (glsl_type_is_array(type)) {
2961       const struct glsl_type *child = glsl_get_array_element(type);
2962       unsigned elements = glsl_array_size(type);
2963       unsigned stride = glsl_get_explicit_stride(type);
2964       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2965    }
2966    /* rewrite structs recursively */
2967    if (glsl_type_is_struct_or_ifc(type)) {
2968       unsigned nmembers = glsl_get_length(type);
2969       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2970       unsigned xfb_offset = 0;
2971       for (unsigned i = 0; i < nmembers; i++) {
2972          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2973          fields[i] = *f;
2974          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2975          if (i < nmembers - 1 && xfb_offset % 8 &&
2976              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2977               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2978             var->data.is_xfb = true;
2979          }
2980          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2981       }
2982       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2983    }
2984    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2985       return type;
2986    if (doubles_only && glsl_type_is_vector_or_scalar(type))
2987       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2988    enum glsl_base_type base_type;
2989    switch (glsl_get_base_type(type)) {
2990    case GLSL_TYPE_UINT64:
2991       base_type = GLSL_TYPE_UINT;
2992       break;
2993    case GLSL_TYPE_INT64:
2994       base_type = GLSL_TYPE_INT;
2995       break;
2996    case GLSL_TYPE_DOUBLE:
2997       base_type = GLSL_TYPE_FLOAT;
2998       break;
2999    default:
3000       unreachable("unknown 64-bit vertex attribute format!");
3001    }
3002    if (glsl_type_is_scalar(type))
3003       return glsl_vector_type(base_type, 2);
3004    unsigned num_components;
3005    if (glsl_type_is_matrix(type)) {
3006       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
3007       unsigned vec_components = glsl_get_vector_elements(type);
3008       if (vec_components == 3)
3009          vec_components = 4;
3010       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
3011    } else {
3012       num_components = glsl_get_vector_elements(type) * 2;
3013       if (num_components <= 4)
3014          return glsl_vector_type(base_type, num_components);
3015    }
3016    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
3017    struct glsl_struct_field fields[8] = {0};
3018    unsigned remaining = num_components;
3019    unsigned nfields = 0;
3020    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
3021       assert(i < ARRAY_SIZE(fields));
3022       fields[i].name = "";
3023       fields[i].offset = i * 16;
3024       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
3025    }
3026    char buf[64];
3027    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
3028    return glsl_struct_type(fields, nfields, buf, true);
3029 }
3030 
3031 static const struct glsl_type *
3032 deref_is_matrix(nir_deref_instr *deref)
3033 {
3034    if (glsl_type_is_matrix(deref->type))
3035       return deref->type;
3036    nir_deref_instr *parent = nir_deref_instr_parent(deref);
3037    if (parent)
3038       return deref_is_matrix(parent);
3039    return NULL;
3040 }
3041 
3042 static bool
3043 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
3044                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
3045 {
3046    bool func_progress = false;
3047    nir_builder b = nir_builder_create(impl);
3048    nir_foreach_block(block, impl) {
3049       nir_foreach_instr_safe(instr, block) {
3050          switch (instr->type) {
3051          case nir_instr_type_deref: {
3052             nir_deref_instr *deref = nir_instr_as_deref(instr);
3053             if (!(deref->modes & var->data.mode))
3054                continue;
3055             if (nir_deref_instr_get_variable(deref) != var)
3056                continue;
3057 
3058             /* matrix types are special: store the original deref type for later use */
3059             const struct glsl_type *matrix = deref_is_matrix(deref);
3060             nir_deref_instr *parent = nir_deref_instr_parent(deref);
3061             if (!matrix) {
3062                /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
3063                hash_table_foreach(derefs, he) {
3064                   /* propagate parent matrix type to row deref */
3065                   if (he->key == parent)
3066                      matrix = he->data;
3067                }
3068             }
3069             if (matrix)
3070                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
3071             if (deref->deref_type == nir_deref_type_var)
3072                deref->type = var->type;
3073             else
3074                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
3075          }
3076          break;
3077          case nir_instr_type_intrinsic: {
3078             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3079             if (intr->intrinsic != nir_intrinsic_store_deref &&
3080                   intr->intrinsic != nir_intrinsic_load_deref)
3081                break;
3082             if (nir_intrinsic_get_var(intr, 0) != var)
3083                break;
3084             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
3085                   (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
3086                break;
3087             b.cursor = nir_before_instr(instr);
3088             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3089             unsigned num_components = intr->num_components * 2;
3090             nir_def *comp[NIR_MAX_VEC_COMPONENTS];
3091             /* this is the stored matrix type from the deref */
3092             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
3093             const struct glsl_type *matrix = he ? he->data : NULL;
3094             if (doubles_only && !matrix)
3095                break;
3096             func_progress = true;
3097             if (intr->intrinsic == nir_intrinsic_store_deref) {
3098                /* first, unpack the src data to 32bit vec2 components */
3099                for (unsigned i = 0; i < intr->num_components; i++) {
3100                   nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
3101                   comp[i * 2] = nir_channel(&b, ssa, 0);
3102                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
3103                }
3104                unsigned wrmask = nir_intrinsic_write_mask(intr);
3105                unsigned mask = 0;
3106                /* expand writemask for doubled components */
3107                for (unsigned i = 0; i < intr->num_components; i++) {
3108                   if (wrmask & BITFIELD_BIT(i))
3109                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
3110                }
3111                if (matrix) {
3112                   /* matrix types always come from array (row) derefs */
3113                   assert(deref->deref_type == nir_deref_type_array);
3114                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3115                   /* let optimization clean up consts later */
3116                   nir_def *index = deref->arr.index.ssa;
3117                   /* this might be an indirect array index:
3118                    * - iterate over matrix columns
3119                    * - add if blocks for each column
3120                    * - perform the store in the block
3121                    */
3122                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
3123                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3124                      unsigned vec_components = glsl_get_vector_elements(matrix);
3125                      /* always clamp dvec3 to 4 components */
3126                      if (vec_components == 3)
3127                         vec_components = 4;
3128                      unsigned start_component = idx * vec_components * 2;
3129                      /* struct member */
3130                      unsigned member = start_component / 4;
3131                      /* number of components remaining */
3132                      unsigned remaining = num_components;
3133                      for (unsigned i = 0; i < num_components; member++) {
3134                         if (!(mask & BITFIELD_BIT(i)))
3135                            continue;
3136                         assert(member < glsl_get_length(var_deref->type));
3137                         /* deref the rewritten struct to the appropriate vec4/vec2 */
3138                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3139                         unsigned incr = MIN2(remaining, 4);
3140                         /* assemble the write component vec */
3141                         nir_def *val = nir_vec(&b, &comp[i], incr);
3142                         /* use the number of components being written as the writemask */
3143                         if (glsl_get_vector_elements(strct->type) > val->num_components)
3144                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3145                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3146                         remaining -= incr;
3147                         i += incr;
3148                      }
3149                      nir_pop_if(&b, NULL);
3150                   }
3151                   _mesa_set_add(deletes, &deref->instr);
3152                } else if (num_components <= 4) {
3153                   /* simple store case: just write out the components */
3154                   nir_def *dest = nir_vec(&b, comp, num_components);
3155                   nir_store_deref(&b, deref, dest, mask);
3156                } else {
3157                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3158                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3159                      if (!(mask & BITFIELD_MASK(4)))
3160                         continue;
3161                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3162                      nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3163                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
3164                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3165                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3166                      mask >>= 4;
3167                   }
3168                }
3169             } else {
3170                nir_def *dest = NULL;
3171                if (matrix) {
3172                   /* matrix types always come from array (row) derefs */
3173                   assert(deref->deref_type == nir_deref_type_array);
3174                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3175                   /* let optimization clean up consts later */
3176                   nir_def *index = deref->arr.index.ssa;
3177                   /* this might be an indirect array index:
3178                    * - iterate over matrix columns
3179                    * - add if blocks for each column
3180                    * - phi the loads using the array index
3181                    */
3182                   unsigned cols = glsl_get_matrix_columns(matrix);
3183                   nir_def *dests[4];
3184                   for (unsigned idx = 0; idx < cols; idx++) {
3185                      /* don't add an if for the final row: this will be handled in the else */
3186                      if (idx < cols - 1)
3187                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3188                      unsigned vec_components = glsl_get_vector_elements(matrix);
3189                      /* always clamp dvec3 to 4 components */
3190                      if (vec_components == 3)
3191                         vec_components = 4;
3192                      unsigned start_component = idx * vec_components * 2;
3193                      /* struct member */
3194                      unsigned member = start_component / 4;
3195                      /* number of components remaining */
3196                      unsigned remaining = num_components;
3197                      /* component index */
3198                      unsigned comp_idx = 0;
3199                      for (unsigned i = 0; i < num_components; member++) {
3200                         assert(member < glsl_get_length(var_deref->type));
3201                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3202                         nir_def *load = nir_load_deref(&b, strct);
3203                         unsigned incr = MIN2(remaining, 4);
3204                         /* repack the loads to 64bit */
3205                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3206                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3207                         remaining -= incr;
3208                         i += incr;
3209                      }
3210                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3211                      if (idx < cols - 1)
3212                         nir_push_else(&b, NULL);
3213                   }
3214                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3215                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
3216                      nir_pop_if(&b, NULL);
3217                      dest = nir_if_phi(&b, dests[idx - 1], dest);
3218                   }
3219                   _mesa_set_add(deletes, &deref->instr);
3220                } else if (num_components <= 4) {
3221                   /* simple load case */
3222                   nir_def *load = nir_load_deref(&b, deref);
3223                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3224                   for (unsigned i = 0; i < intr->num_components; i++) {
3225                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3226                   }
3227                   dest = nir_vec(&b, comp, intr->num_components);
3228                } else {
3229                   /* writing > 4 components: access the struct and load the appropriate vec4 members */
3230                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3231                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3232                      nir_def *load = nir_load_deref(&b, strct);
3233                      comp[i * 2] = nir_pack_64_2x32(&b,
3234                                                     nir_trim_vector(&b, load, 2));
3235                      if (num_components > 2)
3236                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3237                   }
3238                   dest = nir_vec(&b, comp, intr->num_components);
3239                }
3240                nir_def_rewrite_uses_after(&intr->def, dest, instr);
3241             }
3242             _mesa_set_add(deletes, instr);
3243             break;
3244          }
3245          break;
3246          default: break;
3247          }
3248       }
3249    }
3250    if (func_progress)
3251       nir_metadata_preserve(impl, nir_metadata_none);
3252    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3253    set_foreach_remove(deletes, he)
3254       nir_instr_remove((void*)he->key);
3255    return func_progress;
3256 }
3257 
3258 static bool
3259 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3260                       struct set *deletes, bool doubles_only)
3261 {
3262    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3263       return false;
3264    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3265    /* once type is rewritten, rewrite all loads and stores */
3266    nir_foreach_function_impl(impl, shader)
3267       lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3268    return true;
3269 }
3270 
3271 /* rewrite all input/output variables using 32bit types and load/stores */
3272 static bool
3273 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3274 {
3275    bool progress = false;
3276    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3277    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3278    nir_foreach_function_impl(impl, shader) {
3279       nir_foreach_function_temp_variable(var, impl) {
3280          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3281             continue;
3282          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3283          progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3284       }
3285    }
3286    ralloc_free(deletes);
3287    ralloc_free(derefs);
3288    if (progress) {
3289       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3290       nir_lower_phis_to_scalar(shader, false);
3291       optimize_nir(shader, NULL, true);
3292    }
3293    return progress;
3294 }
3295 
3296 static void
3297 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3298 {
3299    FILE *fp = fopen(file, "wb");
3300    if (fp) {
3301       fwrite(words, 1, size, fp);
3302       fclose(fp);
3303       fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3304    }
3305 }
3306 
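/* Returns the set of stages that are allowed to follow 'stage' in a graphics
 * pipeline; used as VkShaderCreateInfoEXT::nextStage when creating
 * EXT_shader_object shaders below.
 */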
3307 static VkShaderStageFlagBits
3308 zink_get_next_stage(gl_shader_stage stage)
3309 {
3310    switch (stage) {
3311    case MESA_SHADER_VERTEX:
3312       return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3313              VK_SHADER_STAGE_GEOMETRY_BIT |
3314              VK_SHADER_STAGE_FRAGMENT_BIT;
3315    case MESA_SHADER_TESS_CTRL:
3316       return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3317    case MESA_SHADER_TESS_EVAL:
3318       return VK_SHADER_STAGE_GEOMETRY_BIT |
3319              VK_SHADER_STAGE_FRAGMENT_BIT;
3320    case MESA_SHADER_GEOMETRY:
3321       return VK_SHADER_STAGE_FRAGMENT_BIT;
3322    case MESA_SHADER_FRAGMENT:
3323    case MESA_SHADER_COMPUTE:
3324    case MESA_SHADER_KERNEL:
3325       return 0;
3326    default:
3327       unreachable("invalid shader stage");
3328    }
3329 }
3330 
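/* Creates the Vulkan shader backing a zink_shader: a VkShaderEXT when
 * EXT_shader_object can be used (and the caller allows it), otherwise a
 * VkShaderModule. Optionally dumps the SPIR-V to disk (ZINK_DEBUG=spirv) and,
 * in debug builds with ZINK_DEBUG=validation, round-trips it through
 * spirv_to_nir as a sanity check.
 */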
3331 struct zink_shader_object
3332 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3333 {
3334    VkShaderModuleCreateInfo smci = {0};
3335    VkShaderCreateInfoEXT sci = {0};
3336 
3337    if (!spirv)
3338       spirv = zs->spirv;
3339 
3340    if (zink_debug & ZINK_DEBUG_SPIRV) {
3341       char buf[256];
3342       static int i;
3343       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3344       zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3345    }
3346 
3347    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3348    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3349    sci.nextStage = zink_get_next_stage(zs->info.stage);
3350    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3351    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3352    sci.pCode = spirv->words;
3353    sci.pName = "main";
3354    VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3355    if (pg) {
3356       sci.setLayoutCount = pg->num_dsl;
3357       sci.pSetLayouts = pg->dsl;
3358    } else {
3359       sci.setLayoutCount = zs->info.stage + 1;
3360       dsl[zs->info.stage] = zs->precompile.dsl;
3361       sci.pSetLayouts = dsl;
3362    }
3363    VkPushConstantRange pcr;
3364    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3365    pcr.offset = 0;
3366    pcr.size = sizeof(struct zink_gfx_push_constant);
3367    sci.pushConstantRangeCount = 1;
3368    sci.pPushConstantRanges = &pcr;
3369 
3370    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3371    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3372    smci.pCode = spirv->words;
3373 
3374 #ifndef NDEBUG
3375    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3376       static const struct spirv_to_nir_options spirv_options = {
3377          .environment = NIR_SPIRV_VULKAN,
3378          .capabilities = NULL,
3379          .ubo_addr_format = nir_address_format_32bit_index_offset,
3380          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3381          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3382          .push_const_addr_format = nir_address_format_logical,
3383          .shared_addr_format = nir_address_format_32bit_offset,
3384       };
3385       uint32_t num_spec_entries = 0;
3386       struct nir_spirv_specialization *spec_entries = NULL;
3387       VkSpecializationInfo sinfo = {0};
3388       VkSpecializationMapEntry me[3];
3389       uint32_t size[3] = {1,1,1};
3390       if (!zs->info.workgroup_size[0]) {
3391          sinfo.mapEntryCount = 3;
3392          sinfo.pMapEntries = &me[0];
3393          sinfo.dataSize = sizeof(uint32_t) * 3;
3394          sinfo.pData = size;
3395          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3396          for (int i = 0; i < 3; i++) {
3397             me[i].size = sizeof(uint32_t);
3398             me[i].constantID = ids[i];
3399             me[i].offset = i * sizeof(uint32_t);
3400          }
3401          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3402       }
3403       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3404                          spec_entries, num_spec_entries,
3405                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3406       assert(nir);
3407       ralloc_free(nir);
3408       free(spec_entries);
3409    }
3410 #endif
3411 
3412    VkResult ret;
3413    struct zink_shader_object obj = {0};
3414    if (!can_shobj || !screen->info.have_EXT_shader_object)
3415       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3416    else
3417       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3418    ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3419    assert(success);
3420    return obj;
3421 }
3422 
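/* Demotes shader_in/shader_out variables that are never accessed, either via
 * derefs or via lowered IO intrinsics, to temporaries and removes them.
 */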
3423 static void
3424 prune_io(nir_shader *nir)
3425 {
3426    nir_foreach_shader_in_variable_safe(var, nir) {
3427       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3428          var->data.mode = nir_var_shader_temp;
3429    }
3430    nir_foreach_shader_out_variable_safe(var, nir) {
3431       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3432          var->data.mode = nir_var_shader_temp;
3433    }
3434    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3435 }
3436 
3437 static void
3438 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3439 {
3440    assert(var->data.driver_location < 32); //bitfield size for tracking
3441    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(var->data.driver_location);
3442 }
3443 
3444 static void
3445 flag_shadow_tex_instr(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3446 {
3447    assert(var);
3448    unsigned num_components = tex->def.num_components;
3449    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3450    if (rewrite_depth && nir_def_components_read(&tex->def) & ~1) {
3451       /* this needs recompiles */
3452       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3453          flag_shadow_tex(var, zs);
3454       else
3455          mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3456    }
3457 }
3458 
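/* Converts a tex instruction's destination to the sampler variable's return
 * type/bit size, inserting the appropriate u2u/i2i/f2f conversion afterwards.
 * For legacy (vec4-result) shadow sampling that only reads .x, the result is
 * shrunk to one component and marked new-style instead of flagging a recompile.
 */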
3459 static nir_def *
3460 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3461 {
3462    assert(var);
3463    const struct glsl_type *type = glsl_without_array(var->type);
3464    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3465    bool is_int = glsl_base_type_is_integer(ret_type);
3466    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3467    unsigned dest_size = tex->def.bit_size;
3468    b->cursor = nir_after_instr(&tex->instr);
3469    unsigned num_components = tex->def.num_components;
3470    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3471    if (bit_size == dest_size && !rewrite_depth)
3472       return NULL;
3473    nir_def *dest = &tex->def;
3474    if (rewrite_depth && zs) {
3475       if (nir_def_components_read(dest) & ~1) {
3476          /* handled above */
3477          return NULL;
3478       }
3479       /* If only .x is used in the NIR, then it's effectively not a legacy depth
3480        * sample anyway and we don't want to ask for shader recompiles.  This is
3481        * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3482        * LUMINANCE, so apps just use the first channel.
3483        */
3484       tex->def.num_components = 1;
3485       tex->is_new_style_shadow = true;
3486    }
3487    if (bit_size != dest_size) {
3488       tex->def.bit_size = bit_size;
3489       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3490 
3491       if (is_int) {
3492          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3493             dest = nir_u2uN(b, &tex->def, dest_size);
3494          else
3495             dest = nir_i2iN(b, &tex->def, dest_size);
3496       } else {
3497          dest = nir_f2fN(b, &tex->def, dest_size);
3498       }
3499       if (!rewrite_depth)
3500          nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3501    }
3502    return dest;
3503 }
3504 
3505 struct lower_zs_swizzle_state {
3506    bool shadow_only;
3507    unsigned base_sampler_id;
3508    const struct zink_zs_swizzle_key *swizzle;
3509 };
3510 
3511 static bool
3512 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3513 {
3514    struct lower_zs_swizzle_state *state = data;
3515    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3516    assert(state->shadow_only || swizzle_key);
3517    if (instr->type != nir_instr_type_tex)
3518       return false;
3519    nir_tex_instr *tex = nir_instr_as_tex(instr);
3520    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3521        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3522       return false;
3523    if (tex->is_shadow && tex->op == nir_texop_tg4)
3524       /* Will not even try to emulate the shadow comparison */
3525       return false;
3526    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3527    nir_variable *var = NULL;
3528    if (handle != -1)
3529       /* gtfo bindless depth texture mode */
3530       return false;
3531    var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
3532    assert(var);
3533    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3534    const struct glsl_type *type = glsl_without_array(var->type);
3535    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3536    bool is_int = glsl_base_type_is_integer(ret_type);
3537    unsigned num_components = tex->def.num_components;
3538    if (tex->is_shadow)
3539       tex->is_new_style_shadow = true;
3540    nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3541    assert(dest || !state->shadow_only);
3542    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3543       return false;
3544    else if (!dest)
3545       dest = &tex->def;
3546    else
3547       tex->def.num_components = 1;
3548    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3549       /* these require manual swizzles */
3550       if (tex->op == nir_texop_tg4) {
3551          assert(!tex->is_shadow);
3552          nir_def *swizzle;
3553          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3554          case PIPE_SWIZZLE_0:
3555             swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3556             break;
3557          case PIPE_SWIZZLE_1:
3558             if (is_int)
3559                swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3560             else
3561                swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3562             break;
3563          default:
3564             if (!tex->component)
3565                return false;
3566             tex->component = 0;
3567             return true;
3568          }
3569          nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3570          return true;
3571       }
3572       nir_def *vec[4];
3573       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3574          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3575          case PIPE_SWIZZLE_0:
3576             vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3577             break;
3578          case PIPE_SWIZZLE_1:
3579             if (is_int)
3580                vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3581             else
3582                vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3583             break;
3584          default:
3585             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3586             break;
3587          }
3588       }
3589       nir_def *swizzle = nir_vec(b, vec, num_components);
3590       nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3591    } else {
3592       assert(tex->is_shadow);
3593       nir_def *vec[4] = {dest, dest, dest, dest};
3594       nir_def *splat = nir_vec(b, vec, num_components);
3595       nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3596    }
3597    return true;
3598 }
3599 
3600 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3601  *
3602  * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3603  * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3604  * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3605  * shader to expand out to vec4.  Since this depends on sampler state, it's a
3606  * draw-time shader recompile to do so.
3607  *
3608  * We may also need to apply shader swizzles for
3609  * driver_compiler_workarounds.needs_zs_shader_swizzle.
3610  */
3611 static bool
3612 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3613 {
3614    /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3615    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3616    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3617    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr,
3618                                        nir_metadata_control_flow,
3619                                        (void*)&state);
3620 }
3621 
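/* Rewrites gl_PointCoord loads as (x, 1 - y); presumably used when the point
 * sprite coordinate origin requested by the state doesn't match Vulkan's.
 */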
3622 static bool
3623 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3624                          void *data)
3625 {
3626    if (intr->intrinsic != nir_intrinsic_load_point_coord)
3627       return false;
3628    b->cursor = nir_after_instr(&intr->instr);
3629    nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3630                                   nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3631    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3632    return true;
3633 }
3634 
3635 static bool
3636 invert_point_coord(nir_shader *nir)
3637 {
3638    if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3639       return false;
3640    return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3641                                      nir_metadata_dominance, NULL);
3642 }
3643 
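/* Lowers sparse residency handling: the residency code for sparse texture and
 * image loads is taken from nir_is_sparse_resident_zink and appended to the
 * result vector, and the residency query intrinsics are lowered to plain ALU
 * (iand / i2b).
 */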
3644 static bool
3645 lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data)
3646 {
3647    b->cursor = nir_after_instr(instr);
3648 
3649    switch (instr->type) {
3650    case nir_instr_type_tex: {
3651       nir_tex_instr *tex = nir_instr_as_tex(instr);
3652       if (!tex->is_sparse)
3653          return false;
3654 
3655       nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def));
3656       nir_def *vec = nir_vector_insert_imm(b, &tex->def, res,
3657                                            tex->def.num_components - 1);
3658       nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr);
3659       return true;
3660    }
3661 
3662    case nir_instr_type_intrinsic: {
3663       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3664       switch (intrin->intrinsic) {
3665       case nir_intrinsic_image_deref_sparse_load: {
3666          nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def));
3667          nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4);
3668          nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
3669          return true;
3670       }
3671 
3672       case nir_intrinsic_sparse_residency_code_and: {
3673          nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa);
3674          nir_def_rewrite_uses(&intrin->def, res);
3675          return true;
3676       }
3677 
3678       case nir_intrinsic_is_sparse_texels_resident: {
3679          nir_def *res = nir_i2b(b, intrin->src[0].ssa);
3680          nir_def_rewrite_uses(&intrin->def, res);
3681          return true;
3682       }
3683 
3684       default:
3685          return false;
3686       }
3687    }
3688 
3689    default:
3690       return false;
3691    }
3692 }
3693 
3694 static bool
3695 lower_sparse(nir_shader *shader)
3696 {
3697    return nir_shader_instructions_pass(shader, lower_sparse_instr,
3698                                        nir_metadata_dominance, NULL);
3699 }
3700 
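/* rewrite lowered explicit-io intrinsics (load/store_input/output and
 * interpolated loads) back into variable derefs by matching each intrinsic's
 * location/component against the shader's in/out variables
 */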
3701 static bool
3702 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3703 {
3704    bool is_load = false;
3705    bool is_input = false;
3706    bool is_interp = false;
3707    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3708       return false;
3709    bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) ||
3710                         (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input);
3711    unsigned loc = nir_intrinsic_io_semantics(intr).location;
3712    nir_src *src_offset = nir_get_io_offset_src(intr);
3713    const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3714    unsigned location = loc + slot_offset;
3715    unsigned frac = nir_intrinsic_component(intr);
3716    unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3717    /* set c aligned/rounded down to dword */
3718    unsigned c = frac;
3719    if (frac && bit_size < 32)
3720       c = frac * bit_size / 32;
3721    /* loop over all the variables and rewrite corresponding access */
3722    nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3723       const struct glsl_type *type = var->type;
3724       if (nir_is_arrayed_io(var, b->shader->info.stage))
3725          type = glsl_get_array_element(type);
3726       unsigned slot_count = get_var_slot_count(b->shader, var);
3727       /* filter access that isn't specific to this variable */
3728       if (var->data.location > location || var->data.location + slot_count <= location)
3729          continue;
3730       if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3731          continue;
3732       if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3733          continue;
3734 
3735       unsigned size = 0;
3736       bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3737       if (is_struct)
3738          size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3739       else if (!is_special_io && var->data.compact)
3740          size = glsl_get_aoa_size(type);
3741       else
3742          size = glsl_get_vector_elements(glsl_without_array(type));
3743       assert(size);
3744       if (glsl_type_is_64bit(glsl_without_array(var->type)))
3745          size *= 2;
3746       if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3747          /* adjust for dvec3-type slot overflow */
3748          assert(location > var->data.location);
3749          size -= (location - var->data.location) * 4;
3750       }
3751       assert(size);
3752       if (var->data.location_frac + size <= c || var->data.location_frac > c)
3753          continue;
3754 
3755       b->cursor = nir_before_instr(&intr->instr);
3756       nir_deref_instr *deref = nir_build_deref_var(b, var);
3757       if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3758          assert(intr->intrinsic != nir_intrinsic_store_output);
3759          deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3760       }
3761       if (glsl_type_is_array(type)) {
3762          /* unroll array derefs */
3763          unsigned idx = var->data.compact ? (frac - var->data.location_frac) : 0;
3764          assert(src_offset);
3765          if (var->data.location < VARYING_SLOT_VAR0) {
3766             if (src_offset) {
3767                /* clip/cull dist and tess levels use different array offset semantics */
3768                bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3769                                   is_clipcull_dist(var->data.location);
3770                bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3771                                     (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3772                bool is_builtin_array = is_clipdist || is_tess_level;
3773                /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3774                if (nir_src_is_const(*src_offset)) {
3775                   unsigned offset = slot_offset;
3776                   if (is_builtin_array)
3777                      offset *= 4;
3778                   if (is_clipdist) {
3779                      if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1)
3780                         offset += 4;
3781                   }
3782                   deref = nir_build_deref_array_imm(b, deref, offset + idx);
3783                } else {
3784                   nir_def *offset = src_offset->ssa;
3785                   if (is_builtin_array)
3786                      offset = nir_imul_imm(b, offset, 4);
3787                   deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
3788                }
3789             } else {
3790                deref = nir_build_deref_array_imm(b, deref, idx);
3791             }
3792             type = glsl_get_array_element(type);
3793          } else {
3794             idx += location - var->data.location;
3795             /* need to convert possible N*M to [N][M] */
3796             nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3797             while (glsl_type_is_array(type)) {
3798                const struct glsl_type *elem = glsl_get_array_element(type);
3799                unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3800                nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3801                if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3802                   n = nir_udiv_imm(b, n, 2);
3803                deref = nir_build_deref_array(b, deref, n);
3804                nm = nir_umod_imm(b, nm, type_size);
3805                type = glsl_get_array_element(type);
3806             }
3807          }
3808       } else if (glsl_type_is_struct(type)) {
3809          deref = nir_build_deref_struct(b, deref, slot_offset);
3810       }
3811       assert(!glsl_type_is_array(type));
3812       unsigned num_components = glsl_get_vector_elements(type);
3813       if (is_load) {
3814          nir_def *load;
3815          if (is_interp) {
3816             nir_def *interp = intr->src[0].ssa;
3817             nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3818             assert(interp_intr);
3819             var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3820             switch (interp_intr->intrinsic) {
3821             case nir_intrinsic_load_barycentric_centroid:
3822                load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def);
3823                break;
3824             case nir_intrinsic_load_barycentric_sample:
3825                var->data.sample = 1;
3826                load = nir_load_deref(b, deref);
3827                break;
3828             case nir_intrinsic_load_barycentric_pixel:
3829                load = nir_load_deref(b, deref);
3830                break;
3831             case nir_intrinsic_load_barycentric_at_sample:
3832                load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3833                break;
3834             case nir_intrinsic_load_barycentric_at_offset:
3835                load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3836                break;
3837             default:
3838                unreachable("unhandled interp!");
3839             }
3840          } else {
3841             load = nir_load_deref(b, deref);
3842          }
3843          /* filter needed components */
3844          if (intr->num_components < load->num_components)
3845             load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3846          nir_def_rewrite_uses(&intr->def, load);
3847       } else {
3848          nir_def *store = intr->src[0].ssa;
3849          /* pad/filter components to match deref type */
3850          if (intr->num_components < num_components) {
3851             nir_def *zero = nir_imm_zero(b, 1, bit_size);
3852             nir_def *vec[4] = {zero, zero, zero, zero};
3853             u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3854                vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3855             store = nir_vec(b, vec, num_components);
3856          } else if (store->num_components > num_components) {
3857             store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3858          }
3859          if (store->bit_size != glsl_get_bit_size(type)) {
3860             /* this should be some weird bindless io conversion */
3861             assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3862             assert(num_components != store->num_components);
3863             store = nir_unpack_64_2x32(b, store);
3864          }
3865          nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3866       }
3867       nir_instr_remove(&intr->instr);
3868       return true;
3869    }
3870    unreachable("failed to find variable for explicit io!");
3871    return true;
3872 }
3873 
3874 static bool
3875 add_derefs(nir_shader *nir)
3876 {
3877    return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3878                                      nir_metadata_dominance, NULL);
3879 }
3880 
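/* final compile step for a shader variant: prune unused io, run divergence
 * analysis where needed, convert out of ssa, then translate the nir to spirv
 * and compile it with zink_shader_spirv_compile
 */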
3881 static struct zink_shader_object
3882 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3883 {
3884    struct zink_shader_info *sinfo = &zs->sinfo;
3885    prune_io(nir);
3886 
3887    switch (nir->info.stage) {
3888    case MESA_SHADER_VERTEX:
3889    case MESA_SHADER_TESS_EVAL:
3890    case MESA_SHADER_GEOMETRY:
3891       NIR_PASS_V(nir, nir_divergence_analysis);
3892       break;
3893    default: break;
3894    }
3895    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3896 
3897    if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3898       nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3899    if (zink_debug & ZINK_DEBUG_NIR) {
3900       fprintf(stderr, "NIR shader:\n---8<---\n");
3901       nir_print_shader(nir, stderr);
3902       fprintf(stderr, "---8<---\n");
3903    }
3904 
3905    struct zink_shader_object obj = {0};
3906    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen);
3907    if (spirv)
3908       obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3909 
3910    /* TODO: determine if there's any reason to cache spirv output? */
3911    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3912       zs->spirv = spirv;
3913    else
3914       obj.spirv = spirv;
3915    return obj;
3916 }
3917 
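/* used when the framebuffer has no samples: interpolateAtSample() collapses to
 * a plain load of the input variable
 */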
3918 static bool
3919 remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data)
3920 {
3921    if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample)
3922       return false;
3923 
3924    b->cursor = nir_before_instr(&interp->instr);
3925    nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0]));
3926    nir_def_rewrite_uses(&interp->def, res);
3927 
3928    return true;
3929 }
3930 
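/* compile a shader variant: apply all shader-key-driven lowering passes,
 * re-optimize if anything changed, then hand the result to compile_module
 */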
3931 struct zink_shader_object
3932 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
3933                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3934 {
3935    bool need_optimize = true;
3936    bool inlined_uniforms = false;
3937 
3938    NIR_PASS_V(nir, add_derefs);
3939    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
3940    if (key) {
3941       if (key->inline_uniforms) {
3942          NIR_PASS_V(nir, nir_inline_uniforms,
3943                     nir->info.num_inlinable_uniforms,
3944                     key->base.inlined_uniform_values,
3945                     nir->info.inlinable_uniform_dw_offsets);
3946 
3947          inlined_uniforms = true;
3948       }
3949 
3950       /* TODO: use a separate mem ctx here for ralloc */
3951 
3952       if (!screen->optimal_keys) {
3953          switch (zs->info.stage) {
3954          case MESA_SHADER_VERTEX: {
3955             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3956             const struct zink_vs_key *vs_key = zink_vs_key(key);
3957             switch (vs_key->size) {
3958             case 4:
3959                decomposed_attrs = vs_key->u32.decomposed_attrs;
3960                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3961                break;
3962             case 2:
3963                decomposed_attrs = vs_key->u16.decomposed_attrs;
3964                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3965                break;
3966             case 1:
3967                decomposed_attrs = vs_key->u8.decomposed_attrs;
3968                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3969                break;
3970             default: break;
3971             }
3972             if (decomposed_attrs || decomposed_attrs_without_w)
3973                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3974             break;
3975          }
3976 
3977          case MESA_SHADER_GEOMETRY:
3978             if (zink_gs_key(key)->lower_line_stipple) {
3979                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3980                NIR_PASS_V(nir, nir_lower_var_copies);
3981                need_optimize = true;
3982             }
3983 
3984             if (zink_gs_key(key)->lower_line_smooth) {
3985                NIR_PASS_V(nir, lower_line_smooth_gs);
3986                NIR_PASS_V(nir, nir_lower_var_copies);
3987                need_optimize = true;
3988             }
3989 
3990             if (zink_gs_key(key)->lower_gl_point) {
3991                NIR_PASS_V(nir, lower_gl_point_gs);
3992                need_optimize = true;
3993             }
3994 
3995             if (zink_gs_key(key)->lower_pv_mode) {
3996                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3997                need_optimize = true; //TODO verify that this is required
3998             }
3999             break;
4000 
4001          default:
4002             break;
4003          }
4004       }
4005 
4006       switch (zs->info.stage) {
4007       case MESA_SHADER_VERTEX:
4008       case MESA_SHADER_TESS_EVAL:
4009       case MESA_SHADER_GEOMETRY:
4010          if (zink_vs_key_base(key)->last_vertex_stage) {
4011             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
4012                NIR_PASS_V(nir, nir_lower_clip_halfz);
4013             }
4014             if (zink_vs_key_base(key)->push_drawid) {
4015                NIR_PASS_V(nir, lower_drawid);
4016             }
4017          } else {
4018             nir->xfb_info = NULL;
4019          }
4020          if (zink_vs_key_base(key)->robust_access)
4021             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4022          break;
4023       case MESA_SHADER_FRAGMENT:
4024          if (zink_fs_key(key)->lower_line_smooth) {
4025             NIR_PASS_V(nir, lower_line_smooth_fs,
4026                        zink_fs_key(key)->lower_line_stipple);
4027             need_optimize = true;
4028          } else if (zink_fs_key(key)->lower_line_stipple)
4029             NIR_PASS_V(nir, lower_line_stipple_fs);
4030 
4031          if (zink_fs_key(key)->lower_point_smooth) {
4032             NIR_PASS_V(nir, nir_lower_point_smooth, false);
4033             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
4034             nir->info.fs.uses_discard = true;
4035             need_optimize = true;
4036          }
4037 
4038          if (zink_fs_key(key)->robust_access)
4039             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4040 
4041          if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) {
4042             /* VK will always use gl_SampleMask[] values even if sample count is 0,
4043              * so we need to skip this write here to mimic GL's behavior of ignoring it
4044              */
4045             nir_foreach_shader_out_variable(var, nir) {
4046                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
4047                   var->data.mode = nir_var_shader_temp;
4048             }
4049             nir_fixup_deref_modes(nir);
4050             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4051             NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample,
4052                        nir_metadata_control_flow, NULL);
4053 
4054             need_optimize = true;
4055          }
4056          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
4057             NIR_PASS_V(nir, lower_dual_blend);
4058          }
4059          if (zink_fs_key_base(key)->coord_replace_bits)
4060             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
4061          if (zink_fs_key_base(key)->point_coord_yinvert)
4062             NIR_PASS_V(nir, invert_point_coord);
4063          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
4064             nir_foreach_shader_in_variable(var, nir)
4065                var->data.sample = true;
4066             nir->info.fs.uses_sample_qualifier = true;
4067             nir->info.fs.uses_sample_shading = true;
4068          }
4069          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
4070             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
4071          if (nir->info.fs.uses_fbfetch_output) {
4072             nir_variable *fbfetch = NULL;
4073             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
4074             /* old variable must be deleted to avoid spirv errors */
4075             fbfetch->data.mode = nir_var_shader_temp;
4076             nir_fixup_deref_modes(nir);
4077             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4078             need_optimize = true;
4079          }
4080          nir_foreach_shader_in_variable_safe(var, nir) {
4081             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
4082                continue;
4083             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
4084             var->data.mode = nir_var_shader_temp;
4085             nir_fixup_deref_modes(nir);
4086             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4087             need_optimize = true;
4088          }
4089          break;
4090       case MESA_SHADER_COMPUTE:
4091          if (zink_cs_key(key)->robust_access)
4092             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4093          break;
4094       default: break;
4095       }
4096       if (key->base.needs_zs_shader_swizzle) {
4097          assert(extra_data);
4098          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4099       }
4100       if (key->base.nonseamless_cube_mask) {
4101          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4102          need_optimize = true;
4103       }
4104    }
4105    if (screen->driconf.inline_uniforms) {
4106       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4107       NIR_PASS_V(nir, rewrite_bo_access, screen);
4108       NIR_PASS_V(nir, remove_bo_access, zs);
4109       need_optimize = true;
4110    }
4111    if (inlined_uniforms) {
4112       optimize_nir(nir, zs, true);
4113 
4114       /* This must be done again. */
4115       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4116                                                        nir_var_shader_out);
4117 
4118       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4119       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4120          zs->can_inline = false;
4121    } else if (need_optimize)
4122       optimize_nir(nir, zs, true);
4123    bool has_sparse = false;
4124    NIR_PASS(has_sparse, nir, lower_sparse);
4125    if (has_sparse)
4126       optimize_nir(nir, zs, false);
4127 
4128    struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4129    ralloc_free(nir);
4130    return obj;
4131 }
4132 
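/* precompile a separate (non-linked) shader: remap descriptor sets/bindings to
 * the separate-shader layout, optimize, and compile; for tessellation eval
 * shaders a matching tcs is also pre-generated and compiled
 */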
4133 struct zink_shader_object
4134 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4135 {
4136    nir_shader *nir = zs->nir;
4137    /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4138    int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4139    if (screen->info.have_EXT_shader_object)
4140       set = zs->info.stage;
4141    unsigned offsets[4];
4142    zink_descriptor_shader_get_binding_offsets(zs, offsets);
4143    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4144       if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4145          continue;
4146       var->data.descriptor_set = set;
4147       switch (var->data.mode) {
4148       case nir_var_mem_ubo:
4149             var->data.binding = !!var->data.driver_location;
4150             break;
4151       case nir_var_uniform:
4152          if (glsl_type_is_sampler(glsl_without_array(var->type)))
4153             var->data.binding += offsets[1];
4154          break;
4155       case nir_var_mem_ssbo:
4156          var->data.binding += offsets[2];
4157          break;
4158       case nir_var_image:
4159          var->data.binding += offsets[3];
4160          break;
4161       default: break;
4162       }
4163    }
4164    NIR_PASS_V(nir, add_derefs);
4165    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4166    if (screen->driconf.inline_uniforms) {
4167       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4168       NIR_PASS_V(nir, rewrite_bo_access, screen);
4169       NIR_PASS_V(nir, remove_bo_access, zs);
4170    }
4171    optimize_nir(nir, zs, true);
4172    zink_descriptor_shader_init(screen, zs);
4173    nir_shader *nir_clone = NULL;
4174    if (screen->info.have_EXT_shader_object)
4175       nir_clone = nir_shader_clone(nir, nir);
4176    struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4177    if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4178       /* always try to pre-generate a tcs in case it's needed */
4179       if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4180          nir_shader *nir_tcs = NULL;
4181          /* use max pcp for compat */
4182          zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, 32);
4183          zink_shader_tcs_init(screen, zs->non_fs.generated_tcs, nir_clone, &nir_tcs);
4184          nir_tcs->info.separate_shader = true;
4185          zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4186          ralloc_free(nir_tcs);
4187          zs->non_fs.generated_tcs->nir = NULL;
4188       }
4189    }
4190    spirv_shader_delete(obj.spirv);
4191    obj.spirv = NULL;
4192    return obj;
4193 }
4194 
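/* GL's gl_InstanceID does not include the base instance while Vulkan's
 * InstanceIndex does, so subtract load_base_instance from instance id loads
 */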
4195 static bool
4196 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4197                          void *data)
4198 {
4199    if (intr->intrinsic != nir_intrinsic_load_instance_id)
4200       return false;
4201    b->cursor = nir_after_instr(&intr->instr);
4202    nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4203    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4204    return true;
4205 }
4206 
4207 static bool
4208 lower_baseinstance(nir_shader *shader)
4209 {
4210    if (shader->info.stage != MESA_SHADER_VERTEX)
4211       return false;
4212    return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4213                                      nir_metadata_dominance, NULL);
4214 }
4215 
4216 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4217  * so instead we delete all those broken variables and just make new ones
4218  */
4219 static bool
4220 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4221 {
4222    uint64_t max_ssbo_size = 0;
4223    uint64_t max_ubo_size = 0;
4224    uint64_t max_uniform_size = 0;
4225 
4226    if (!shader->info.num_ssbos && !shader->info.num_ubos)
4227       return false;
4228 
4229    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4230       const struct glsl_type *type = glsl_without_array(var->type);
4231       if (type_is_counter(type))
4232          continue;
4233       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4234       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4235       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4236       if (interface_type) {
4237          unsigned block_size = glsl_get_explicit_size(interface_type, true);
4238          if (glsl_get_length(interface_type) == 1) {
4239             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4240             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4241             if (glsl_type_is_array(f) && !glsl_array_size(f))
4242                block_size = 0;
4243          }
4244          if (block_size) {
4245             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4246             size = MAX2(size, block_size);
4247          }
4248       }
4249       if (var->data.mode == nir_var_mem_ubo) {
4250          if (var->data.driver_location)
4251             max_ubo_size = MAX2(max_ubo_size, size);
4252          else
4253             max_uniform_size = MAX2(max_uniform_size, size);
4254       } else {
4255          max_ssbo_size = MAX2(max_ssbo_size, size);
4256          if (interface_type) {
4257             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4258                needs_size = true;
4259          }
4260       }
4261       var->data.mode = nir_var_shader_temp;
4262    }
4263    nir_fixup_deref_modes(shader);
4264    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4265    optimize_nir(shader, NULL, true);
4266 
4267    struct glsl_struct_field field = {0};
4268    field.name = ralloc_strdup(shader, "base");
4269    if (shader->info.num_ubos) {
4270       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4271          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4272          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4273                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4274                                                  "uniform_0@32");
4275          var->interface_type = var->type;
4276          var->data.mode = nir_var_mem_ubo;
4277          var->data.driver_location = 0;
4278       }
4279 
4280       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4281       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4282       if (num_ubos && ubos_used) {
4283          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4284          /* shrink array as much as possible */
4285          unsigned first_ubo = ffs(ubos_used) - 2;
4286          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4287          num_ubos -= first_ubo;
4288          assert(num_ubos);
4289          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4290                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4291                                    "ubos@32");
4292          var->interface_type = var->type;
4293          var->data.mode = nir_var_mem_ubo;
4294          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4295       }
4296    }
4297    if (shader->info.num_ssbos && zs->ssbos_used) {
4298       /* shrink array as much as possible */
4299       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4300       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4301       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4302       assert(num_ssbos);
4303       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4304       field.type = ssbo_type;
4305       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4306                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4307                                               "ssbos@32");
4308       var->interface_type = var->type;
4309       var->data.mode = nir_var_mem_ssbo;
4310       var->data.driver_location = first_ssbo;
4311    }
4312    return true;
4313 }
4314 
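/* conservative slot masks for analyze_io: a constant index marks exactly one
 * slot, anything else marks all of them (for ubos the catch-all skips slot 0,
 * the default uniform block)
 */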
4315 static uint32_t
4316 get_src_mask_ssbo(unsigned total, nir_src src)
4317 {
4318    if (nir_src_is_const(src))
4319       return BITFIELD_BIT(nir_src_as_uint(src));
4320    return BITFIELD_MASK(total);
4321 }
4322 
4323 static uint32_t
4324 get_src_mask_ubo(unsigned total, nir_src src)
4325 {
4326    if (nir_src_is_const(src))
4327       return BITFIELD_BIT(nir_src_as_uint(src));
4328    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4329 }
4330 
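/* scan the shader to record which ssbo/ubo slots are referenced
 * (zs->ssbos_used / zs->ubos_used), mark textures_used for sampler derefs,
 * and report whether get_ssbo_size is used
 */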
4331 static bool
4332 analyze_io(struct zink_shader *zs, nir_shader *shader)
4333 {
4334    bool ret = false;
4335    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4336    nir_foreach_block(block, impl) {
4337       nir_foreach_instr(instr, block) {
4338          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4339             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4340             nir_tex_instr *tex = nir_instr_as_tex(instr);
4341             int deref_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
4342             if (deref_idx >= 0) {
4343                nir_variable *img = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[deref_idx].src.ssa->parent_instr));
4344                unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4345                BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4346             }
4347             continue;
4348          }
4349          if (instr->type != nir_instr_type_intrinsic)
4350             continue;
4351 
4352          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4353          switch (intrin->intrinsic) {
4354          case nir_intrinsic_store_ssbo:
4355             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4356             break;
4357 
4358          case nir_intrinsic_get_ssbo_size: {
4359             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4360             ret = true;
4361             break;
4362          }
4363          case nir_intrinsic_ssbo_atomic:
4364          case nir_intrinsic_ssbo_atomic_swap:
4365          case nir_intrinsic_load_ssbo:
4366             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4367             break;
4368          case nir_intrinsic_load_ubo:
4369          case nir_intrinsic_load_ubo_vec4:
4370             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4371             break;
4372          default:
4373             break;
4374          }
4375       }
4376    }
4377    return ret;
4378 }
4379 
4380 struct zink_bindless_info {
4381    nir_variable *bindless[4];
4382    unsigned bindless_set;
4383 };
4384 
4385 /* this is a "default" bindless texture used if the shader has no texture variables */
4386 static nir_variable *
4387 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4388 {
4389    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4390    nir_variable *var;
4391 
4392    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4393    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4394    var->data.descriptor_set = descriptor_set;
4395    var->data.driver_location = var->data.binding = binding;
4396    return var;
4397 }
4398 
4399 /* this is a "default" bindless image used if the shader has no image variables */
4400 static nir_variable *
4401 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4402 {
4403    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4404    nir_variable *var;
4405 
4406    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4407    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4408    var->data.descriptor_set = descriptor_set;
4409    var->data.driver_location = var->data.binding = binding;
4410    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4411    return var;
4412 }
4413 
4414 /* rewrite bindless instructions as array deref instructions */
4415 static bool
4416 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4417 {
4418    struct zink_bindless_info *bindless = data;
4419 
4420    if (in->type == nir_instr_type_tex) {
4421       nir_tex_instr *tex = nir_instr_as_tex(in);
4422       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4423       if (idx == -1)
4424          return false;
4425 
4426       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4427       if (!var) {
4428          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4429          if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4430             bindless->bindless[1] = var;
4431          else
4432             bindless->bindless[0] = var;
4433       }
4434       b->cursor = nir_before_instr(in);
4435       nir_deref_instr *deref = nir_build_deref_var(b, var);
4436       if (glsl_type_is_array(var->type))
4437          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4438       nir_src_rewrite(&tex->src[idx].src, &deref->def);
4439 
4440       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4441        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4442        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4443        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4444        *
4445        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4446        * - Warhammer 40k: Dawn of War III
4447        */
4448       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4449       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4450       unsigned coord_components = nir_src_num_components(tex->src[c].src);
4451       if (coord_components < needed_components) {
4452          nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4453          nir_src_rewrite(&tex->src[c].src, def);
4454          tex->coord_components = needed_components;
4455       }
4456       return true;
4457    }
4458    if (in->type != nir_instr_type_intrinsic)
4459       return false;
4460    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4461 
4462    nir_intrinsic_op op;
4463 #define OP_SWAP(OP) \
4464    case nir_intrinsic_bindless_image_##OP: \
4465       op = nir_intrinsic_image_deref_##OP; \
4466       break;
4467 
4468 
4469    /* convert bindless intrinsics to deref intrinsics */
4470    switch (instr->intrinsic) {
4471    OP_SWAP(atomic)
4472    OP_SWAP(atomic_swap)
4473    OP_SWAP(format)
4474    OP_SWAP(load)
4475    OP_SWAP(order)
4476    OP_SWAP(samples)
4477    OP_SWAP(size)
4478    OP_SWAP(store)
4479    default:
4480       return false;
4481    }
4482 
4483    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4484    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4485    if (!var)
4486       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4487    instr->intrinsic = op;
4488    b->cursor = nir_before_instr(in);
4489    nir_deref_instr *deref = nir_build_deref_var(b, var);
4490    if (glsl_type_is_array(var->type))
4491       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4492    nir_src_rewrite(&instr->src[0], &deref->def);
4493    return true;
4494 }
4495 
4496 static bool
4497 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4498 {
4499    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4500       return false;
4501    nir_fixup_deref_modes(shader);
4502    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4503    optimize_nir(shader, NULL, true);
4504    return true;
4505 }
4506 
4507 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4508 static bool
4509 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4510                         void *data)
4511 {
4512    bool is_load = false;
4513    bool is_input = false;
4514    bool is_interp = false;
4515    if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4516       return false;
4517 
4518    nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4519    if (var->data.bindless)
4520       return false;
4521    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4522       return false;
4523    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4524       return false;
4525 
4526    var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4527    var->data.bindless = 1;
4528    return true;
4529 }
4530 
4531 static bool
4532 lower_bindless_io(nir_shader *shader)
4533 {
4534    return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4535                                      nir_metadata_dominance, NULL);
4536 }
4537 
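/* flatten (stage, descriptor type, index) into a binding number within the
 * descriptor set: 2 ubo slots, PIPE_MAX_SAMPLERS sampler slots, and
 * ZINK_MAX_SHADER_IMAGES image slots per gfx stage; compute/kernel uses base 0,
 * and compact mode offsets ssbos/images past the ubo/sampler ranges
 */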
4538 static uint32_t
4539 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4540 {
4541    if (stage == MESA_SHADER_NONE) {
4542       unreachable("not supported");
4543    } else {
4544       unsigned base = stage;
4545       /* clamp compute bindings for better driver efficiency */
4546       if (gl_shader_stage_is_compute(stage))
4547          base = 0;
4548       switch (type) {
4549       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4550       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4551          return base * 2 + !!index;
4552 
4553       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4554          assert(stage == MESA_SHADER_KERNEL);
4555          FALLTHROUGH;
4556       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4557          if (stage == MESA_SHADER_KERNEL) {
4558             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4559             return index + PIPE_MAX_SAMPLERS;
4560          }
4561          FALLTHROUGH;
4562       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4563          assert(index < PIPE_MAX_SAMPLERS);
4564          assert(stage != MESA_SHADER_KERNEL);
4565          return (base * PIPE_MAX_SAMPLERS) + index;
4566 
4567       case VK_DESCRIPTOR_TYPE_SAMPLER:
4568          assert(index < PIPE_MAX_SAMPLERS);
4569          assert(stage == MESA_SHADER_KERNEL);
4570          return index;
4571 
4572       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4573          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4574 
4575       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4576       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4577          assert(index < ZINK_MAX_SHADER_IMAGES);
4578          if (stage == MESA_SHADER_KERNEL)
4579             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4580          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4581 
4582       default:
4583          unreachable("unexpected type");
4584       }
4585    }
4586 }
4587 
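/* replace a bindless sampler/image variable (or each such struct member) with
 * the shared per-binding "bindless" array variable in the bindless descriptor
 * set; the original variable is demoted to shader_temp
 */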
4588 static void
4589 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4590 {
4591    if (glsl_type_is_struct(type)) {
4592       for (unsigned i = 0; i < glsl_get_length(type); i++)
4593          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4594       return;
4595    }
4596 
4597    /* just a random scalar in a struct */
4598    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4599       return;
4600 
4601    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4602    unsigned binding;
4603    switch (vktype) {
4604       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4605          binding = 0;
4606          break;
4607       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4608          binding = 1;
4609          break;
4610       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4611          binding = 2;
4612          break;
4613       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4614          binding = 3;
4615          break;
4616       default:
4617          unreachable("unknown");
4618    }
4619    if (!bindless->bindless[binding]) {
4620       bindless->bindless[binding] = nir_variable_clone(var, nir);
4621       bindless->bindless[binding]->data.bindless = 0;
4622       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4623       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4624       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4625       if (!bindless->bindless[binding]->data.image.format)
4626          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4627       nir_shader_add_variable(nir, bindless->bindless[binding]);
4628    } else {
4629       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4630    }
4631    var->data.mode = nir_var_shader_temp;
4632 }
4633 
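/* for drivers that can't sample shadow from 1D images: promote 1D shadow
 * samplers to 2D, padding coords and derivatives with a zero y component
 */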
4634 static bool
4635 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4636 {
4637    struct zink_screen *screen = data;
4638    if (instr->type != nir_instr_type_tex)
4639       return false;
4640    nir_tex_instr *tex = nir_instr_as_tex(instr);
4641    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4642       return false;
4643    if (tex->is_sparse && screen->need_2D_sparse) {
4644       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4645       mesa_loge("unhandled/unsupported 1D sparse texture!");
4646       abort();
4647    }
4648    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4649    b->cursor = nir_before_instr(instr);
4650    tex->coord_components++;
4651    unsigned srcs[] = {
4652       nir_tex_src_coord,
4653       nir_tex_src_offset,
4654       nir_tex_src_ddx,
4655       nir_tex_src_ddy,
4656    };
4657    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4658       unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4659       if (c == -1)
4660          continue;
4661       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4662          continue;
4663       nir_def *def;
4664       nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4665       if (tex->src[c].src.ssa->num_components == 1)
4666          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4667       else
4668          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4669       nir_src_rewrite(&tex->src[c].src, def);
4670    }
4671    b->cursor = nir_after_instr(instr);
4672    unsigned needed_components = nir_tex_instr_dest_size(tex);
4673    unsigned num_components = tex->def.num_components;
4674    if (needed_components > num_components) {
4675       tex->def.num_components = needed_components;
4676       assert(num_components < 3);
4677       /* take either xz or just x since this is promoted to 2D from 1D */
4678       uint32_t mask = num_components == 2 ? (1|4) : 1;
4679       nir_def *dst = nir_channels(b, &tex->def, mask);
4680       nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4681    }
4682    return true;
4683 }
4684 
4685 static bool
4686 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4687 {
4688    bool found = false;
4689    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4690       const struct glsl_type *type = glsl_without_array(var->type);
4691       unsigned length = glsl_get_length(var->type);
4692       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4693          continue;
4694       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4695       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4696 
4697       found = true;
4698    }
4699    if (found) {
4700       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4701       nir_fixup_deref_types(shader);
4702    }
4703    return found;
4704 }
4705 
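/* pre-lowering scan: note sparse usage, fill in images_used ranges, collect
 * arrayed io and clip/cull distance sizes, and warn once if float image
 * atomics are used without VK_EXT_shader_atomic_float
 */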
4706 static void
4707 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4708 {
4709    nir_foreach_function_impl(impl, shader) {
4710       nir_foreach_block_safe(block, impl) {
4711          nir_foreach_instr_safe(instr, block) {
4712             if (instr->type == nir_instr_type_tex) {
4713                nir_tex_instr *tex = nir_instr_as_tex(instr);
4714                zs->sinfo.have_sparse |= tex->is_sparse;
4715             }
4716             if (instr->type != nir_instr_type_intrinsic)
4717                continue;
4718             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4719             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4720                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4721                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4722                 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4723                 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4724                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4725                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4726                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4727                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4728 
4729                 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4730 
4731                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4732                 const unsigned size =
4733                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4734                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4735                                  var->data.binding + (MAX2(size, 1) - 1));
4736             }
4737             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4738                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4739                zs->sinfo.have_sparse = true;
4740 
4741             bool is_load = false;
4742             bool is_input = false;
4743             bool is_interp = false;
4744             if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) {
4745                nir_io_semantics s = nir_intrinsic_io_semantics(intr);
4746                if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) {
4747                   if (is_input)
4748                      zs->arrayed_inputs |= BITFIELD64_BIT(s.location);
4749                   else
4750                      zs->arrayed_outputs |= BITFIELD64_BIT(s.location);
4751                }
4752                /* TODO: delete this once #10826 is fixed */
4753                if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) {
4754                   if (is_clipcull_dist(s.location)) {
4755                      unsigned frac = nir_intrinsic_component(intr) + 1;
4756                      if (s.location < VARYING_SLOT_CULL_DIST0) {
4757                         if (s.location == VARYING_SLOT_CLIP_DIST1)
4758                            frac += 4;
4759                         shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots);
4760                      } else {
4761                         if (s.location == VARYING_SLOT_CULL_DIST1)
4762                            frac += 4;
4763                         shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots);
4764                      }
4765                   }
4766                }
4767             }
4768 
4769             static bool warned = false;
4770             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4771                switch (intr->intrinsic) {
4772                case nir_intrinsic_image_deref_atomic: {
4773                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4774                   if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4775                       util_format_is_float(var->data.image.format))
4776                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4777                   break;
4778                }
4779                default:
4780                   break;
4781                }
4782             }
4783          }
4784       }
4785    }
4786 }
4787 
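/* run in two phases around variable rewriting: the "pre" walk only flags
 * shadow tex instructions (flag_shadow_tex_instr), the "post" walk rewrites
 * tex dests to match their variables (rewrite_tex_dest)
 */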
4788 static bool
4789 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data, bool pre)
4790 {
4791    if (in->type != nir_instr_type_tex)
4792       return false;
4793    nir_tex_instr *tex = nir_instr_as_tex(in);
4794    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4795       return false;
4796    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4797    nir_variable *var = NULL;
4798    if (handle != -1) {
4799       if (pre)
4800          return false;
4801       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4802    } else {
4803       var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
4804    }
4805    if (pre) {
4806       flag_shadow_tex_instr(b, tex, var, data);
4807       return false;
4808    }
4809    return !!rewrite_tex_dest(b, tex, var, data);
4810 }
4811 
4812 static bool
4813 match_tex_dests_instr_pre(nir_builder *b, nir_instr *in, void *data)
4814 {
4815    return match_tex_dests_instr(b, in, data, true);
4816 }
4817 
4818 static bool
4819 match_tex_dests_instr_post(nir_builder *b, nir_instr *in, void *data)
4820 {
4821    return match_tex_dests_instr(b, in, data, false);
4822 }
4823 
4824 static bool
4825 match_tex_dests(nir_shader *shader, struct zink_shader *zs, bool pre_mangle)
4826 {
4827    return nir_shader_instructions_pass(shader, pre_mangle ? match_tex_dests_instr_pre : match_tex_dests_instr_post, nir_metadata_dominance, zs);
4828 }
4829 
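/* nir's bitfield_insert/extract are per-component, but the spirv ops take
 * scalar offset/count operands, so split vector ops into scalar ops and
 * recombine the results
 */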
4830 static bool
4831 split_bitfields_instr(nir_builder *b, nir_alu_instr *alu, void *data)
4832 {
4833    switch (alu->op) {
4834    case nir_op_ubitfield_extract:
4835    case nir_op_ibitfield_extract:
4836    case nir_op_bitfield_insert:
4837       break;
4838    default:
4839       return false;
4840    }
4841    unsigned num_components = alu->def.num_components;
4842    if (num_components == 1)
4843       return false;
4844    b->cursor = nir_before_instr(&alu->instr);
4845    nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4846    for (unsigned i = 0; i < num_components; i++) {
4847       if (alu->op == nir_op_bitfield_insert)
4848          dests[i] = nir_bitfield_insert(b,
4849                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4850                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4851                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4852                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4853       else if (alu->op == nir_op_ubitfield_extract)
4854          dests[i] = nir_ubitfield_extract(b,
4855                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4856                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4857                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4858       else
4859          dests[i] = nir_ibitfield_extract(b,
4860                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4861                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4862                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4863    }
4864    nir_def *dest = nir_vec(b, dests, num_components);
4865    nir_def_rewrite_uses_after(&alu->def, dest, &alu->instr);
4866    nir_instr_remove(&alu->instr);
4867    return true;
4868 }
4869 
4870 
4871 static bool
4872 split_bitfields(nir_shader *shader)
4873 {
4874    return nir_shader_alu_pass(shader, split_bitfields_instr,
4875                               nir_metadata_dominance, NULL);
4876 }
4877 
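/* replace image_deref_samples results with zero and retype the derefs used by
 * (formerly) MS image load/store/atomic intrinsics to match the rewritten variable
 */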
4878 static bool
4879 strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data)
4880 {
4881    if (in->type != nir_instr_type_intrinsic)
4882       return false;
4883    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in);
4884    switch (intr->intrinsic) {
4885    case nir_intrinsic_image_deref_samples:
4886       b->cursor = nir_before_instr(in);
4887       nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in);
4888       nir_instr_remove(in);
4889       break;
4890    case nir_intrinsic_image_deref_store:
4891    case nir_intrinsic_image_deref_load:
4892    case nir_intrinsic_image_deref_atomic:
4893    case nir_intrinsic_image_deref_atomic_swap:
4894       break;
4895    default:
4896       return false;
4897    }
4898    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
4899    if (dim != GLSL_SAMPLER_DIM_MS)
4900       return false;
4901 
4902    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4903    nir_variable *var = nir_deref_instr_get_variable(deref);
4904    nir_deref_instr *parent = nir_deref_instr_parent(deref);
4905    if (parent) {
4906       parent->type = var->type;
4907       deref->type = glsl_without_array(var->type);
4908    } else {
4909       deref->type = var->type;
4910    }
4911    nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
4912    return true;
4913 }
4914 
4915 
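/* retype multisampled storage image variables as 2D and fix up all their uses */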
4916 static bool
4917 strip_tex_ms(nir_shader *shader)
4918 {
4919    bool progress = false;
4920    nir_foreach_image_variable(var, shader) {
4921       const struct glsl_type *bare_type = glsl_without_array(var->type);
4922       if (glsl_get_sampler_dim(bare_type) != GLSL_SAMPLER_DIM_MS)
4923          continue;
4924       unsigned array_size = 0;
4925       if (glsl_type_is_array(var->type))
4926          array_size = glsl_array_size(var->type);
4927 
4928       const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type));
4929       if (array_size)
4930          new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type));
4931       var->type = new_type;
4932       progress = true;
4933    }
4934    if (!progress)
4935       return false;
4936    return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL);
4937 }
4938 
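/* propagate a variable's updated image type to every deref chain that references it */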
4939 static void
4940 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4941 {
4942    nir_foreach_function_impl(impl, nir) {
4943       nir_foreach_block(block, impl) {
4944          nir_foreach_instr_safe(instr, block) {
4945             if (instr->type != nir_instr_type_deref)
4946                continue;
4947             nir_deref_instr *deref = nir_instr_as_deref(instr);
4948             nir_variable *img = nir_deref_instr_get_variable(deref);
4949             if (img != var)
4950                continue;
4951             if (glsl_type_is_array(var->type)) {
4952                if (deref->deref_type == nir_deref_type_array)
4953                   deref->type = glsl_without_array(var->type);
4954                else
4955                   deref->type = var->type;
4956             } else {
4957                deref->type = var->type;
4958             }
4959          }
4960       }
4961    }
4962 }
4963 
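/* infer a result type for an untyped (GLSL_TYPE_VOID) image variable from the src/dest
 * types of the intrinsics that access it; variables with no typed access are demoted to
 * nir_var_shader_temp
 */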
4964 static void
4965 type_image(nir_shader *nir, nir_variable *var)
4966 {
4967    nir_foreach_function_impl(impl, nir) {
4968       nir_foreach_block(block, impl) {
4969          nir_foreach_instr_safe(instr, block) {
4970             if (instr->type != nir_instr_type_intrinsic)
4971                continue;
4972             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4973             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4974                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4975                intr->intrinsic == nir_intrinsic_image_deref_store ||
4976                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4977                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4978                intr->intrinsic == nir_intrinsic_image_deref_samples ||
4979                intr->intrinsic == nir_intrinsic_image_deref_format ||
4980                intr->intrinsic == nir_intrinsic_image_deref_order) {
4981                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4982                nir_variable *img = nir_deref_instr_get_variable(deref);
4983                if (img != var)
4984                   continue;
4985 
4986                nir_alu_type alu_type;
4987                if (nir_intrinsic_has_src_type(intr))
4988                   alu_type = nir_intrinsic_src_type(intr);
4989                else
4990                   alu_type = nir_intrinsic_dest_type(intr);
4991 
4992                const struct glsl_type *type = glsl_without_array(var->type);
4993                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4994                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4995                   continue;
4996                }
4997                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4998                if (glsl_type_is_array(var->type))
4999                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5000                var->type = img_type;
5001                rewrite_cl_derefs(nir, var);
5002                return;
5003             }
5004          }
5005       }
5006    }
5007    nir_foreach_function_impl(impl, nir) {
5008       nir_foreach_block(block, impl) {
5009          nir_foreach_instr_safe(instr, block) {
5010             if (instr->type != nir_instr_type_intrinsic)
5011                continue;
5012             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5013             if (intr->intrinsic != nir_intrinsic_image_deref_size)
5014                continue;
5015             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5016             nir_variable *img = nir_deref_instr_get_variable(deref);
5017             if (img != var)
5018                continue;
5019             nir_alu_type alu_type = nir_type_uint32;
5020             const struct glsl_type *type = glsl_without_array(var->type);
5021             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5022                continue;
5023             }
5024             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5025             if (glsl_type_is_array(var->type))
5026                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5027             var->type = img_type;
5028             rewrite_cl_derefs(nir, var);
5029             return;
5030          }
5031       }
5032    }
5033    var->data.mode = nir_var_shader_temp;
5034 }
5035 
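/* give sampler variables a concrete sampler type derived from the tex instructions that
 * use them (shadow/array/dest_type)
 */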
5036 static bool
5037 type_sampler_vars(nir_shader *nir)
5038 {
5039    bool progress = false;
5040    nir_foreach_function_impl(impl, nir) {
5041       nir_foreach_block(block, impl) {
5042          nir_foreach_instr(instr, block) {
5043             if (instr->type != nir_instr_type_tex)
5044                continue;
5045             nir_tex_instr *tex = nir_instr_as_tex(instr);
5046             nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
5047             assert(var);
5048             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
5049                 nir_tex_instr_is_query(tex))
5050                continue;
5051             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
5052             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
5053             if (size > 1)
5054                img_type = glsl_array_type(img_type, size, 0);
5055             var->type = img_type;
5056             progress = true;
5057          }
5058       }
5059    }
5060    return progress;
5061 }
5062 
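/* assign concrete types to sampler and image variables, then fix up deref types/modes */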
5063 static bool
5064 type_images(nir_shader *nir)
5065 {
5066    bool progress = false;
5067    progress |= type_sampler_vars(nir);
5068    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
5069       type_image(nir, var);
5070       progress = true;
5071    }
5072    if (progress) {
5073       nir_fixup_deref_types(nir);
5074       nir_fixup_deref_modes(nir);
5075    }
5076    return progress;
5077 }
5078 
5079 /* attempt to assign io for separate shaders */
5080 static bool
5081 fixup_io_locations(nir_shader *nir)
5082 {
5083    nir_variable_mode modes;
5084    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
5085       modes = nir_var_shader_in | nir_var_shader_out;
5086    else
5087       modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
5088    u_foreach_bit(mode, modes) {
5089       nir_variable_mode m = BITFIELD_BIT(mode);
5090       if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
5091           (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
5092          /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
5093           * - i/o interface blocks don't need to match
5094           * - any location can be present or not
5095           * - it just has to work
5096           *
5097           * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
5098           * since it's a builtin and yolo it with all the other legacy crap
5099           */
5100          nir_foreach_variable_with_modes(var, nir, m) {
5101             if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
5102                continue;
5103             if (var->data.location == VARYING_SLOT_VAR0)
5104                var->data.driver_location = 0;
5105             else if (var->data.patch)
5106                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5107             else
5108                var->data.driver_location = var->data.location;
5109          }
5110          continue;
5111       }
5112       /* i/o interface blocks are required to be EXACT matches between stages:
5113        * iterate over all locations and set locations incrementally
5114        */
5115       unsigned slot = 0;
5116       for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) {
5117          if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
5118             continue;
5119          bool found = false;
5120          unsigned size = 0;
5121          nir_foreach_variable_with_modes(var, nir, m) {
5122             if (var->data.location != i)
5123                continue;
5124             /* only add slots for non-component vars or first-time component vars */
5125             if (!var->data.location_frac || !size) {
5126                /* ensure variable is given enough slots */
5127                if (nir_is_arrayed_io(var, nir->info.stage))
5128                   size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
5129                else
5130                   size += glsl_count_vec4_slots(var->type, false, false);
5131             }
5132             if (var->data.patch)
5133                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5134             else
5135                var->data.driver_location = slot;
5136             found = true;
5137          }
5138          slot += size;
5139          if (found) {
5140             /* ensure the consumed slots aren't double iterated */
5141             i += size - 1;
5142          } else {
5143             /* locations used between stages are not required to be contiguous */
5144             if (i >= VARYING_SLOT_VAR0)
5145                slot++;
5146          }
5147       }
5148    }
5149    return true;
5150 }
5151 
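/* collect the bitmask of input slots that use flat interpolation */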
5152 static uint64_t
5153 zink_flat_flags(struct nir_shader *shader)
5154 {
5155    uint64_t flat_flags = 0;
5156    nir_foreach_shader_in_variable(var, shader) {
5157       if (var->data.interpolation == INTERP_MODE_FLAT)
5158          flat_flags |= BITFIELD64_BIT(var->data.location);
5159    }
5160 
5161    return flat_flags;
5162 }
5163 
5164 struct rework_io_state {
5165    /* these are search criteria */
5166    bool indirect_only;
5167    unsigned location;
5168    nir_variable_mode mode;
5169    gl_shader_stage stage;
5170    nir_shader *nir;
5171    const char *name;
5172 
5173    /* these are found by scanning */
5174    bool arrayed_io;
5175    bool medium_precision;
5176    bool fb_fetch_output;
5177    bool dual_source_blend_index;
5178    uint32_t component_mask;
5179    uint32_t ignored_component_mask;
5180    unsigned array_size;
5181    unsigned bit_size;
5182    unsigned base;
5183    nir_alu_type type;
5184    /* must be last */
5185    char *newname;
5186 };
5187 
5188 /* match an existing variable against the rework state */
5189 static nir_variable *
5190 find_rework_var(nir_shader *nir, struct rework_io_state *ris)
5191 {
5192    nir_foreach_variable_with_modes(var, nir, ris->mode) {
5193       const struct glsl_type *type = var->type;
5194       if (nir_is_arrayed_io(var, nir->info.stage))
5195          type = glsl_get_array_element(type);
5196       if (var->data.fb_fetch_output != ris->fb_fetch_output)
5197          continue;
5198       if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
5199          continue;
5200       unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5201       if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
5202          continue;
5203       unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5204       assert(!glsl_type_contains_64bit(type));
5205       uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
5206       if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
5207          return var;
5208    }
5209    return NULL;
5210 }
5211 
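/* debug only (ZINK_DEBUG=nir/spirv): accumulate a readable name for the i/o variable
 * being constructed
 */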
5212 static void
5213 update_io_var_name(struct rework_io_state *ris, const char *name)
5214 {
5215    if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
5216       return;
5217    if (!name)
5218       return;
5219    if (ris->name && !strcmp(ris->name, name))
5220       return;
5221    if (ris->newname && !strcmp(ris->newname, name))
5222       return;
5223    if (ris->newname) {
5224       ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
5225    } else if (ris->name) {
5226       ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
5227    } else {
5228       ris->newname = ralloc_strdup(ris->nir, name);
5229    }
5230 }
5231 
5232 /* check/update tracking state for variable info */
5233 static void
5234 update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
5235 {
5236    bool is_load = false;
5237    bool is_input = false;
5238    bool is_interp = false;
5239    filter_io_instr(intr, &is_load, &is_input, &is_interp);
5240    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5241    unsigned frac = nir_intrinsic_component(intr);
5242    /* the mask of components for the instruction */
5243    uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
5244 
5245    /* always check for existing variables first */
5246    struct rework_io_state test = {
5247       .location = ris->location,
5248       .mode = ris->mode,
5249       .stage = ris->stage,
5250       .arrayed_io = io_instr_is_arrayed(intr),
5251       .medium_precision = sem.medium_precision,
5252       .fb_fetch_output = sem.fb_fetch_output,
5253       .dual_source_blend_index = sem.dual_source_blend_index,
5254       .component_mask = cmask,
5255       .array_size = sem.num_slots > 1 ? sem.num_slots : 0,
5256    };
5257    if (find_rework_var(ris->nir, &test))
5258       return;
5259 
5260    /* filter ignored components to scan later:
5261     * - ignore no-overlapping-components case
5262     * - always match fbfetch and dual src blend
5263     */
5264    if (ris->component_mask &&
5265        (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
5266       ris->ignored_component_mask |= cmask;
5267       return;
5268    }
5269 
5270    assert(!ris->indirect_only || sem.num_slots > 1);
5271    if (sem.num_slots > 1)
5272       ris->array_size = MAX2(ris->array_size, sem.num_slots);
5273 
5274    assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
5275    ris->arrayed_io = io_instr_is_arrayed(intr);
5276 
5277    ris->component_mask |= cmask;
5278 
5279    unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5280    assert(!ris->bit_size || ris->bit_size == bit_size);
5281    ris->bit_size = bit_size;
5282 
5283    nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5284    if (ris->type) {
5285       /* in the case of clashing types, this heuristic guarantees some semblance of a match */
5286       if (ris->type & nir_type_float || type & nir_type_float) {
5287          ris->type = nir_type_float | bit_size;
5288       } else if (ris->type & nir_type_int || type & nir_type_int) {
5289          ris->type = nir_type_int | bit_size;
5290       } else if (ris->type & nir_type_uint || type & nir_type_uint) {
5291          ris->type = nir_type_uint | bit_size;
5292       } else {
5293          assert(bit_size == 1);
5294          ris->type = nir_type_bool;
5295       }
5296    } else {
5297       ris->type = type;
5298    }
5299 
5300    update_io_var_name(ris, intr->name);
5301 
5302    ris->medium_precision |= sem.medium_precision;
5303    ris->fb_fetch_output |= sem.fb_fetch_output;
5304    ris->dual_source_blend_index |= sem.dual_source_blend_index;
5305    if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5306       ris->base = nir_intrinsic_base(intr);
5307 }
5308 
5309 /* instruction-level scanning for variable data */
5310 static bool
5311 scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5312 {
5313    struct rework_io_state *ris = data;
5314    bool is_load = false;
5315    bool is_input = false;
5316    bool is_interp = false;
5317    /* mode-based filtering */
5318    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5319       return false;
5320    if (ris->mode == nir_var_shader_in) {
5321       if (!is_input)
5322          return false;
5323    } else {
5324       if (is_input)
5325          return false;
5326    }
5327    /* location-based filtering */
5328    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5329    if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
5330       return false;
5331 
5332    /* only scan indirect i/o when indirect_only is set */
5333    nir_src *src_offset = nir_get_io_offset_src(intr);
5334    if (!nir_src_is_const(*src_offset)) {
5335       if (!ris->indirect_only)
5336          return false;
5337       update_io_var_state(intr, ris);
5338       return false;
5339    }
5340 
5341    /* don't scan direct i/o when indirect_only is set */
5342    if (ris->indirect_only)
5343       return false;
5344 
5345    update_io_var_state(intr, ris);
5346    return false;
5347 }
5348 
5349 /* scan a given i/o slot for state info */
5350 static struct rework_io_state
5351 scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
5352 {
5353    struct rework_io_state ris = {
5354       .location = location,
5355       .mode = mode,
5356       .stage = nir->info.stage,
5357       .nir = nir,
5358    };
5359 
5360    struct rework_io_state test;
5361    do {
5362       update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
5363       test = ris;
5364       /* always run indirect scan first to detect potential overlaps */
5365       if (scan_indirects) {
5366          ris.indirect_only = true;
5367          nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5368       }
5369       ris.indirect_only = false;
5370       nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5371       /* keep scanning until no changes found */
5372    } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
5373    return ris;
5374 }
5375 
5376 /* create a variable using explicit/scan info */
5377 static void
5378 create_io_var(nir_shader *nir, struct rework_io_state *ris)
5379 {
5380    char name[1024];
5381    assert(ris->component_mask);
5382    if (ris->newname || ris->name) {
5383       snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
5384    /* always use builtin name where possible */
5385    } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
5386       snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
5387    } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
5388       snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
5389    } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
5390       snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
5391    } else {
5392       int c = ffs(ris->component_mask) - 1;
5393       if (c)
5394          snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
5395       else
5396          snprintf(name, sizeof(name), "slot_%u", ris->location);
5397    }
5398    /* calculate vec/array type */
5399    int frac = ffs(ris->component_mask) - 1;
5400    int num_components = util_last_bit(ris->component_mask) - frac;
5401    assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
5402    const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
5403    if (ris->array_size)
5404       vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
5405    if (ris->arrayed_io) {
5406       /* tess size may be unknown with generated tcs */
5407       unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
5408                          nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */;
5409       vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
5410    }
5411    nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
5412    var->data.location_frac = frac;
5413    var->data.location = ris->location;
5414    /* gallium vertex inputs use intrinsic 'base' indexing */
5415    if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5416       var->data.driver_location = ris->base;
5417    var->data.patch = ris->location >= VARYING_SLOT_PATCH0 ||
5418                      ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5419                       (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER));
5420    /* set flat by default: add_derefs will fill this in later after more shader passes */
5421    if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
5422       var->data.interpolation = INTERP_MODE_FLAT;
5423    var->data.fb_fetch_output = ris->fb_fetch_output;
5424    var->data.index = ris->dual_source_blend_index;
5425    var->data.precision = ris->medium_precision;
5426    /* only clip/cull dist and tess levels are compact */
5427    if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
5428       var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
5429 }
5430 
5431 /* loop the i/o mask and generate variables for specified locations */
5432 static void
5433 loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
5434 {
5435    ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
5436    u_foreach_bit64(slot, mask) {
5437       if (patch)
5438          slot += VARYING_SLOT_PATCH0;
5439 
5440       /* this should've been handled explicitly */
5441       assert(is_vertex_input || !is_clipcull_dist(slot));
5442 
5443       unsigned remaining = 0;
5444       do {
5445          /* scan the slot for usage */
5446          struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect);
5447          /* one of these must be true or things have gone very wrong */
5448          assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
5449          /* release builds only */
5450          if (!ris.component_mask)
5451             break;
5452 
5453          /* whatever reaches this point is either enough info to create a variable or an existing variable */
5454          if (!find_rework_var(nir, &ris))
5455             create_io_var(nir, &ris);
5456          /* scanning may detect multiple potential variables per location at component offsets: process again */
5457          remaining = ris.ignored_component_mask;
5458       } while (remaining);
5459    }
5460 }
5461 
5462 /* for a given mode, generate variables */
5463 static void
5464 rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
5465 {
5466    assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5467    assert(util_bitcount(mode) == 1);
5468    bool found = false;
5469    /* if no i/o, skip */
5470    if (mode == nir_var_shader_out)
5471       found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
5472    else
5473       found = nir->info.inputs_read || nir->info.patch_inputs_read;
5474    if (!found)
5475       return;
5476 
5477    /* use local copies to enable incremental processing */
5478    uint64_t inputs_read = nir->info.inputs_read;
5479    uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
5480    uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
5481    uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly;
5482 
5483    /* fragment outputs are special: handle separately */
5484    if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5485       assert(!outputs_accessed_indirectly);
5486       u_foreach_bit64(slot, outputs_accessed) {
5487          struct rework_io_state ris = {
5488             .location = slot,
5489             .mode = mode,
5490             .stage = nir->info.stage,
5491          };
5492          /* explicitly handle builtins */
5493          switch (slot) {
5494          case FRAG_RESULT_DEPTH:
5495          case FRAG_RESULT_STENCIL:
5496          case FRAG_RESULT_SAMPLE_MASK:
5497             ris.bit_size = 32;
5498             ris.component_mask = 0x1;
5499             ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
5500             create_io_var(nir, &ris);
5501             outputs_accessed &= ~BITFIELD64_BIT(slot);
5502             break;
5503          default:
5504             break;
5505          }
5506       }
5507       /* the rest of the outputs can be generated normally */
5508       loop_io_var_mask(nir, mode, false, false, outputs_accessed);
5509       return;
5510    }
5511 
5512    /* vertex inputs are special: handle separately */
5513    if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5514       assert(!inputs_read_indirectly);
5515       u_foreach_bit64(slot, inputs_read) {
5516          /* explicitly handle builtins */
5517          if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
5518             continue;
5519 
5520          uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
5521          struct rework_io_state ris = {
5522             .location = slot,
5523             .mode = mode,
5524             .stage = nir->info.stage,
5525             .bit_size = 32,
5526             .component_mask = component_mask,
5527             .type = nir_type_float32,
5528             .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
5529          };
5530          create_io_var(nir, &ris);
5531          inputs_read &= ~BITFIELD64_BIT(slot);
5532       }
5533       /* the rest of the inputs can be generated normally */
5534       loop_io_var_mask(nir, mode, false, false, inputs_read);
5535       return;
5536    }
5537 
5538    /* these are the masks to process based on the mode: nothing "special" as above */
5539    uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
5540    uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
5541    u_foreach_bit64(slot, mask) {
5542       struct rework_io_state ris = {
5543          .location = slot,
5544          .mode = mode,
5545          .stage = nir->info.stage,
5546          .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
5547       };
5548       /* explicitly handle builtins */
5549       unsigned max_components = 0;
5550       switch (slot) {
5551       case VARYING_SLOT_FOGC:
5552          /* use intr components */
5553          break;
5554       case VARYING_SLOT_POS:
5555       case VARYING_SLOT_CLIP_VERTEX:
5556       case VARYING_SLOT_PNTC:
5557       case VARYING_SLOT_BOUNDING_BOX0:
5558       case VARYING_SLOT_BOUNDING_BOX1:
5559          max_components = 4;
5560          ris.type = nir_type_float32;
5561          break;
5562       case VARYING_SLOT_CLIP_DIST0:
5563          max_components = nir->info.clip_distance_array_size;
5564          assert(max_components);
5565          ris.type = nir_type_float32;
5566          break;
5567       case VARYING_SLOT_CULL_DIST0:
5568          max_components = nir->info.cull_distance_array_size;
5569          assert(max_components);
5570          ris.type = nir_type_float32;
5571          break;
5572       case VARYING_SLOT_CLIP_DIST1:
5573       case VARYING_SLOT_CULL_DIST1:
5574          mask &= ~BITFIELD64_BIT(slot);
5575          indirect_mask &= ~BITFIELD64_BIT(slot);
5576          continue;
5577       case VARYING_SLOT_TESS_LEVEL_OUTER:
5578          max_components = 4;
5579          ris.type = nir_type_float32;
5580          break;
5581       case VARYING_SLOT_TESS_LEVEL_INNER:
5582          max_components = 2;
5583          ris.type = nir_type_float32;
5584          break;
5585       case VARYING_SLOT_PRIMITIVE_ID:
5586       case VARYING_SLOT_LAYER:
5587       case VARYING_SLOT_VIEWPORT:
5588       case VARYING_SLOT_FACE:
5589       case VARYING_SLOT_VIEW_INDEX:
5590       case VARYING_SLOT_VIEWPORT_MASK:
5591          ris.type = nir_type_int32;
5592          max_components = 1;
5593          break;
5594       case VARYING_SLOT_PSIZ:
5595          max_components = 1;
5596          ris.type = nir_type_float32;
5597          break;
5598       default:
5599          break;
5600       }
5601       if (!max_components)
5602          continue;
5603       switch (slot) {
5604       case VARYING_SLOT_CLIP_DIST0:
5605       case VARYING_SLOT_CLIP_DIST1:
5606       case VARYING_SLOT_CULL_DIST0:
5607       case VARYING_SLOT_CULL_DIST1:
5608       case VARYING_SLOT_TESS_LEVEL_OUTER:
5609       case VARYING_SLOT_TESS_LEVEL_INNER:
5610          /* compact arrays */
5611          ris.component_mask = 0x1;
5612          ris.array_size = max_components;
5613          break;
5614       default:
5615          ris.component_mask = BITFIELD_MASK(max_components);
5616          break;
5617       }
5618       ris.bit_size = 32;
5619       create_io_var(nir, &ris);
5620       mask &= ~BITFIELD64_BIT(slot);
5621       /* eliminate clip/cull distance scanning early */
5622       indirect_mask &= ~BITFIELD64_BIT(slot);
5623    }
5624 
5625    /* patch i/o */
5626    if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
5627        (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
5628       uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
5629       uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly;
5630       uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
5631 
5632       loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
5633       loop_io_var_mask(nir, mode, false, true, patch_mask);
5634    }
5635 
5636    /* regular i/o */
5637    loop_io_var_mask(nir, mode, true, false, indirect_mask);
5638    loop_io_var_mask(nir, mode, false, false, mask);
5639 }
5640 
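/* i/o type size callback: count vec4 attribute slots */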
5641 static int
5642 zink_type_size(const struct glsl_type *type, bool bindless)
5643 {
5644    return glsl_count_attribute_slots(type, false);
5645 }
5646 
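/* nir_lower_mem_access_bit_sizes callback: cap accesses at vec4 and shrink the bit size
 * of under-aligned loads/stores to match their alignment
 */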
5647 static nir_mem_access_size_align
5648 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5649                          uint8_t bit_size, uint32_t align,
5650                          uint32_t align_offset, bool offset_is_const,
5651                          enum gl_access_qualifier access, const void *cb_data)
5652 {
5653    align = nir_combined_align(align, align_offset);
5654 
5655    assert(util_is_power_of_two_nonzero(align));
5656 
5657    /* simply drop the bit_size for unaligned load/stores */
5658    if (align < (bit_size / 8)) {
5659       return (nir_mem_access_size_align){
5660          .num_components = MIN2(bytes / align, 4),
5661          .bit_size = align * 8,
5662          .align = align,
5663          .shift = nir_mem_access_shift_method_scalar,
5664       };
5665    } else {
5666       return (nir_mem_access_size_align){
5667          .num_components = MIN2(bytes / (bit_size / 8), 4),
5668          .bit_size = bit_size,
5669          .align = bit_size / 8,
5670          .shift = nir_mem_access_shift_method_scalar,
5671       };
5672    }
5673 }
5674 
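/* scratch variant: force every access to the single bit size passed through cb_data */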
5675 static nir_mem_access_size_align
5676 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5677                                  uint8_t bit_size, uint32_t align,
5678                                  uint32_t align_offset, bool offset_is_const,
5679                                  enum gl_access_qualifier access, const void *cb_data)
5680 {
5681    bit_size = *(const uint8_t *)cb_data;
5682    align = nir_combined_align(align, align_offset);
5683 
5684    assert(util_is_power_of_two_nonzero(align));
5685 
5686    return (nir_mem_access_size_align){
5687       .num_components = MIN2(bytes / (bit_size / 8), 4),
5688       .bit_size = bit_size,
5689       .align = bit_size / 8,
5690       .shift = nir_mem_access_shift_method_scalar,
5691    };
5692 }
5693 
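/* find the smallest bit size used by any scratch load/store */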
5694 static bool
5695 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5696 {
5697    uint8_t *bit_size = data;
5698    switch (instr->intrinsic) {
5699    case nir_intrinsic_load_scratch:
5700       *bit_size = MIN2(*bit_size, instr->def.bit_size);
5701       return false;
5702    case nir_intrinsic_store_scratch:
5703       *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5704       return false;
5705    default:
5706       return false;
5707    }
5708 }
5709 
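/* lower all scratch access to the smallest bit size used by the shader */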
5710 static bool
5711 alias_scratch_memory(nir_shader *nir)
5712 {
5713    uint8_t bit_size = 64;
5714 
5715    nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5716    nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5717       .modes = nir_var_function_temp,
5718       .may_lower_unaligned_stores_to_atomics = true,
5719       .callback = mem_access_scratch_size_align_cb,
5720       .cb_data = &bit_size,
5721    };
5722    return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5723 }
5724 
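/* nir_lower_alu_width callback: clamp vec8/vec16 ALU ops to vec4 */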
5725 static uint8_t
5726 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5727 {
5728    return 4;
5729 }
5730 
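/* nir_lower_bit_size callback: perform bit_count/find_lsb/ifind_msb/ufind_msb at 32bit
 * when their sources are not already 32bit
 */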
5731 static unsigned
5732 zink_lower_bit_size_cb(const nir_instr *instr, void *data)
5733 {
5734    switch (instr->type) {
5735    case nir_instr_type_alu: {
5736       nir_alu_instr *alu = nir_instr_as_alu(instr);
5737       switch (alu->op) {
5738       case nir_op_bit_count:
5739       case nir_op_find_lsb:
5740       case nir_op_ifind_msb:
5741       case nir_op_ufind_msb:
5742          return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32;
5743       default:
5744          return 0;
5745       }
5746    }
5747    default:
5748       return 0;
5749    }
5750 }
5751 
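/* rewrite generic vertex attribute locations to VERT_ATTRIB_GENERIC0 + driver base */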
5752 static bool
5753 fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5754 {
5755    bool is_load = false;
5756    bool is_input = false;
5757    bool is_interp = false;
5758    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input)
5759       return false;
5760 
5761    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5762    if (sem.location < VERT_ATTRIB_GENERIC0)
5763       return false;
5764    sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr);
5765    nir_intrinsic_set_io_semantics(intr, sem);
5766    return true;
5767 }
5768 
5769 static bool
5770 fix_vertex_input_locations(nir_shader *nir)
5771 {
5772    if (nir->info.stage != MESA_SHADER_VERTEX)
5773       return false;
5774 
5775    return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
5776 }
5777 
5778 struct trivial_revectorize_state {
5779    bool has_xfb;
5780    uint32_t component_mask;
5781    nir_intrinsic_instr *base;
5782    nir_intrinsic_instr *next_emit_vertex;
5783    nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
5784    struct set *deletions;
5785 };
5786 
5787 /* always skip xfb; scalarized xfb is preferred */
5788 static bool
5789 intr_has_xfb(nir_intrinsic_instr *intr)
5790 {
5791    if (!nir_intrinsic_has_io_xfb(intr))
5792       return false;
5793    for (unsigned i = 0; i < 2; i++) {
5794       if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
5795          return true;
5796       }
5797    }
5798    return false;
5799 }
5800 
5801 /* helper to avoid vectorizing i/o for different vertices */
5802 static nir_intrinsic_instr *
5803 find_next_emit_vertex(nir_intrinsic_instr *intr)
5804 {
5805    bool found = false;
5806    nir_foreach_instr_safe(instr, intr->instr.block) {
5807       if (instr->type == nir_instr_type_intrinsic) {
5808          nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5809          if (!found && test_intr != intr)
5810             continue;
5811          if (!found) {
5812             assert(intr == test_intr);
5813             found = true;
5814             continue;
5815          }
5816          if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5817             return test_intr;
5818       }
5819    }
5820    return NULL;
5821 }
5822 
5823 /* scan for vectorizable instrs on a given location */
5824 static bool
5825 trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
5826 {
5827    nir_intrinsic_instr *base = state->base;
5828 
5829    if (intr == base)
5830       return false;
5831 
5832    if (intr->intrinsic != base->intrinsic)
5833       return false;
5834 
5835    if (_mesa_set_search(state->deletions, intr))
5836       return false;
5837 
5838    bool is_load = false;
5839    bool is_input = false;
5840    bool is_interp = false;
5841    filter_io_instr(intr, &is_load, &is_input, &is_interp);
5842 
5843    nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
5844    nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
5845    nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
5846    nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5847    int c = nir_intrinsic_component(intr);
5848    /* already detected */
5849    if (state->component_mask & BITFIELD_BIT(c))
5850       return false;
5851    /* not a match */
5852    if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
5853       return false;
5854    /* only vectorize when all srcs match */
5855    for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
5856       if (!nir_srcs_equal(intr->src[i], base->src[i]))
5857          return false;
5858    }
5859    /* never match xfb */
5860    state->has_xfb |= intr_has_xfb(intr);
5861    if (state->has_xfb)
5862       return false;
5863    if (nir->info.stage == MESA_SHADER_GEOMETRY) {
5864       /* only match same vertex */
5865       if (state->next_emit_vertex != find_next_emit_vertex(intr))
5866          return false;
5867    }
5868    uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
5869    state->component_mask |= mask;
5870    u_foreach_bit(component, mask)
5871       state->merge[component] = intr;
5872 
5873    return true;
5874 }
5875 
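/* starting from a scalar i/o op, collect matching ops in the same block (same slot, type,
 * srcs, and emitted vertex for GS) and merge contiguous components into one vectorized
 * load/store; merged instructions are queued in 'data' for later deletion
 */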
5876 static bool
5877 trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
5878 {
5879    bool is_load = false;
5880    bool is_input = false;
5881    bool is_interp = false;
5882    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5883       return false;
5884    if (intr->num_components != 1)
5885       return false;
5886    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5887    if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
5888       /* always ignore compact arrays */
5889       switch (sem.location) {
5890       case VARYING_SLOT_CLIP_DIST0:
5891       case VARYING_SLOT_CLIP_DIST1:
5892       case VARYING_SLOT_CULL_DIST0:
5893       case VARYING_SLOT_CULL_DIST1:
5894       case VARYING_SLOT_TESS_LEVEL_INNER:
5895       case VARYING_SLOT_TESS_LEVEL_OUTER:
5896          return false;
5897       default: break;
5898       }
5899    }
5900    /* always ignore to-be-deleted instrs */
5901    if (_mesa_set_search(data, intr))
5902       return false;
5903 
5904    /* never vectorize xfb */
5905    if (intr_has_xfb(intr))
5906       return false;
5907 
5908    int ic = nir_intrinsic_component(intr);
5909    uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
5910    /* already vectorized */
5911    if (util_bitcount(mask) == 4)
5912       return false;
5913    struct trivial_revectorize_state state = {
5914       .component_mask = mask,
5915       .base = intr,
5916       /* avoid clobbering i/o for different vertices */
5917       .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
5918       .deletions = data,
5919    };
5920    u_foreach_bit(bit, mask)
5921       state.merge[bit] = intr;
5922    bool progress = false;
5923    nir_foreach_instr(instr, intr->instr.block) {
5924       if (instr->type != nir_instr_type_intrinsic)
5925          continue;
5926       nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5927       /* no matching across vertex emission */
5928       if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5929          break;
5930       progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
5931    }
5932    if (!progress || state.has_xfb)
5933       return false;
5934 
5935    /* verify nothing crazy happened */
5936    assert(state.component_mask);
5937    for (unsigned i = 0; i < 4; i++) {
5938       assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
5939    }
5940 
5941    unsigned first_component = ffs(state.component_mask) - 1;
5942    unsigned num_components = util_bitcount(state.component_mask);
5943    unsigned num_contiguous = 0;
5944    uint32_t contiguous_mask = 0;
5945    for (unsigned i = 0; i < num_components; i++) {
5946       unsigned c = i + first_component;
5947       /* calc mask of contiguous components to vectorize */
5948       if (state.component_mask & BITFIELD_BIT(c)) {
5949          num_contiguous++;
5950          contiguous_mask |= BITFIELD_BIT(c);
5951       }
5952       /* on the first gap or the last component, vectorize */
5953       if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
5954          if (num_contiguous > 1) {
5955             /* reindex to enable easy src/dest index comparison */
5956             nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
5957             /* determine the first/last instr to use for the base (vectorized) load/store */
5958             unsigned first_c = ffs(contiguous_mask) - 1;
5959             nir_intrinsic_instr *base = NULL;
5960             unsigned test_idx = is_load ? UINT32_MAX : 0;
5961             for (unsigned j = 0; j < num_contiguous; j++) {
5962                unsigned merge_c = j + first_c;
5963                nir_intrinsic_instr *merge_intr = state.merge[merge_c];
5964                /* avoid breaking ssa ordering by using:
5965                 * - first instr for vectorized load
5966                 * - last instr for vectorized store
5967                 * this guarantees all srcs have been seen
5968                 */
5969                if ((is_load && merge_intr->def.index < test_idx) ||
5970                    (!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
5971                   test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
5972                   base = merge_intr;
5973                }
5974             }
5975             assert(base);
5976             /* update instr components */
5977             nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
5978             unsigned orig_components = base->num_components;
5979             base->num_components = num_contiguous;
5980             /* do rewrites after loads and before stores */
5981             b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
5982             if (is_load) {
5983                base->def.num_components = num_contiguous;
5984                /* iterate the contiguous loaded components and rewrite merged dests */
5985                for (unsigned j = 0; j < num_contiguous; j++) {
5986                   unsigned merge_c = j + first_c;
5987                   nir_intrinsic_instr *merge_intr = state.merge[merge_c];
5988                   /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
5989                   unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
5990                   nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
5991                   nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? swiz->parent_instr : &merge_intr->instr);
5992                   j += use_components - 1;
5993                }
5994             } else {
5995                nir_def *comp[NIR_MAX_VEC_COMPONENTS];
5996                /* generate swizzled vec of store components and rewrite store src */
5997                for (unsigned j = 0; j < num_contiguous; j++) {
5998                   unsigned merge_c = j + first_c;
5999                   nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6000                   /* detect if the merged instr stored multiple components and extract them for rewrite */
6001                   unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
6002                   for (unsigned k = 0; k < use_components; k++)
6003                      comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
6004                   j += use_components - 1;
6005                }
6006                nir_def *val = nir_vec(b, comp, num_contiguous);
6007                nir_src_rewrite(&base->src[0], val);
6008                nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
6009             }
6010             /* deleting instructions during a foreach explodes the compiler, so delete later */
6011             for (unsigned j = 0; j < num_contiguous; j++) {
6012                unsigned merge_c = j + first_c;
6013                nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6014                if (merge_intr != base)
6015                   _mesa_set_add(data, &merge_intr->instr);
6016             }
6017          }
6018          contiguous_mask = 0;
6019          num_contiguous = 0;
6020       }
6021    }
6022 
6023    return true;
6024 }
6025 
6026 /* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
6027 static bool
6028 trivial_revectorize(nir_shader *nir)
6029 {
6030    struct set deletions;
6031 
6032    if (nir->info.stage > MESA_SHADER_FRAGMENT)
6033       return false;
6034 
6035    _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6036    bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions);
6037    /* now it's safe to delete */
6038    set_foreach_remove(&deletions, entry) {
6039       nir_instr *instr = (void*)entry->key;
6040       nir_instr_remove(instr);
6041    }
6042    ralloc_free(deletions.table);
6043    return progress;
6044 }
6045 
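/* rewrite nested array derefs on flattened image/sampler arrays into a single index:
 * parent_index * parent_size + index
 */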
6046 static bool
6047 flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data)
6048 {
6049    if (instr->type != nir_instr_type_deref)
6050       return false;
6051 
6052    nir_deref_instr *deref = nir_instr_as_deref(instr);
6053    if (deref->deref_type != nir_deref_type_array)
6054       return false;
6055    nir_deref_instr *parent = nir_deref_instr_parent(deref);
6056    if (!parent || parent->deref_type != nir_deref_type_array)
6057       return false;
6058    nir_variable *var = nir_deref_instr_get_variable(deref);
6059    const struct glsl_type *type = glsl_without_array(var->type);
6060    if (type == var->type || (!glsl_type_is_sampler(type) && !glsl_type_is_image(type)))
6061       return false;
6062 
6063    nir_deref_instr *parent_parent = nir_deref_instr_parent(parent);
6064    int parent_size = glsl_array_size(parent->type);
6065    b->cursor = nir_after_instr(instr);
6066    nir_deref_instr *new_deref = nir_build_deref_array(b, parent_parent, nir_iadd(b, nir_imul_imm(b, parent->arr.index.ssa, parent_size), deref->arr.index.ssa));
6067    nir_def_rewrite_uses_after(&deref->def, &new_deref->def, &new_deref->instr);
6068    _mesa_set_add(data, instr);
6069    _mesa_set_add(data, &parent->instr);
6070    return true;
6071 }
6072 
6073 static bool
6074 flatten_image_arrays(nir_shader *nir)
6075 {
6076    bool progress = false;
6077    nir_foreach_variable_with_modes(var, nir, nir_var_uniform | nir_var_image) {
6078       const struct glsl_type *type = glsl_without_array(var->type);
6079       if (!glsl_type_is_sampler(type) && !glsl_type_is_image(type))
6080          continue;
6081       if (type == var->type)
6082          continue;
6083       var->type = glsl_array_type(type, glsl_get_aoa_size(var->type), sizeof(void*));
6084       progress = true;
6085    }
6086    struct set *deletions = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6087    progress |= nir_shader_instructions_pass(nir, flatten_image_arrays_intr, nir_metadata_dominance, deletions);
6088    set_foreach_remove(deletions, he) {
6089       nir_instr *instr = (void*)he->key;
6090       nir_instr_remove_v(instr);
6091    }
6092    _mesa_set_destroy(deletions, NULL);
6093    if (progress)
6094       nir_fixup_deref_types(nir);
6095    return progress;
6096 }
6097 
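/* clamp constant array deref indices that exceed the array size back to index 0 */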
6098 static bool
6099 bound_image_arrays_instr(struct nir_builder *b, nir_instr *instr, void *data)
6100 {
6101    if (instr->type != nir_instr_type_deref)
6102       return false;
6103 
6104    nir_deref_instr *deref = nir_instr_as_deref(instr);
6105    if (deref->deref_type != nir_deref_type_array)
6106       return false;
6107 
6108    if (!nir_src_is_const(deref->arr.index))
6109       return false;
6110    nir_deref_instr *parent = nir_deref_instr_parent(deref);
6111    int parent_size = glsl_array_size(parent->type);
6112    unsigned idx = nir_src_as_uint(deref->arr.index);
6113    if (idx >= parent_size) {
6114       b->cursor = nir_before_instr(instr);
6115       nir_src_rewrite(&deref->arr.index, nir_imm_zero(b, 1, 32));
6116       return true;
6117    }
6118    return false;
6119 }
6120 
6121 static bool
6122 bound_image_arrays(nir_shader *nir)
6123 {
6124    return nir_shader_instructions_pass(nir, bound_image_arrays_instr, nir_metadata_dominance, NULL);
6125 }
6126 
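/* Allocates the zink_shader wrapper for a freshly created NIR shader: copies
 * shader_info, records the denorm/float-controls behavior advertised through
 * VK_KHR_shader_float_controls, and stashes the NIR so the heavier lowering
 * in zink_shader_init() can run later.
 */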
6127 struct zink_shader *
6128 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
6129 {
6130    struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6131 
6132    zs->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
6133                        nir->info.outputs_written & VARYING_BIT_EDGE;
6134 
6135    zs->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
6136    zs->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
6137    zs->sinfo.broken_arbitary_type_const = screen->driver_compiler_workarounds.broken_const;
6138    if (screen->info.have_KHR_shader_float_controls) {
6139       if (screen->info.props12.shaderDenormFlushToZeroFloat16)
6140          zs->sinfo.float_controls.flush_denorms |= 0x1;
6141       if (screen->info.props12.shaderDenormFlushToZeroFloat32)
6142          zs->sinfo.float_controls.flush_denorms |= 0x2;
6143       if (screen->info.props12.shaderDenormFlushToZeroFloat64)
6144          zs->sinfo.float_controls.flush_denorms |= 0x4;
6145 
6146       if (screen->info.props12.shaderDenormPreserveFloat16)
6147          zs->sinfo.float_controls.preserve_denorms |= 0x1;
6148       if (screen->info.props12.shaderDenormPreserveFloat32)
6149          zs->sinfo.float_controls.preserve_denorms |= 0x2;
6150       if (screen->info.props12.shaderDenormPreserveFloat64)
6151          zs->sinfo.float_controls.preserve_denorms |= 0x4;
6152 
6153       zs->sinfo.float_controls.denorms_all_independence =
6154          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
6155 
6156       zs->sinfo.float_controls.denorms_32_bit_independence =
6157          zs->sinfo.float_controls.denorms_all_independence ||
6158          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
6159    }
6160    zs->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6161 
6162    util_queue_fence_init(&zs->precompile.fence);
6163    util_dynarray_init(&zs->pipeline_libs, zs);
6164    zs->hash = _mesa_hash_pointer(zs);
6165 
6166    zs->programs = _mesa_pointer_set_create(NULL);
6167    simple_mtx_init(&zs->lock, mtx_plain);
6168    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6169    zs->info.name = ralloc_strdup(zs, nir->info.name);
6170 
6171    zs->can_inline = true;
6172    zs->nir = nir;
6173 
6174    if (nir->info.stage != MESA_SHADER_KERNEL)
6175       match_tex_dests(nir, zs, true);
6176 
6177    return zs;
6178 }
6179 
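/* Stage-independent lowering pipeline for a shader created by
 * zink_shader_create(): scalarizes and reworks I/O, assigns Vulkan descriptor
 * sets/bindings for UBOs, SSBOs, samplers and images, lowers subgroup ops to
 * what the device supports, and finally serializes the resulting NIR into
 * zs->blob for later compiles.
 */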
6180 void
6181 zink_shader_init(struct zink_screen *screen, struct zink_shader *zs)
6182 {
6183    bool have_psiz = false;
6184    nir_shader *nir = zs->nir;
6185 
6186    if (nir->info.stage == MESA_SHADER_KERNEL) {
6187       nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
6188          .modes = nir_var_all ^ nir_var_function_temp,
6189          .may_lower_unaligned_stores_to_atomics = true,
6190          .callback = mem_access_size_align_cb,
6191          .cb_data = screen,
6192       };
6193       NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
6194       NIR_PASS_V(nir, nir_lower_bit_size, zink_lower_bit_size_cb, NULL);
6195       NIR_PASS_V(nir, alias_scratch_memory);
6196       NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
6197       NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
6198    }
6199 
6200    NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
6201    optimize_nir(nir, NULL, true);
6202    NIR_PASS_V(nir, bound_image_arrays);
6203    NIR_PASS_V(nir, flatten_image_arrays);
6204    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
6205       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
6206          NIR_PASS_V(nir, lower_bindless_io);
6207          break;
6208       }
6209    }
6210    if (nir->info.stage < MESA_SHADER_FRAGMENT)
6211       nir_gather_xfb_info_from_intrinsics(nir);
6212    NIR_PASS_V(nir, fix_vertex_input_locations);
6213    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6214    scan_nir(screen, nir, zs);
6215    NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL);
6216    NIR_PASS_V(nir, trivial_revectorize);
6217    if (nir->info.io_lowered) {
6218       rework_io_vars(nir, nir_var_shader_in, zs);
6219       rework_io_vars(nir, nir_var_shader_out, zs);
6220       nir_sort_variables_by_location(nir, nir_var_shader_in);
6221       nir_sort_variables_by_location(nir, nir_var_shader_out);
6222    }
6223 
6224    if (nir->info.stage < MESA_SHADER_COMPUTE)
6225       create_gfx_pushconst(nir);
6226 
6227    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
6228             nir->info.stage == MESA_SHADER_TESS_EVAL)
6229       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
6230 
6231    if (nir->info.stage < MESA_SHADER_FRAGMENT)
6232       have_psiz = check_psiz(nir);
6233    if (nir->info.stage == MESA_SHADER_FRAGMENT)
6234       zs->flat_flags = zink_flat_flags(nir);
6235 
6236    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
6237       NIR_PASS_V(nir, fixup_io_locations);
6238 
6239    NIR_PASS_V(nir, lower_basevertex);
6240    NIR_PASS_V(nir, lower_baseinstance);
6241    NIR_PASS_V(nir, split_bitfields);
6242    if (!screen->info.feats.features.shaderStorageImageMultisample)
6243       NIR_PASS_V(nir, strip_tex_ms);
6244    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
6245 
6246    if (screen->need_2D_zs)
6247       NIR_PASS_V(nir, lower_1d_shadow, screen);
6248 
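   /* Subgroup lowering: ballots use a 32-bit/4-component layout, and if the
    * device does not advertise subgroup support for this stage the subgroup
    * size is pinned to 1 with trivial vote lowering, so subgroup ops collapse
    * to single-invocation behavior.
    */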
6249    {
6250       nir_lower_subgroups_options subgroup_options = {0};
6251       subgroup_options.lower_to_scalar = true;
6252       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
6253       subgroup_options.ballot_bit_size = 32;
6254       subgroup_options.ballot_components = 4;
6255       subgroup_options.lower_subgroup_masks = true;
6256       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
6257          subgroup_options.subgroup_size = 1;
6258          subgroup_options.lower_vote_trivial = true;
6259       }
6260       subgroup_options.lower_inverse_ballot = true;
6261       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
6262    }
6263 
6264    optimize_nir(nir, NULL, true);
6265    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6266    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
6267                                           nir_lower_demote_if_to_cf |
6268                                           nir_lower_terminate_if_to_cf));
6269 
6270    bool needs_size = analyze_io(zs, nir);
6271    NIR_PASS_V(nir, unbreak_bos, zs, needs_size);
6272    /* these run at compile time instead if there could be inlined uniforms */
6273    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
6274       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
6275       NIR_PASS_V(nir, rewrite_bo_access, screen);
6276       NIR_PASS_V(nir, remove_bo_access, zs);
6277    }
6278 
6279    struct zink_bindless_info bindless = {0};
6280    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6281    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
6282       var->data.is_xfb = false;
6283 
6284    optimize_nir(nir, NULL, true);
6285    prune_io(nir);
6286 
6287    if (nir->info.stage == MESA_SHADER_KERNEL) {
6288       NIR_PASS_V(nir, type_images);
6289    }
6290 
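   /* Walk the remaining UBO/SSBO/sampler/image variables in reverse
    * declaration order and assign their Vulkan descriptor set and binding.
    * UBO driver_location 0 is the per-stage push/dynamic uniform descriptor;
    * every other binding is recorded once (tracked via the *_binding_mask
    * bits) in zs->bindings[] so descriptor layouts can be built later.
    */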
6291    unsigned ubo_binding_mask = 0;
6292    unsigned ssbo_binding_mask = 0;
6293    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
6294       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
6295                                         nir_var_image |
6296                                         nir_var_mem_ubo |
6297                                         nir_var_mem_ssbo)) {
6298          enum zink_descriptor_type ztype;
6299          const struct glsl_type *type = glsl_without_array(var->type);
6300          if (var->data.mode == nir_var_mem_ubo) {
6301             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
6302             /* buffer 0 is a push descriptor */
6303             var->data.descriptor_set = !!var->data.driver_location;
6304             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
6305                                 zink_binding(nir->info.stage,
6306                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
6307                                              var->data.driver_location,
6308                                              screen->compact_descriptors);
6309             assert(var->data.driver_location || var->data.binding < 10);
6310             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6311             int binding = var->data.binding;
6312 
6313             if (!var->data.driver_location) {
6314                zs->has_uniforms = true;
6315             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
6316                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6317                zs->bindings[ztype][zs->num_bindings[ztype]].binding = binding;
6318                zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6319                zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6320                assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6321                zs->num_bindings[ztype]++;
6322                ubo_binding_mask |= BITFIELD_BIT(binding);
6323             }
6324          } else if (var->data.mode == nir_var_mem_ssbo) {
6325             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
6326             var->data.descriptor_set = screen->desc_set_id[ztype];
6327             var->data.binding = zink_binding(clamp_stage(&nir->info),
6328                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
6329                                              var->data.driver_location,
6330                                              screen->compact_descriptors);
6331             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
6332                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6333                zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6334                zs->bindings[ztype][zs->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
6335                zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6336                assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6337                zs->num_bindings[ztype]++;
6338                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
6339             }
6340          } else {
6341             assert(var->data.mode == nir_var_uniform ||
6342                    var->data.mode == nir_var_image);
6343             if (var->data.bindless) {
6344                zs->bindless = true;
6345                handle_bindless_var(nir, var, type, &bindless);
6346             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
6347                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : glsl_type_is_bare_sampler(type) ? VK_DESCRIPTOR_TYPE_SAMPLER : zink_sampler_type(type);
6348                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
6349                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
6350                ztype = zink_desc_type_from_vktype(vktype);
6351                var->data.driver_location = var->data.binding;
6352                var->data.descriptor_set = screen->desc_set_id[ztype];
6353                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
6354                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6355                zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6356                zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6357                if (glsl_type_is_array(var->type))
6358                   zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
6359                else
6360                   zs->bindings[ztype][zs->num_bindings[ztype]].size = 1;
6361                zs->num_bindings[ztype]++;
6362             } else if (var->data.mode == nir_var_uniform) {
6363                /* this is a dead uniform */
6364                var->data.mode = 0;
6365                exec_node_remove(&var->node);
6366             }
6367          }
6368       }
6369    }
6370    bool bindless_lowered = false;
6371    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
6372    zs->bindless |= bindless_lowered;
6373 
6374    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
6375       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
6376    if (nir->info.stage != MESA_SHADER_KERNEL)
6377       NIR_PASS_V(nir, match_tex_dests, zs, false);
6378 
6379    if (!nir->info.internal)
6380       nir_foreach_shader_out_variable(var, nir)
6381          var->data.explicit_xfb_buffer = 0;
6382    if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
6383       update_so_info(zs, nir, nir->info.outputs_written, have_psiz);
6384    zink_shader_serialize_blob(nir, &zs->blob);
6385    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6386 }
6387 
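/* Final NIR fixups (used as the gallium finalize_nir hook): lowers texture
 * features the SPIR-V backend cannot express, such as projective sampling of
 * cube/MS/array images and tg4 offsets without shaderImageGatherExtended,
 * then runs a last optimization loop and, when driconf enables it, marks
 * uniforms worth inlining.
 */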
6388 char *
6389 zink_shader_finalize(struct pipe_screen *pscreen, struct nir_shader *nir)
6390 {
6391    struct zink_screen *screen = zink_screen(pscreen);
6392 
6393    nir_lower_tex_options tex_opts = {
6394       .lower_invalid_implicit_lod = true,
6395    };
6396    /*
6397       Sampled Image must be an object whose type is OpTypeSampledImage.
6398       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
6399       or Rect, and the Arrayed and MS operands must be 0.
6400       - SPIRV, OpImageSampleProj* opcodes
6401     */
6402    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
6403                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
6404    tex_opts.lower_txp_array = true;
6405    if (!screen->info.feats.features.shaderImageGatherExtended)
6406       tex_opts.lower_tg4_offsets = true;
6407    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
6408    optimize_nir(nir, NULL, false);
6409    if (nir->info.stage == MESA_SHADER_VERTEX)
6410       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6411    if (screen->driconf.inline_uniforms)
6412       nir_find_inlinable_uniforms(nir);
6413 
6414    return NULL;
6415 }
6416 
6417 void
6418 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6419 {
6420    _mesa_set_destroy(shader->programs, NULL);
6421    util_queue_fence_wait(&shader->precompile.fence);
6422    util_queue_fence_destroy(&shader->precompile.fence);
6423    zink_descriptor_shader_deinit(screen, shader);
6424    if (screen->info.have_EXT_shader_object) {
6425       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
6426    } else {
6427       if (shader->precompile.obj.mod)
6428          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
6429       if (shader->precompile.gpl)
6430          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
6431    }
6432    blob_finish(&shader->blob);
6433    ralloc_free(shader->spirv);
6434    free(shader->precompile.bindings);
6435    ralloc_free(shader);
6436 }
6437 
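/* Detaches one gfx program still referencing this shader: removes it from the
 * context's program cache (waiting on any pipeline jobs still in flight),
 * clears the shader's slot, and drops the program reference.  Returns false
 * only once the program set is empty, so the caller loops until then.
 */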
6438 static bool
6439 gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader)
6440 {
6441    /* this shader may still be precompiling, so access here must be locked and singular */
6442    simple_mtx_lock(&shader->lock);
6443    struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL);
6444    struct zink_gfx_program *prog = (void*)(entry ? entry->key : NULL);
6445    if (entry)
6446       _mesa_set_remove(shader->programs, entry);
6447    simple_mtx_unlock(&shader->lock);
6448    if (!prog)
6449       return false;
6450    gl_shader_stage stage = shader->info.stage;
6451    assert(stage < ZINK_GFX_SHADER_COUNT);
6452    util_queue_fence_wait(&prog->base.cache_fence);
6453    unsigned stages_present = prog->stages_present;
6454    if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
6455          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
6456       stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
6457    unsigned idx = zink_program_cache_stages(stages_present);
6458    if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
6459          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
6460       struct hash_table *ht = &prog->base.ctx->program_cache[idx];
6461       simple_mtx_lock(&prog->base.ctx->program_lock[idx]);
6462       struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
6463       assert(he && he->data == prog);
6464       _mesa_hash_table_remove(ht, he);
6465       prog->base.removed = true;
6466       simple_mtx_unlock(&prog->base.ctx->program_lock[idx]);
6467 
6468       for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
6469          for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
6470             hash_table_foreach(&prog->pipelines[r][i], table_entry) {
6471                struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
6472 
6473                util_queue_fence_wait(&pc_entry->fence);
6474             }
6475          }
6476       }
6477    }
6478    if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
6479       prog->shaders[stage] = NULL;
6480       prog->stages_remaining &= ~BITFIELD_BIT(stage);
6481    }
6482    /* only remove generated tcs during parent tes destruction */
6483    if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
6484       prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
6485    if (stage != MESA_SHADER_FRAGMENT &&
6486       prog->shaders[MESA_SHADER_GEOMETRY] &&
6487       prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
6488       shader) {
6489       prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
6490    }
6491    zink_gfx_program_reference(screen, &prog, NULL);
6492    return true;
6493 }
6494 
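/* Full teardown for a graphics-stage shader: prunes every program that still
 * references it, unlinks its pipeline libraries from the screen cache,
 * recursively frees any generated passthrough TCS and generated GS variants
 * it owns, then releases the shader itself.
 */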
6495 void
6496 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6497 {
6498    assert(shader->info.stage != MESA_SHADER_COMPUTE);
6499    util_queue_fence_wait(&shader->precompile.fence);
6500 
6501    /* if the shader is still precompiling, the program set must be pruned under lock */
6502    while (gfx_shader_prune(screen, shader));
6503 
6504    while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
6505       struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
6506       if (!libs->removed) {
6507          libs->removed = true;
6508          unsigned idx = zink_program_cache_stages(libs->stages_present);
6509          simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
6510          _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
6511          simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
6512       }
6513       zink_gfx_lib_cache_unref(screen, libs);
6514    }
6515    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
6516        shader->non_fs.generated_tcs) {
6517       /* automatically destroy generated tcs shaders when tes is destroyed */
6518       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
6519       shader->non_fs.generated_tcs = NULL;
6520    }
6521    if (shader->info.stage != MESA_SHADER_FRAGMENT) {
6522       for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
6523          for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
6524             if (shader->non_fs.generated_gs[i][j]) {
6525                /* automatically destroy generated gs shaders when owner is destroyed */
6526                zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
6527                shader->non_fs.generated_gs[i][j] = NULL;
6528             }
6529          }
6530       }
6531    }
6532    zink_shader_free(screen, shader);
6533 }
6534 
6535 
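/* Recompiles a generated TCS for a new patch-vertex count by patching the
 * OpExecutionMode OutputVertices operand directly in the cached SPIR-V
 * (tcs_vertices_out_word is presumably recorded when the SPIR-V is first
 * emitted), avoiding a full NIR recompile.
 */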
6536 struct zink_shader_object
6537 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
6538 {
6539    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
6540    /* shortcut all the nir passes since we just have to change this one word */
6541    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
6542    return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
6543 }
6544 
6545 /* creating a passthrough tcs shader that's roughly:
6546 
6547 #version 150
6548 #extension GL_ARB_tessellation_shader : require
6549 
6550 in vec4 some_var[gl_MaxPatchVertices];
6551 out vec4 some_var_out;
6552 
6553 layout(push_constant) uniform tcsPushConstants {
6554     layout(offset = 0) float TessLevelInner[2];
6555     layout(offset = 8) float TessLevelOuter[4];
6556 } u_tcsPushConstants;
6557 layout(vertices = $vertices_per_patch) out;
6558 void main()
6559 {
6560   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
6561   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
6562   some_var_out = some_var[gl_InvocationID];
6563 }
6564 
6565 */
6566 void
6567 zink_shader_tcs_init(struct zink_screen *screen, struct zink_shader *zs, nir_shader *tes, nir_shader **nir_ret)
6568 {
6569    nir_shader *nir = zs->nir;
6570 
6571    nir_builder b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir)));
6572 
6573    nir_def *invocation_id = nir_load_invocation_id(&b);
6574 
6575    nir_foreach_shader_in_variable(var, tes) {
6576       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
6577          continue;
6578       const struct glsl_type *in_type = var->type;
6579       const struct glsl_type *out_type = var->type;
6580       char buf[1024];
6581       snprintf(buf, sizeof(buf), "%s_out", var->name);
6582       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
6583          const struct glsl_type *type = var->type;
6584          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
6585          out_type = glsl_array_type(type, nir->info.tess.tcs_vertices_out, 0);
6586       }
6587 
6588       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
6589       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
6590       out->data.location = in->data.location = var->data.location;
6591       out->data.location_frac = in->data.location_frac = var->data.location_frac;
6592 
6593       /* gl_in[] receives values from equivalent built-in output
6594          variables written by the vertex shader (section 2.14.7).  Each array
6595          element of gl_in[] is a structure holding values for a specific vertex of
6596          the input patch.  The length of gl_in[] is equal to the
6597          implementation-dependent maximum patch size (gl_MaxPatchVertices).
6598          - ARB_tessellation_shader
6599        */
6600       /* load the input value for this invocation's vertex and store it to the matching element of the arrayed output */
6601       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
6602       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
6603       copy_vars(&b, out_value, in_value);
6604    }
6605    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
6606    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
6607    gl_TessLevelInner->data.patch = 1;
6608    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
6609    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
6610    gl_TessLevelOuter->data.patch = 1;
6611 
6612    create_gfx_pushconst(nir);
6613 
6614    nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
6615                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
6616    nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
6617                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
6618 
6619    for (unsigned i = 0; i < 2; i++) {
6620       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
6621       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
6622    }
6623    for (unsigned i = 0; i < 4; i++) {
6624       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
6625       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
6626    }
6627 
6628    nir_validate_shader(nir, "created");
6629 
6630    optimize_nir(nir, NULL, true);
6631    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6632    NIR_PASS_V(nir, nir_convert_from_ssa, true);
6633 
6634    *nir_ret = nir;
6635    zink_shader_serialize_blob(nir, &zs->blob);
6636 }
6637 
6638 struct zink_shader *
6639 zink_shader_tcs_create(struct zink_screen *screen, unsigned vertices_per_patch)
6640 {
6641    struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6642    util_queue_fence_init(&zs->precompile.fence);
6643    zs->hash = _mesa_hash_pointer(zs);
6644    zs->programs = _mesa_pointer_set_create(NULL);
6645    simple_mtx_init(&zs->lock, mtx_plain);
6646 
6647    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
6648    nir_function *fn = nir_function_create(nir, "main");
6649    fn->is_entrypoint = true;
6650    nir_function_impl_create(fn);
6651    zs->nir = nir;
6652 
6653    nir->info.tess.tcs_vertices_out = vertices_per_patch;
6654    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6655    zs->non_fs.is_generated = true;
6656    return zs;
6657 }
6658 
6659 bool
6660 zink_shader_has_cubes(nir_shader *nir)
6661 {
6662    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
6663       const struct glsl_type *type = glsl_without_array(var->type);
6664       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
6665          return true;
6666    }
6667    return false;
6668 }
6669 
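/* Blob helpers: shaders are kept serialized in zs->blob and reparsed on
 * demand against the screen's nir_options; serialization strips names and
 * debug info unless a ZINK_DEBUG mode that needs readable output is set.
 */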
6670 nir_shader *
6671 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
6672 {
6673    struct blob_reader blob_reader;
6674    blob_reader_init(&blob_reader, blob->data, blob->size);
6675    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
6676 }
6677 
6678 nir_shader *
6679 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
6680 {
6681    return zink_shader_blob_deserialize(screen, &zs->blob);
6682 }
6683 
6684 void
6685 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
6686 {
6687    blob_init(blob);
6688 #ifndef NDEBUG
6689    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
6690 #else
6691    bool strip = false;
6692 #endif
6693    nir_serialize(blob, nir, strip);
6694 }
6695 
6696 void
6697 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
6698 {
6699    nir_shader *nir = zink_shader_deserialize(screen, zs);
6700    nir_print_shader(nir, fp);
6701    ralloc_free(nir);
6702 }
6703