1 /*
2 * Copyright 2018 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir_opcodes.h"
25 #include "shader_enums.h"
26 #include "zink_context.h"
27 #include "zink_compiler.h"
28 #include "zink_descriptors.h"
29 #include "zink_program.h"
30 #include "zink_screen.h"
31 #include "nir_to_spirv/nir_to_spirv.h"
32
33 #include "pipe/p_state.h"
34
35 #include "nir.h"
36 #include "nir_xfb_info.h"
37 #include "nir/nir_draw_helpers.h"
38 #include "compiler/nir/nir_builder.h"
39 #include "compiler/nir/nir_serialize.h"
40 #include "compiler/nir/nir_builtin_builder.h"
41
42 #include "nir/tgsi_to_nir.h"
43 #include "tgsi/tgsi_dump.h"
44
45 #include "util/u_memory.h"
46
47 #include "compiler/spirv/nir_spirv.h"
48 #include "compiler/spirv/spirv_info.h"
49 #include "vk_util.h"
50
51 bool
52 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
53
54
55 static void
56 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
57 {
58 assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
59 if (glsl_type_is_struct_or_ifc(dst->type)) {
60 for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
61 copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
62 }
63 } else if (glsl_type_is_array_or_matrix(dst->type)) {
64 unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
65 for (unsigned i = 0; i < count; i++) {
66 copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
67 }
68 } else {
69 nir_def *load = nir_load_deref(b, src);
70 nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
71 }
72 }
73
74 static bool
75 is_clipcull_dist(int location)
76 {
77 switch (location) {
78 case VARYING_SLOT_CLIP_DIST0:
79 case VARYING_SLOT_CLIP_DIST1:
80 case VARYING_SLOT_CULL_DIST0:
81 case VARYING_SLOT_CULL_DIST1:
82 return true;
83 default: break;
84 }
85 return false;
86 }
87
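/* sizeof() of a single struct member, without needing an instance of the struct */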
88 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
89
90 static void
91 create_gfx_pushconst(nir_shader *nir)
92 {
93 #define PUSHCONST_MEMBER(member_idx, field) \
94 fields[member_idx].type = \
95 glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
96 fields[member_idx].name = ralloc_asprintf(nir, #field); \
97 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
98
99 nir_variable *pushconst;
100 /* create compatible layout for the ntv push constant loader */
101 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
102 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
103 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
104 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
105 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
106 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
107 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
108 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
109 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
110
111 pushconst = nir_variable_create(nir, nir_var_mem_push_const,
112 glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
113 "gfx_pushconst");
114 pushconst->data.location = INT_MAX; //doesn't really matter
115
116 #undef PUSHCONST_MEMBER
117 }
118
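/* In GL, gl_BaseVertex must read as 0 for non-indexed draws; select between the real
 * base vertex and 0 using the draw_mode_is_indexed push constant.
 */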
119 static bool
120 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
121 {
122 if (instr->intrinsic != nir_intrinsic_load_base_vertex)
123 return false;
124
125 b->cursor = nir_after_instr(&instr->instr);
126 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
127 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
128 load->num_components = 1;
129 nir_def_init(&load->instr, &load->def, 1, 32);
130 nir_builder_instr_insert(b, &load->instr);
131
132 nir_def *composite = nir_build_alu(b, nir_op_bcsel,
133 nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
134 &instr->def,
135 nir_imm_int(b, 0),
136 NULL);
137
138 nir_def_rewrite_uses_after(&instr->def, composite,
139 composite->parent_instr);
140 return true;
141 }
142
143 static bool
144 lower_basevertex(nir_shader *shader)
145 {
146 if (shader->info.stage != MESA_SHADER_VERTEX)
147 return false;
148
149 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
150 return false;
151
152 return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
153 nir_metadata_dominance, NULL);
154 }
155
156
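/* replace gl_DrawID reads with the draw_id value supplied via push constant */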
157 static bool
158 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
159 {
160 if (instr->intrinsic != nir_intrinsic_load_draw_id)
161 return false;
162
163 b->cursor = nir_before_instr(&instr->instr);
164 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
165 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
166 load->num_components = 1;
167 nir_def_init(&load->instr, &load->def, 1, 32);
168 nir_builder_instr_insert(b, &load->instr);
169
170 nir_def_rewrite_uses(&instr->def, &load->def);
171
172 return true;
173 }
174
175 static bool
176 lower_drawid(nir_shader *shader)
177 {
178 if (shader->info.stage != MESA_SHADER_VERTEX)
179 return false;
180
181 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
182 return false;
183
184 return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
185 nir_metadata_dominance, NULL);
186 }
187
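/* point rendering via GS: every vertex emitted on stream 0 is expanded into a
 * 4-vertex triangle strip sized from gl_PointSize and the viewport-scale push constant.
 */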
188 struct lower_gl_point_state {
189 nir_variable *gl_pos_out;
190 nir_variable *gl_point_size;
191 };
192
193 static bool
194 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
195 {
196 struct lower_gl_point_state *state = data;
197 nir_def *vp_scale, *pos;
198
199 if (instr->type != nir_instr_type_intrinsic)
200 return false;
201
202 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
203 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
204 intrin->intrinsic != nir_intrinsic_emit_vertex)
205 return false;
206
207 if (nir_intrinsic_stream_id(intrin) != 0)
208 return false;
209
210 if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
211 intrin->intrinsic == nir_intrinsic_end_primitive) {
212 nir_instr_remove(&intrin->instr);
213 return true;
214 }
215
216 b->cursor = nir_before_instr(instr);
217
218 // viewport-map endpoints
219 nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
220 vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
221
222 // Load point info values
223 nir_def *point_size = nir_load_var(b, state->gl_point_size);
224 nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
225
226 // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
227 nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
228 w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
229 // half_w_delta = w_delta / 2
230 nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
231
232 // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
233 nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
234 h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
235 // half_h_delta = h_delta / 2
236 nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
237
238 nir_def *point_dir[4][2] = {
239 { nir_imm_float(b, -1), nir_imm_float(b, -1) },
240 { nir_imm_float(b, -1), nir_imm_float(b, 1) },
241 { nir_imm_float(b, 1), nir_imm_float(b, -1) },
242 { nir_imm_float(b, 1), nir_imm_float(b, 1) }
243 };
244
245 nir_def *point_pos_x = nir_channel(b, point_pos, 0);
246 nir_def *point_pos_y = nir_channel(b, point_pos, 1);
247
248 for (size_t i = 0; i < 4; i++) {
249 pos = nir_vec4(b,
250 nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
251 nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
252 nir_channel(b, point_pos, 2),
253 nir_channel(b, point_pos, 3));
254
255 nir_store_var(b, state->gl_pos_out, pos, 0xf);
256
257 nir_emit_vertex(b);
258 }
259
260 nir_end_primitive(b);
261
262 nir_instr_remove(&intrin->instr);
263
264 return true;
265 }
266
267 static bool
268 lower_gl_point_gs(nir_shader *shader)
269 {
270 struct lower_gl_point_state state;
271
272 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
273 shader->info.gs.vertices_out *= 4;
274
275 // Get the gl_Position and gl_PointSize output variables
276 state.gl_pos_out =
277 nir_find_variable_with_location(shader, nir_var_shader_out,
278 VARYING_SLOT_POS);
279 state.gl_point_size =
280 nir_find_variable_with_location(shader, nir_var_shader_out,
281 VARYING_SLOT_PSIZ);
282
283 // if gl_Position or gl_PointSize isn't written, we have nothing to do
284 if (!state.gl_pos_out || !state.gl_point_size)
285 return false;
286
287 return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
288 nir_metadata_dominance, &state);
289 }
290
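/* provoking-vertex emulation: emitted vertices are buffered in per-varying rings and
 * primitives are re-emitted with rotated vertex order to match the requested
 * provoking-vertex mode.
 */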
291 struct lower_pv_mode_state {
292 nir_variable *varyings[VARYING_SLOT_MAX][4];
293 nir_variable *pos_counter;
294 nir_variable *out_pos_counter;
295 nir_variable *ring_offset;
296 unsigned ring_size;
297 unsigned primitive_vert_count;
298 unsigned prim;
299 };
300
301 static nir_def*
302 lower_pv_mode_gs_ring_index(nir_builder *b,
303 struct lower_pv_mode_state *state,
304 nir_def *index)
305 {
306 nir_def *ring_offset = nir_load_var(b, state->ring_offset);
307 return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
308 state->ring_size);
309 }
310
311 /* Given the final deref of a chain of derefs, this function walks up the chain
312 * until it finds a var deref.
313 *
314 * It will then recreate an identical chain that ends with the provided deref.
315 */
316 static nir_deref_instr*
317 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
318 {
319 nir_deref_instr *parent = nir_deref_instr_parent(old);
320 if (!parent)
321 return new;
322 switch(old->deref_type) {
323 case nir_deref_type_var:
324 return new;
325 case nir_deref_type_array:
326 return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
327 case nir_deref_type_struct:
328 return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
329 case nir_deref_type_array_wildcard:
330 case nir_deref_type_ptr_as_array:
331 case nir_deref_type_cast:
332 unreachable("unexpected deref type");
333 }
334 unreachable("impossible deref type");
335 }
336
337 static bool
338 lower_pv_mode_gs_store(nir_builder *b,
339 nir_intrinsic_instr *intrin,
340 struct lower_pv_mode_state *state)
341 {
342 b->cursor = nir_before_instr(&intrin->instr);
343 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
344 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
345 nir_variable *var = nir_deref_instr_get_variable(deref);
346
347 gl_varying_slot location = var->data.location;
348 unsigned location_frac = var->data.location_frac;
349 assert(state->varyings[location][location_frac]);
350 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
351 nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
352 nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
353 nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
354 // recreate the chain of derefs that led to the store.
355 nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
356 nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
357 nir_instr_remove(&intrin->instr);
358 return true;
359 }
360
361 return false;
362 }
363
364 static void
365 lower_pv_mode_emit_rotated_prim(nir_builder *b,
366 struct lower_pv_mode_state *state,
367 nir_def *current_vertex)
368 {
369 nir_def *two = nir_imm_int(b, 2);
370 nir_def *three = nir_imm_int(b, 3);
371 bool is_triangle = state->primitive_vert_count == 3;
372 /* This shader will always see the last three vertices emitted by the user gs.
373 * The following table is used to rotate primitives within a strip generated
374 * by the user gs such that the last vertex becomes the first.
375 *
376 * [lines, tris][even/odd index][vertex mod 3]
377 */
378 static const unsigned vert_maps[2][2][3] = {
379 {{1, 0, 0}, {1, 0, 0}},
380 {{2, 0, 1}, {2, 1, 0}}
381 };
382 /* When the primitive supplied to the gs comes from a strip, the provoking vertex
383 * is either the last or the second, depending on whether the triangle is at an odd
384 * or even position within the strip.
385 *
386 * odd or even primitive within draw
387 */
388 nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
389 for (unsigned i = 0; i < state->primitive_vert_count; i++) {
390 /* odd or even triangle within strip emitted by user GS
391 * this is handled using the table
392 */
393 nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
394 unsigned offset_even = vert_maps[is_triangle][0][i];
395 unsigned offset_odd = vert_maps[is_triangle][1][i];
396 nir_def *offset_even_value = nir_imm_int(b, offset_even);
397 nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
398 nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
399 offset_odd_value, offset_even_value);
400 /* Here we account for how triangles are provided to the gs from a strip.
401 * For even primitives we rotate by 3, meaning we do nothing.
402 * For odd primitives we rotate by 2, combined with the previous rotation this
403 * means the second vertex becomes the last.
404 */
405 if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
406 rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
407 nir_isub(b, three,
408 odd_prim)),
409 three);
410 /* Triangles that come from fans are provided to the gs the same way as
411 * odd triangles from a strip so always rotate by 2.
412 */
413 else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
414 rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
415 three);
416 rotated_i = nir_iadd(b, rotated_i, current_vertex);
417 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
418 gl_varying_slot location = var->data.location;
419 unsigned location_frac = var->data.location_frac;
420 if (state->varyings[location][location_frac]) {
421 nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
422 nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
423 copy_vars(b, nir_build_deref_var(b, var), value);
424 }
425 }
426 nir_emit_vertex(b);
427 }
428 }
429
430 static bool
431 lower_pv_mode_gs_emit_vertex(nir_builder *b,
432 nir_intrinsic_instr *intrin,
433 struct lower_pv_mode_state *state)
434 {
435 b->cursor = nir_before_instr(&intrin->instr);
436
437 // increment pos_counter
438 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
439 nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
440
441 nir_instr_remove(&intrin->instr);
442 return true;
443 }
444
445 static bool
446 lower_pv_mode_gs_end_primitive(nir_builder *b,
447 nir_intrinsic_instr *intrin,
448 struct lower_pv_mode_state *state)
449 {
450 b->cursor = nir_before_instr(&intrin->instr);
451
452 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
453 nir_push_loop(b);
454 {
455 nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
456 nir_break_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
457 nir_imm_int(b, state->primitive_vert_count)));
458
459 lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
460 nir_end_primitive(b);
461
462 nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
463 }
464 nir_pop_loop(b, NULL);
465 /* Set the ring offset such that when position 0 is
466 * read we get the last value written
467 */
468 nir_store_var(b, state->ring_offset, pos_counter, 1);
469 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
470 nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
471
472 nir_instr_remove(&intrin->instr);
473 return true;
474 }
475
476 static bool
477 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
478 {
479 if (instr->type != nir_instr_type_intrinsic)
480 return false;
481
482 struct lower_pv_mode_state *state = data;
483 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
484
485 switch (intrin->intrinsic) {
486 case nir_intrinsic_store_deref:
487 return lower_pv_mode_gs_store(b, intrin, state);
488 case nir_intrinsic_copy_deref:
489 unreachable("should be lowered");
490 case nir_intrinsic_emit_vertex_with_counter:
491 case nir_intrinsic_emit_vertex:
492 return lower_pv_mode_gs_emit_vertex(b, intrin, state);
493 case nir_intrinsic_end_primitive:
494 case nir_intrinsic_end_primitive_with_counter:
495 return lower_pv_mode_gs_end_primitive(b, intrin, state);
496 default:
497 return false;
498 }
499 }
500
501 static bool
502 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
503 {
504 nir_builder b;
505 struct lower_pv_mode_state state;
506 memset(state.varyings, 0, sizeof(state.varyings));
507
508 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
509 b = nir_builder_at(nir_before_impl(entry));
510
511 state.primitive_vert_count =
512 mesa_vertices_per_prim(shader->info.gs.output_primitive);
513 state.ring_size = shader->info.gs.vertices_out;
514
515 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
516 gl_varying_slot location = var->data.location;
517 unsigned location_frac = var->data.location_frac;
518
519 char name[100];
520 snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
521 state.varyings[location][location_frac] =
522 nir_local_variable_create(entry,
523 glsl_array_type(var->type,
524 state.ring_size,
525 false),
526 name);
527 }
528
529 state.pos_counter = nir_local_variable_create(entry,
530 glsl_uint_type(),
531 "__pos_counter");
532
533 state.out_pos_counter = nir_local_variable_create(entry,
534 glsl_uint_type(),
535 "__out_pos_counter");
536
537 state.ring_offset = nir_local_variable_create(entry,
538 glsl_uint_type(),
539 "__ring_offset");
540
541 state.prim = prim;
542
543 // initialize pos_counter, out_pos_counter and ring_offset
544 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
545 nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
546 nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
547
548 shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
549 (state.primitive_vert_count - 1)) *
550 state.primitive_vert_count;
551 return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
552 nir_metadata_dominance, &state);
553 }
554
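/* line-stipple emulation: the GS accumulates screen-space line length into a
 * noperspective varying that the FS tests against the stipple pattern per sample.
 */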
555 struct lower_line_stipple_state {
556 nir_variable *pos_out;
557 nir_variable *stipple_out;
558 nir_variable *prev_pos;
559 nir_variable *pos_counter;
560 nir_variable *stipple_counter;
561 bool line_rectangular;
562 };
563
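/* perspective-divide a clip-space position and scale the result by the viewport scale */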
564 static nir_def *
565 viewport_map(nir_builder *b, nir_def *vert,
566 nir_def *scale)
567 {
568 nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
569 nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
570 w_recip);
571 return nir_fmul(b, ndc_point, scale);
572 }
573
574 static bool
575 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
576 {
577 struct lower_line_stipple_state *state = data;
578 if (instr->type != nir_instr_type_intrinsic)
579 return false;
580
581 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
582 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
583 intrin->intrinsic != nir_intrinsic_emit_vertex)
584 return false;
585
586 b->cursor = nir_before_instr(instr);
587
588 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
589 // viewport-map endpoints
590 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
591 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
592 nir_def *prev = nir_load_var(b, state->prev_pos);
593 nir_def *curr = nir_load_var(b, state->pos_out);
594 prev = viewport_map(b, prev, vp_scale);
595 curr = viewport_map(b, curr, vp_scale);
596
597 // calculate length of line
598 nir_def *len;
599 if (state->line_rectangular)
600 len = nir_fast_distance(b, prev, curr);
601 else {
602 nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
603 len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
604 }
605 // update stipple_counter
606 nir_store_var(b, state->stipple_counter,
607 nir_fadd(b, nir_load_var(b, state->stipple_counter),
608 len), 1);
609 nir_pop_if(b, NULL);
610 // emit stipple out
611 nir_copy_var(b, state->stipple_out, state->stipple_counter);
612 nir_copy_var(b, state->prev_pos, state->pos_out);
613
614 // update prev_pos and pos_counter for next vertex
615 b->cursor = nir_after_instr(instr);
616 nir_store_var(b, state->pos_counter,
617 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
618 1), 1);
619
620 return true;
621 }
622
623 static bool
624 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
625 {
626 nir_builder b;
627 struct lower_line_stipple_state state;
628
629 state.pos_out =
630 nir_find_variable_with_location(shader, nir_var_shader_out,
631 VARYING_SLOT_POS);
632
633 // if position isn't written, we have nothing to do
634 if (!state.pos_out)
635 return false;
636
637 state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
638 glsl_float_type(),
639 "__stipple");
640 state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
641 state.stipple_out->data.driver_location = shader->num_outputs++;
642 state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
643 shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
644
645 // create temp variables
646 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
647 glsl_vec4_type(),
648 "__prev_pos");
649 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
650 glsl_uint_type(),
651 "__pos_counter");
652 state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
653 glsl_float_type(),
654 "__stipple_counter");
655
656 state.line_rectangular = line_rectangular;
657 // initialize pos_counter and stipple_counter
658 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
659 b = nir_builder_at(nir_before_impl(entry));
660 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
661 nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
662
663 return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
664 nir_metadata_dominance, &state);
665 }
666
667 static bool
668 lower_line_stipple_fs(nir_shader *shader)
669 {
670 nir_builder b;
671 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
672 b = nir_builder_at(nir_after_impl(entry));
673
674 // create stipple counter
675 nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
676 glsl_float_type(),
677 "__stipple");
678 stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
679 stipple->data.driver_location = shader->num_inputs++;
680 stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
681 shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
682
683 nir_variable *sample_mask_out =
684 nir_find_variable_with_location(shader, nir_var_shader_out,
685 FRAG_RESULT_SAMPLE_MASK);
686 if (!sample_mask_out) {
687 sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
688 glsl_uint_type(), "sample_mask");
689 sample_mask_out->data.driver_location = shader->num_outputs++;
690 sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
691 }
692
693 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
694 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
695 nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
696 pattern = nir_iand_imm(&b, pattern, 0xffff);
697
698 nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
699 nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
700 nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
701 nir_store_var(&b, v, sample_mask_in, 1);
702 nir_store_var(&b, sample_mask, sample_mask_in, 1);
703 nir_push_loop(&b);
704 {
705 nir_def *value = nir_load_var(&b, v);
706 nir_def *index = nir_ufind_msb(&b, value);
707 nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
708 nir_def *new_value = nir_ixor(&b, value, index_mask);
709 nir_store_var(&b, v, new_value, 1);
710 nir_push_if(&b, nir_ieq_imm(&b, value, 0));
711 nir_jump(&b, nir_jump_break);
712 nir_pop_if(&b, NULL);
713
714 nir_def *stipple_pos =
715 nir_interp_deref_at_sample(&b, 1, 32,
716 &nir_build_deref_var(&b, stipple)->def, index);
717 stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
718 nir_imm_float(&b, 16.0));
719 stipple_pos = nir_f2i32(&b, stipple_pos);
720 nir_def *bit =
721 nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
722 nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
723 {
724 nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
725 sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
726 nir_store_var(&b, sample_mask, sample_mask_value, 1);
727 }
728 nir_pop_if(&b, NULL);
729 }
730 nir_pop_loop(&b, NULL);
731 nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
732
733 return true;
734 }
735
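/* smooth/wide line emulation: the GS expands each segment into a quad plus end caps
 * and writes a line-coord varying that nir_lower_aaline_fs consumes in the FS.
 */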
736 struct lower_line_smooth_state {
737 nir_variable *pos_out;
738 nir_variable *line_coord_out;
739 nir_variable *prev_pos;
740 nir_variable *pos_counter;
741 nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
742 *varyings[VARYING_SLOT_MAX][4]; // location_frac
743 };
744
745 static bool
746 lower_line_smooth_gs_store(nir_builder *b,
747 nir_intrinsic_instr *intrin,
748 struct lower_line_smooth_state *state)
749 {
750 b->cursor = nir_before_instr(&intrin->instr);
751 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
752 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
753 nir_variable *var = nir_deref_instr_get_variable(deref);
754
755 // we take care of position elsewhere
756 gl_varying_slot location = var->data.location;
757 unsigned location_frac = var->data.location_frac;
758 if (location != VARYING_SLOT_POS) {
759 assert(state->varyings[location]);
760 nir_store_var(b, state->varyings[location][location_frac],
761 intrin->src[1].ssa,
762 nir_intrinsic_write_mask(intrin));
763 nir_instr_remove(&intrin->instr);
764 return true;
765 }
766 }
767
768 return false;
769 }
770
771 static bool
772 lower_line_smooth_gs_emit_vertex(nir_builder *b,
773 nir_intrinsic_instr *intrin,
774 struct lower_line_smooth_state *state)
775 {
776 b->cursor = nir_before_instr(&intrin->instr);
777
778 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
779 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
780 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
781 nir_def *prev = nir_load_var(b, state->prev_pos);
782 nir_def *curr = nir_load_var(b, state->pos_out);
783 nir_def *prev_vp = viewport_map(b, prev, vp_scale);
784 nir_def *curr_vp = viewport_map(b, curr, vp_scale);
785
786 nir_def *width = nir_load_push_constant_zink(b, 1, 32,
787 nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
788 nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
789
790 const unsigned yx[2] = { 1, 0 };
791 nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
792 nir_def *len = nir_fast_length(b, vec);
793 nir_def *dir = nir_normalize(b, vec);
794 nir_def *half_length = nir_fmul_imm(b, len, 0.5);
795 half_length = nir_fadd_imm(b, half_length, 0.5);
796
797 nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
798 nir_def *tangent =
799 nir_fmul(b,
800 nir_fmul(b,
801 nir_swizzle(b, dir, yx, 2),
802 nir_imm_vec2(b, 1.0, -1.0)),
803 vp_scale_rcp);
804 tangent = nir_fmul(b, tangent, half_width);
805 tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
806 dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
807
808 nir_def *line_offets[8] = {
809 nir_fadd(b, tangent, nir_fneg(b, dir)),
810 nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
811 tangent,
812 nir_fneg(b, tangent),
813 tangent,
814 nir_fneg(b, tangent),
815 nir_fadd(b, tangent, dir),
816 nir_fadd(b, nir_fneg(b, tangent), dir),
817 };
818 nir_def *line_coord =
819 nir_vec4(b, half_width, half_width, half_length, half_length);
820 nir_def *line_coords[8] = {
821 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
822 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
823 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
824 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
825 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
826 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
827 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
828 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
829 };
830
831 /* emit first end-cap, and start line */
832 for (int i = 0; i < 4; ++i) {
833 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
834 gl_varying_slot location = var->data.location;
835 unsigned location_frac = var->data.location_frac;
836 if (state->prev_varyings[location][location_frac])
837 nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
838 }
839 nir_store_var(b, state->pos_out,
840 nir_fadd(b, prev, nir_fmul(b, line_offets[i],
841 nir_channel(b, prev, 3))), 0xf);
842 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
843 nir_emit_vertex(b);
844 }
845
846 /* finish line and emit last end-cap */
847 for (int i = 4; i < 8; ++i) {
848 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
849 gl_varying_slot location = var->data.location;
850 unsigned location_frac = var->data.location_frac;
851 if (state->varyings[location][location_frac])
852 nir_copy_var(b, var, state->varyings[location][location_frac]);
853 }
854 nir_store_var(b, state->pos_out,
855 nir_fadd(b, curr, nir_fmul(b, line_offets[i],
856 nir_channel(b, curr, 3))), 0xf);
857 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
858 nir_emit_vertex(b);
859 }
860 nir_end_primitive(b);
861
862 nir_pop_if(b, NULL);
863
864 nir_copy_var(b, state->prev_pos, state->pos_out);
865 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
866 gl_varying_slot location = var->data.location;
867 unsigned location_frac = var->data.location_frac;
868 if (state->varyings[location][location_frac])
869 nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
870 }
871
872 // update prev_pos and pos_counter for next vertex
873 b->cursor = nir_after_instr(&intrin->instr);
874 nir_store_var(b, state->pos_counter,
875 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
876 1), 1);
877
878 nir_instr_remove(&intrin->instr);
879 return true;
880 }
881
882 static bool
883 lower_line_smooth_gs_end_primitive(nir_builder *b,
884 nir_intrinsic_instr *intrin,
885 struct lower_line_smooth_state *state)
886 {
887 b->cursor = nir_before_instr(&intrin->instr);
888
889 // reset line counter
890 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
891
892 nir_instr_remove(&intrin->instr);
893 return true;
894 }
895
896 static bool
897 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
898 {
899 if (instr->type != nir_instr_type_intrinsic)
900 return false;
901
902 struct lower_line_smooth_state *state = data;
903 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
904
905 switch (intrin->intrinsic) {
906 case nir_intrinsic_store_deref:
907 return lower_line_smooth_gs_store(b, intrin, state);
908 case nir_intrinsic_copy_deref:
909 unreachable("should be lowered");
910 case nir_intrinsic_emit_vertex_with_counter:
911 case nir_intrinsic_emit_vertex:
912 return lower_line_smooth_gs_emit_vertex(b, intrin, state);
913 case nir_intrinsic_end_primitive:
914 case nir_intrinsic_end_primitive_with_counter:
915 return lower_line_smooth_gs_end_primitive(b, intrin, state);
916 default:
917 return false;
918 }
919 }
920
921 static bool
922 lower_line_smooth_gs(nir_shader *shader)
923 {
924 nir_builder b;
925 struct lower_line_smooth_state state;
926
927 memset(state.varyings, 0, sizeof(state.varyings));
928 memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
929 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
930 gl_varying_slot location = var->data.location;
931 unsigned location_frac = var->data.location_frac;
932 if (location == VARYING_SLOT_POS)
933 continue;
934
935 char name[100];
936 snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
937 state.varyings[location][location_frac] =
938 nir_variable_create(shader, nir_var_shader_temp,
939 var->type, name);
940
941 snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
942 state.prev_varyings[location][location_frac] =
943 nir_variable_create(shader, nir_var_shader_temp,
944 var->type, name);
945 }
946
947 state.pos_out =
948 nir_find_variable_with_location(shader, nir_var_shader_out,
949 VARYING_SLOT_POS);
950
951 // if position isn't written, we have nothing to do
952 if (!state.pos_out)
953 return false;
954
955 unsigned location = 0;
956 nir_foreach_shader_in_variable(var, shader) {
957 if (var->data.driver_location >= location)
958 location = var->data.driver_location + 1;
959 }
960
961 state.line_coord_out =
962 nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
963 "__line_coord");
964 state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
965 state.line_coord_out->data.driver_location = location;
966 state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
967 shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
968 shader->num_outputs++;
969
970 // create temp variables
971 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
972 glsl_vec4_type(),
973 "__prev_pos");
974 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
975 glsl_uint_type(),
976 "__pos_counter");
977
978 // initialize pos_counter
979 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
980 b = nir_builder_at(nir_before_impl(entry));
981 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
982
983 shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
984 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
985
986 return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
987 nir_metadata_dominance, &state);
988 }
989
990 static bool
991 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
992 {
993 int dummy;
994 nir_builder b;
995
996 nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
997 if (lower_stipple) {
998 stipple_counter = nir_variable_create(shader, nir_var_shader_in,
999 glsl_float_type(),
1000 "__stipple");
1001 stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1002 stipple_counter->data.driver_location = shader->num_inputs++;
1003 stipple_counter->data.location =
1004 MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1005 shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1006
1007 stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1008 glsl_uint_type(),
1009 "stipple_pattern");
1010
1011 // initialize stipple_pattern
1012 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1013 b = nir_builder_at(nir_before_impl(entry));
1014 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1015 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1016 nir_store_var(&b, stipple_pattern, pattern, 1);
1017 }
1018
1019 nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1020 return true;
1021 }
1022
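/* dual-source blending: the second color output must use location 0 with index 1 */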
1023 static bool
1024 lower_dual_blend(nir_shader *shader)
1025 {
1026 bool progress = false;
1027 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1028 if (var) {
1029 var->data.location = FRAG_RESULT_DATA0;
1030 var->data.index = 1;
1031 progress = true;
1032 }
1033 nir_shader_preserve_all_metadata(shader);
1034 return progress;
1035 }
1036
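/* rewrite (un)pack_64_2x32 into their split-opcode equivalents */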
1037 static bool
1038 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1039 {
1040 if (instr->type != nir_instr_type_alu)
1041 return false;
1042 nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1043 if (alu_instr->op != nir_op_pack_64_2x32 &&
1044 alu_instr->op != nir_op_unpack_64_2x32)
1045 return false;
1046 b->cursor = nir_before_instr(&alu_instr->instr);
1047 nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1048 nir_def *dest;
1049 switch (alu_instr->op) {
1050 case nir_op_pack_64_2x32:
1051 dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1052 break;
1053 case nir_op_unpack_64_2x32:
1054 dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1055 break;
1056 default:
1057 unreachable("Impossible opcode");
1058 }
1059 nir_def_replace(&alu_instr->def, dest);
1060 return true;
1061 }
1062
1063 static bool
1064 lower_64bit_pack(nir_shader *shader)
1065 {
1066 return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1067 nir_metadata_control_flow, NULL);
1068 }
1069
1070 nir_shader *
1071 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1072 const nir_shader *prev_stage)
1073 {
1074 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1075 options,
1076 "filled quad gs");
1077
1078 nir_shader *nir = b.shader;
1079 nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1080 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1081 nir->info.gs.vertices_in = 4;
1082 nir->info.gs.vertices_out = 6;
1083 nir->info.gs.invocations = 1;
1084 nir->info.gs.active_stream_mask = 1;
1085
1086 nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1087 memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1088 if (prev_stage->xfb_info) {
1089 size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1090 nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1091 }
1092
1093 nir_variable *in_vars[VARYING_SLOT_MAX];
1094 nir_variable *out_vars[VARYING_SLOT_MAX];
1095 unsigned num_vars = 0;
1096
1097 /* Create input/output variables. */
1098 nir_foreach_shader_out_variable(var, prev_stage) {
1099 assert(!var->data.patch);
1100 assert(var->data.location != VARYING_SLOT_PRIMITIVE_ID &&
1101 "not a VS output");
1102
1103 /* input vars can't be created for those */
1104 if (var->data.location == VARYING_SLOT_LAYER ||
1105 var->data.location == VARYING_SLOT_VIEW_INDEX ||
1106 /* psiz not needed for quads */
1107 var->data.location == VARYING_SLOT_PSIZ)
1108 continue;
1109
1110 char name[100];
1111 if (var->name)
1112 snprintf(name, sizeof(name), "in_%s", var->name);
1113 else
1114 snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1115
1116 nir_variable *in = nir_variable_clone(var, nir);
1117 ralloc_free(in->name);
1118 in->name = ralloc_strdup(in, name);
1119 in->type = glsl_array_type(var->type, 4, false);
1120 in->data.mode = nir_var_shader_in;
1121 nir_shader_add_variable(nir, in);
1122
1123 if (var->name)
1124 snprintf(name, sizeof(name), "out_%s", var->name);
1125 else
1126 snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1127
1128 nir_variable *out = nir_variable_clone(var, nir);
1129 ralloc_free(out->name);
1130 out->name = ralloc_strdup(out, name);
1131 out->data.mode = nir_var_shader_out;
1132 nir_shader_add_variable(nir, out);
1133
1134 in_vars[num_vars] = in;
1135 out_vars[num_vars++] = out;
1136 }
1137
1138 /* When a geometry shader is not used, a fragment shader may read primitive
1139 * ID and get an implicit value without the vertex shader writing an ID. This
1140 * case needs to work even when we inject a GS internally.
1141 *
1142 * However, if a geometry shader precedes a fragment shader that reads
1143 * primitive ID, Vulkan requires that the geometry shader write primitive ID.
1144 * To handle this case correctly, we must write primitive ID, copying the
1145 * fixed-function gl_PrimitiveIDIn input which matches what the fragment
1146 * shader will expect.
1147 *
1148 * If the fragment shader doesn't read primitive ID, this copy will likely be
1149 * optimized out at link-time by the Vulkan driver. Unless this is
1150 * non-monolithic -- in which case we don't know whether the fragment shader
1151 * will read primitive ID either. In both cases, the right thing for Zink
1152 * to do is copy primitive ID unconditionally.
1153 */
1154 in_vars[num_vars] = nir_create_variable_with_location(
1155 nir, nir_var_shader_in, VARYING_SLOT_PRIMITIVE_ID, glsl_int_type());
1156
1157 out_vars[num_vars] = nir_create_variable_with_location(
1158 nir, nir_var_shader_out, VARYING_SLOT_PRIMITIVE_ID, glsl_int_type());
1159
1160 num_vars++;
1161
1162 int mapping_first[] = {0, 1, 2, 0, 2, 3};
1163 int mapping_last[] = {0, 1, 3, 1, 2, 3};
1164 nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1165 last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1166 for (unsigned i = 0; i < 6; ++i) {
1167 /* swap indices 2 and 3 */
1168 nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1169 nir_imm_int(&b, mapping_last[i]),
1170 nir_imm_int(&b, mapping_first[i]));
1171 /* Copy inputs to outputs. */
1172 for (unsigned j = 0; j < num_vars; ++j) {
1173 if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1174 continue;
1175 }
1176
1177 /* gl_PrimitiveIDIn is not arrayed, all other inputs are */
1178 nir_deref_instr *in_value = nir_build_deref_var(&b, in_vars[j]);
1179 if (in_vars[j]->data.location != VARYING_SLOT_PRIMITIVE_ID)
1180 in_value = nir_build_deref_array(&b, in_value, idx);
1181
1182 copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1183 }
1184 nir_emit_vertex(&b, 0);
1185 if (i == 2)
1186 nir_end_primitive(&b, 0);
1187 }
1188
1189 nir_end_primitive(&b, 0);
1190 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1191 nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1192 return nir;
1193 }
1194
1195 static bool
1196 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1197 nir_intrinsic_instr *intrin,
1198 void *data)
1199 {
1200 int inlined_uniform_offset;
1201 switch (intrin->intrinsic) {
1202 case nir_intrinsic_load_flat_mask:
1203 inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1204 break;
1205 case nir_intrinsic_load_provoking_last:
1206 inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1207 break;
1208 default:
1209 return false;
1210 }
1211
1212 b->cursor = nir_before_instr(&intrin->instr);
1213 assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1214 /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1215 * anything with a different bit_size) so we need to split the load. */
1216 int num_dwords = intrin->def.bit_size / 32;
1217 nir_def *dwords[2] = {NULL};
1218 for (unsigned i = 0; i < num_dwords; i++)
1219 dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1220 nir_imm_int(b, inlined_uniform_offset + i),
1221 .align_mul = intrin->def.bit_size / 8,
1222 .align_offset = 0,
1223 .range_base = 0, .range = ~0);
1224 nir_def *new_dest_def;
1225 if (intrin->def.bit_size == 32)
1226 new_dest_def = dwords[0];
1227 else
1228 new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1229 nir_def_replace(&intrin->def, new_dest_def);
1230 return true;
1231 }
1232
1233 bool
1234 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1235 {
1236 return nir_shader_intrinsics_pass(nir,
1237 lower_system_values_to_inlined_uniforms_instr,
1238 nir_metadata_dominance, NULL);
1239 }
1240
1241 /* from radeonsi */
1242 static unsigned
1243 amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
1244 {
1245 /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
1246
1247 switch (consumer->info.stage) {
1248 case MESA_SHADER_TESS_CTRL: /* VS->TCS */
1249 /* Non-amplifying shaders can always have their varying expressions
1250 * moved into later shaders.
1251 */
1252 return UINT_MAX;
1253
1254 case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
1255 return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
1256 consumer->info.gs.vertices_in == 2 ? 20 : 14;
1257
1258 case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
1259 case MESA_SHADER_FRAGMENT:
1260 /* Up to 3 uniforms and 5 ALUs. */
1261 return 14;
1262
1263 default:
1264 unreachable("unexpected shader stage");
1265 }
1266 }
1267
1268 void
1269 zink_screen_init_compiler(struct zink_screen *screen)
1270 {
1271 static const struct nir_shader_compiler_options
1272 default_options = {
1273 .io_options = nir_io_has_intrinsics | nir_io_separate_clip_cull_distance_arrays,
1274 .lower_ffma16 = true,
1275 .lower_ffma32 = true,
1276 .lower_ffma64 = true,
1277 .lower_scmp = true,
1278 .lower_fdph = true,
1279 .lower_flrp32 = true,
1280 .lower_fsat = true,
1281 .lower_hadd = true,
1282 .lower_iadd_sat = true,
1283 .lower_fisnormal = true,
1284 .lower_extract_byte = true,
1285 .lower_extract_word = true,
1286 .lower_insert_byte = true,
1287 .lower_insert_word = true,
1288
1289 /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1290 * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1291 * ldexp, so we don't just always lower it in NIR). Given that ldexp is
1292 * effectively unused (no instances in shader-db), it's not worth the
1293 * effort to do so.
1294 * */
1295 .lower_ldexp = true,
1296
1297 .lower_mul_high = true,
1298 .lower_to_scalar = true,
1299 .lower_uadd_carry = true,
1300 .compact_arrays = true,
1301 .lower_usub_borrow = true,
1302 .lower_uadd_sat = true,
1303 .lower_usub_sat = true,
1304 .lower_vector_cmp = true,
1305 .lower_int64_options =
1306 nir_lower_bit_count64 |
1307 nir_lower_find_lsb64 |
1308 nir_lower_ufind_msb64,
1309 .lower_doubles_options = nir_lower_dround_even,
1310 .lower_uniforms_to_ubo = true,
1311 .has_fsub = true,
1312 .has_isub = true,
1313 .lower_mul_2x32_64 = true,
1314 .support_16bit_alu = true, /* not quite what it sounds like */
1315 .support_indirect_inputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_COMPUTE),
1316 .support_indirect_outputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_COMPUTE),
1317 .max_unroll_iterations = 0,
1318 };
1319
1320 screen->nir_options = default_options;
1321
1322 if (!screen->info.feats.features.shaderInt64)
1323 screen->nir_options.lower_int64_options = ~0;
1324
1325 if (!screen->info.feats.features.shaderFloat64) {
1326 screen->nir_options.lower_doubles_options = ~0;
1327 screen->nir_options.lower_flrp64 = true;
1328 screen->nir_options.lower_ffma64 = true;
1329 /* soft fp64 function inlining will blow up loop bodies and effectively
1330 * stop Vulkan drivers from unrolling the loops.
1331 */
1332 screen->nir_options.max_unroll_iterations_fp64 = 32;
1333 }
1334
1335 if (screen->driver_compiler_workarounds.io_opt) {
1336 switch (zink_driverid(screen)) {
1337 case VK_DRIVER_ID_MESA_RADV:
1338 case VK_DRIVER_ID_AMD_OPEN_SOURCE:
1339 case VK_DRIVER_ID_AMD_PROPRIETARY:
1340 screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1341 break;
1342 default:
1343 mesa_logw("zink: instruction costs not implemented for this implementation!");
1344 screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1345 }
1346 } else {
1347 screen->nir_options.io_options |= nir_io_dont_optimize;
1348 }
1349
1350 /*
1351 The OpFRem and OpFMod instructions use cheap approximations of remainder,
1352 and the error can be large due to the discontinuity in trunc() and floor().
1353 This can produce mathematically unexpected results in some cases, such as
1354 FMod(x,x) computing x rather than 0, and can also cause the result to have
1355 a different sign than the infinitely precise result.
1356
1357 -Table 84. Precision of core SPIR-V Instructions
1358 * for drivers that are known to have imprecise fmod for doubles, lower dmod
1359 */
1360 if (zink_driverid(screen) == VK_DRIVER_ID_MESA_RADV ||
1361 zink_driverid(screen) == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1362 zink_driverid(screen) == VK_DRIVER_ID_AMD_PROPRIETARY)
1363 screen->nir_options.lower_doubles_options = nir_lower_dmod;
1364
1365 if (screen->info.have_EXT_shader_demote_to_helper_invocation)
1366 screen->nir_options.discard_is_demote = true;
1367
1368 screen->nir_options.support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES);
1369 screen->nir_options.support_indirect_outputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES);
1370 }
1371
1372 const void *
1373 zink_get_compiler_options(struct pipe_screen *pscreen,
1374 enum pipe_shader_ir ir,
1375 gl_shader_stage shader)
1376 {
1377 assert(ir == PIPE_SHADER_IR_NIR);
1378 return &zink_screen(pscreen)->nir_options;
1379 }
1380
1381 struct nir_shader *
1382 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1383 {
1384 if (zink_debug & ZINK_DEBUG_TGSI) {
1385 fprintf(stderr, "TGSI shader:\n---8<---\n");
1386 tgsi_dump_to_file(tokens, 0, stderr);
1387 fprintf(stderr, "---8<---\n\n");
1388 }
1389
1390 return tgsi_to_nir(tokens, screen, false);
1391 }
1392
1393
1394 static bool
1395 def_is_64bit(nir_def *def, void *state)
1396 {
1397 bool *lower = (bool *)state;
1398 if (def && (def->bit_size == 64)) {
1399 *lower = true;
1400 return false;
1401 }
1402 return true;
1403 }
1404
1405 static bool
1406 src_is_64bit(nir_src *src, void *state)
1407 {
1408 bool *lower = (bool *)state;
1409 if (src && (nir_src_bit_size(*src) == 64)) {
1410 *lower = true;
1411 return false;
1412 }
1413 return true;
1414 }
1415
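/* match any instruction that produces or consumes a 64-bit value */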
1416 static bool
1417 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1418 {
1419 bool lower = false;
1420 /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1421 * doesn't have const variants, so do the ugly const_cast here. */
1422 nir_instr *instr = (nir_instr *)const_instr;
1423
1424 nir_foreach_def(instr, def_is_64bit, &lower);
1425 if (lower)
1426 return true;
1427 nir_foreach_src(instr, src_is_64bit, &lower);
1428 return lower;
1429 }
1430
1431 static bool
1432 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1433 {
1434 nir_instr *instr = (nir_instr *)const_instr;
1435 nir_alu_instr *alu = nir_instr_as_alu(instr);
1436 switch (alu->op) {
1437 case nir_op_pack_64_2x32_split:
1438 case nir_op_pack_32_2x16_split:
1439 case nir_op_unpack_32_2x16_split_x:
1440 case nir_op_unpack_32_2x16_split_y:
1441 case nir_op_unpack_64_2x32_split_x:
1442 case nir_op_unpack_64_2x32_split_y:
1443 return true;
1444 default:
1445 break;
1446 }
1447 return false;
1448 }
1449
1450
1451 struct bo_vars {
1452 nir_variable *uniforms[5];
1453 nir_variable *ubo[5];
1454 nir_variable *ssbo[5];
1455 uint32_t first_ubo;
1456 uint32_t first_ssbo;
1457 };
1458
1459 static struct bo_vars
1460 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1461 {
1462 struct bo_vars bo;
1463 memset(&bo, 0, sizeof(bo));
1464 if (zs->ubos_used)
1465 bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1466 assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1467 if (zs->ssbos_used)
1468 bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1469 assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1470 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1471 unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1472 if (var->data.mode == nir_var_mem_ssbo) {
1473 assert(!bo.ssbo[idx]);
1474 bo.ssbo[idx] = var;
1475 } else {
1476 if (var->data.driver_location) {
1477 assert(!bo.ubo[idx]);
1478 bo.ubo[idx] = var;
1479 } else {
1480 assert(!bo.uniforms[idx]);
1481 bo.uniforms[idx] = var;
1482 }
1483 }
1484 }
1485 return bo;
1486 }
1487
1488 static bool
1489 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1490 {
1491 struct bo_vars *bo = data;
1492 if (instr->type != nir_instr_type_intrinsic)
1493 return false;
1494 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1495 nir_variable *var = NULL;
1496 nir_def *offset = NULL;
1497 bool is_load = true;
1498 b->cursor = nir_before_instr(instr);
1499
1500 switch (intr->intrinsic) {
1501 case nir_intrinsic_store_ssbo:
1502 var = bo->ssbo[intr->def.bit_size >> 4];
1503 offset = intr->src[2].ssa;
1504 is_load = false;
1505 break;
1506 case nir_intrinsic_load_ssbo:
1507 var = bo->ssbo[intr->def.bit_size >> 4];
1508 offset = intr->src[1].ssa;
1509 break;
1510 case nir_intrinsic_load_ubo:
1511 if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1512 var = bo->uniforms[intr->def.bit_size >> 4];
1513 else
1514 var = bo->ubo[intr->def.bit_size >> 4];
1515 offset = intr->src[1].ssa;
1516 break;
1517 default:
1518 return false;
1519 }
1520 nir_src offset_src = nir_src_for_ssa(offset);
1521 if (!nir_src_is_const(offset_src))
1522 return false;
1523
1524 unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1525 const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1526 unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1527 bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1528 if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1529 return false;
1530
1531 unsigned rewrites = 0;
1532 nir_def *result[2];
1533 for (unsigned i = 0; i < intr->num_components; i++) {
1534 if (offset_bytes + i >= size) {
1535 rewrites++;
1536 if (is_load)
1537 result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1538 }
1539 }
1540 assert(rewrites == intr->num_components);
1541 if (is_load) {
1542 nir_def *load = nir_vec(b, result, intr->num_components);
1543 nir_def_rewrite_uses(&intr->def, load);
1544 }
1545 nir_instr_remove(instr);
1546 return true;
1547 }
1548
1549 static bool
1550 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1551 {
1552 struct bo_vars bo = get_bo_vars(zs, shader);
1553 return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1554 }
1555
1556 static void
1557 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1558 {
1559 bool progress;
1560 do {
1561 progress = false;
1562 if (s->options->lower_int64_options)
1563 NIR_PASS_V(s, nir_lower_int64);
1564 if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1565 NIR_PASS_V(s, lower_64bit_pack);
1566 NIR_PASS_V(s, nir_lower_vars_to_ssa);
1567 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1568 NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1569 NIR_PASS(progress, s, nir_copy_prop);
1570 NIR_PASS(progress, s, nir_opt_remove_phis);
1571 if (s->options->lower_int64_options) {
1572 NIR_PASS(progress, s, nir_lower_64bit_phis);
1573 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1574 }
1575 NIR_PASS(progress, s, nir_opt_dce);
1576 NIR_PASS(progress, s, nir_opt_dead_cf);
1577 NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1578 NIR_PASS(progress, s, nir_opt_cse);
1579 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1580 NIR_PASS(progress, s, nir_opt_algebraic);
1581 NIR_PASS(progress, s, nir_opt_constant_folding);
1582 NIR_PASS(progress, s, nir_opt_undef);
1583 NIR_PASS(progress, s, zink_nir_lower_b2b);
1584 if (zs)
1585 NIR_PASS(progress, s, bound_bo_access, zs);
1586 if (can_shrink)
1587 NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
1588 } while (progress);
1589
1590 do {
1591 progress = false;
1592 NIR_PASS(progress, s, nir_opt_algebraic_late);
1593 if (progress) {
1594 NIR_PASS_V(s, nir_copy_prop);
1595 NIR_PASS_V(s, nir_opt_dce);
1596 NIR_PASS_V(s, nir_opt_cse);
1597 }
1598 } while (progress);
1599 }
1600
1601 /* - copy the lowered fbfetch variable
1602 * - set the new one up as an input attachment for descriptor 0.6
1603 * - load it as an image
1604 * - overwrite the previous load
1605 */
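/* Conceptually, a read of the fb_fetch_output color such as
 *
 *    val = load_deref(fbfetch_color);
 *
 * is replaced with a subpass input-attachment read along the lines of
 *
 *    val = image_deref_load(fbfetch_image, ivec4(0, 0, 0, 1), sample_id);
 *
 * (NIR sketch only with illustrative names; the exact instructions are built below).
 */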
1606 static bool
1607 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1608 {
1609 bool ms = data != NULL;
1610 if (instr->type != nir_instr_type_intrinsic)
1611 return false;
1612 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1613 if (intr->intrinsic != nir_intrinsic_load_deref)
1614 return false;
1615 nir_variable *var = nir_intrinsic_get_var(intr, 0);
1616 if (!var->data.fb_fetch_output)
1617 return false;
1618 b->cursor = nir_after_instr(instr);
1619 nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1620 /* If Dim is SubpassData, ... Image Format must be Unknown
1621 * - SPIRV OpTypeImage specification
1622 */
1623 fbfetch->data.image.format = 0;
1624 fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1625 fbfetch->data.mode = nir_var_uniform;
1626 fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1628 fbfetch->data.sample = ms;
1629 enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1630 fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1631 nir_shader_add_variable(b->shader, fbfetch);
1632 nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1633 nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1634 nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1635 nir_def_rewrite_uses(&intr->def, load);
1636 return true;
1637 }
1638
1639 static bool
1640 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1641 {
1642 nir_foreach_shader_out_variable(var, shader) {
1643 if (var->data.fb_fetch_output) {
1644 *fbfetch = var;
1645 break;
1646 }
1647 }
1648 assert(*fbfetch);
1649 if (!*fbfetch)
1650 return false;
1651 return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1652 }
1653
1654 /*
1655 * Add a check for out of bounds LOD for every texel fetch op
1656 * It boils down to:
1657 * - if (lod < query_levels(tex))
1658 * - res = txf(tex)
1659 * - else
1660 * - res = (0, 0, 0, 1)
1661 */
1662 static bool
1663 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1664 {
1665 if (in->type != nir_instr_type_tex)
1666 return false;
1667 nir_tex_instr *txf = nir_instr_as_tex(in);
1668 if (txf->op != nir_texop_txf)
1669 return false;
1670
1671 b->cursor = nir_before_instr(in);
1672 int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1673 assert(lod_idx >= 0);
1674 nir_src lod_src = txf->src[lod_idx].src;
1675 if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1676 return false;
1677
1678 nir_def *lod = lod_src.ssa;
1679
1680 int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1681 int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1682 int deref_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_deref);
1683 nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1684 1 + !!(offset_idx >= 0) + !!(handle_idx >= 0));
1685 unsigned src_idx = 0;
1686 levels->op = nir_texop_query_levels;
1687 levels->dest_type = nir_type_int | lod->bit_size;
1688 if (deref_idx >= 0) {
1689 levels->src[src_idx].src_type = nir_tex_src_texture_deref;
1690 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[deref_idx].src.ssa);
1691 }
1692 if (offset_idx >= 0) {
1693 levels->src[src_idx].src_type = nir_tex_src_texture_offset;
1694 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1695 }
1696 if (handle_idx >= 0) {
1697 levels->src[src_idx].src_type = nir_tex_src_texture_handle;
1698 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1699 }
1700 nir_def_init(&levels->instr, &levels->def,
1701 nir_tex_instr_dest_size(levels), 32);
1702 nir_builder_instr_insert(b, &levels->instr);
1703
1704 nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1705 nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1706 nir_builder_instr_insert(b, &new_txf->instr);
1707
1708 nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1709 nir_const_value oob_values[4] = {0};
1710 unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1711 oob_values[3] = (txf->dest_type & nir_type_float) ?
1712 nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1713 nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1714
1715 nir_pop_if(b, lod_oob_else);
1716 nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1717
1718 nir_def_rewrite_uses(&txf->def, robust_txf);
1719 nir_instr_remove_v(in);
1720 return true;
1721 }
1722
1723 /* This pass is used to workaround the lack of out of bounds LOD robustness
1724 * for texel fetch ops in VK_EXT_image_robustness.
1725 */
1726 static bool
1727 lower_txf_lod_robustness(nir_shader *shader)
1728 {
1729 return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1730 }
1731
1732 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1733 static bool
1734 check_psiz(struct nir_shader *s)
1735 {
1736 bool have_psiz = false;
1737 nir_foreach_shader_out_variable(var, s) {
1738 if (var->data.location == VARYING_SLOT_PSIZ) {
1739 /* genuine PSIZ outputs will have this set */
1740 have_psiz |= !!var->data.explicit_location;
1741 }
1742 }
1743 return have_psiz;
1744 }
1745
1746 static nir_variable *
1747 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1748 {
1749 assert((int)location >= 0);
1750
1751 nir_foreach_variable_with_modes(var, nir, mode) {
1752 if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1753 unsigned num_components = glsl_get_vector_elements(var->type);
1754 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1755 num_components *= 2;
1756 if (is_clipcull_dist(var->data.location))
1757 num_components = glsl_get_aoa_size(var->type);
1758 if (var->data.location_frac <= location_frac &&
1759 var->data.location_frac + num_components > location_frac)
1760 return var;
1761 }
1762 }
1763 return NULL;
1764 }
1765
1766 static bool
1767 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1768 {
1769 unsigned num_components = util_bitcount(output->component_mask);
1770 for (unsigned i = 0; i < num_components; i++)
1771 if (!inlined[output->component_offset + i])
1772 return false;
1773 return true;
1774 }
1775
1776 static void
1777 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1778 {
1779 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1780 if (last_output < VARYING_SLOT_VAR0)
1781 last_output = VARYING_SLOT_VAR0;
1782 else
1783 last_output++;
1784 /* this should get fixed up by slot remapping */
1785 psiz->data.location = last_output;
1786 }
1787
1788 static const struct glsl_type *
1789 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1790 {
1791 /* could be dvec/dmat/mat: each member is the same */
1792 const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1793 /* determine size of each member type */
1794 unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1795 /* normalize slot idx to current type's size */
1796 slot %= slot_count;
1797 unsigned slot_components = glsl_get_components(plain);
1798 if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1799 slot_components *= 2;
1800 /* create a vec4 mask of the selected slot's components out of all the components */
1801 uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1802 /* return a vecN of the selected components */
1803 slot_components = util_bitcount(mask);
1804 return glsl_vec_type(slot_components);
1805 }
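/* Worked example for clamp_slot_type above (sketch): a dvec3 member has 3
 * components that count double -> 6 32-bit components over 2 vec4 slots, so
 * slot 0 selects components 0-3 and returns a vec4 while slot 1 selects
 * components 4-5 and returns a vec2.
 */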
1806
1807 static const struct glsl_type *
1808 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1809 {
1810 const struct glsl_type *type = slot_type;
1811 unsigned slot_count = 0;
1812 unsigned cur_slot = 0;
1813 /* iterate over all the members in the struct, stopping once the slot idx is reached */
1814 for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1815 /* use array type for slot counting but return array member type for unroll */
1816 const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1817 type = glsl_without_array(arraytype);
1818 slot_count = glsl_count_vec4_slots(arraytype, false, false);
1819 }
1820 *slot_idx -= (cur_slot - slot_count);
1821 if (!glsl_type_is_struct_or_ifc(type))
1822 /* this is a fully unrolled struct: find the number of vec components to output */
1823 type = clamp_slot_type(type, *slot_idx);
1824 return type;
1825 }
1826
1827 static unsigned
1828 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1829 {
1830 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1831 const struct glsl_type *orig_type = var->type;
1832 const struct glsl_type *type = glsl_without_array(var->type);
1833 unsigned slot_idx = slot - so_slot;
1834 if (type != orig_type)
1835 slot_idx %= glsl_count_vec4_slots(type, false, false);
1836 /* need to find the vec4 that's being exported by this slot */
1837 while (glsl_type_is_struct_or_ifc(type))
1838 type = unroll_struct_type(type, &slot_idx);
1839
1840 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1841 unsigned num_components = glsl_get_components(glsl_without_array(type));
1842 /* special handling: clip/cull distance are arrays with vector semantics */
1843 if (is_clipcull_dist(var->data.location)) {
1844 num_components = glsl_array_size(type);
1845 if (slot_idx)
1846 /* this is the second vec4 */
1847 num_components %= 4;
1848 else
1849 /* this is the first vec4 */
1850 num_components = MIN2(num_components, 4);
1851 }
1852 assert(num_components);
1853 /* gallium handles xfb in terms of 32bit units */
1854 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1855 num_components *= 2;
1856 return num_components;
1857 }
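/* e.g. get_slot_components above for a float gl_ClipDistance[6] at
 * VARYING_SLOT_CLIP_DIST0 reports 4 components for its first slot and
 * 6 % 4 = 2 for the second; 64-bit types then double the count to match
 * gallium's 32-bit xfb units.
 */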
1858
1859 static unsigned
1860 get_var_slot_count(nir_shader *nir, nir_variable *var)
1861 {
1862 assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1863 const struct glsl_type *type = var->type;
1864 if (nir_is_arrayed_io(var, nir->info.stage))
1865 type = glsl_get_array_element(type);
1866 unsigned slot_count = 0;
1867 if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) ||
1868 var->data.location >= VARYING_SLOT_VAR0)
1869 slot_count = glsl_count_vec4_slots(type, false, false);
1870 else if (glsl_type_is_array(type))
1871 slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1872 else
1873 slot_count = 1;
1874 return slot_count;
1875 }
1876
1877
1878 static const nir_xfb_output_info *
1879 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1880 {
1881 for (unsigned i = 0; i < xfb_info->output_count; i++) {
1882 const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1883 if (packed_output->location == slot)
1884 return packed_output;
1885 }
1886 return NULL;
1887 }
1888
1889 static void
1890 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1891 {
1892 bool inlined[VARYING_SLOT_MAX][4] = {0};
1893 uint64_t packed = 0;
1894 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1895 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1896 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1897 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1898 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1899 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1900 unsigned xfb_components = util_bitcount(output->component_mask);
1901 /* always set stride to be used during draw */
1902 zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1903 for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1904 unsigned slot = output->location;
1905 if (inlined[slot][output->component_offset + c])
1906 continue;
1907 nir_variable *var = NULL;
1908 while (!var && slot < VARYING_SLOT_TESS_MAX)
1909 var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1910 slot = output->location;
1911 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1912 if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1913 /* if no variable is found for the xfb output, no output exists */
1914 inlined[slot][c + output->component_offset] = true;
1915 continue;
1916 }
1917 if (var->data.explicit_xfb_buffer) {
1918 /* handle dvec3 where gallium splits streamout over 2 registers */
1919 for (unsigned j = 0; j < xfb_components; j++)
1920 inlined[slot][c + output->component_offset + j] = true;
1921 }
1922 if (is_inlined(inlined[slot], output))
1923 continue;
1924 assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location));
1925 assert(!glsl_type_is_struct_or_ifc(var->type));
1926 unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1927 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1928 num_components *= 2;
1929 /* if this is the entire variable, try to blast it out during the initial declaration
1930 * structs must be handled later to ensure accurate analysis
1931 */
1932 if ((num_components == xfb_components ||
1933 num_components < xfb_components ||
1934 (num_components > xfb_components && xfb_components == 4))) {
1935 var->data.explicit_xfb_buffer = 1;
1936 var->data.xfb.buffer = output->buffer;
1937 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1938 var->data.offset = (output->offset + c * sizeof(uint32_t));
1939 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1940 for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
1941 inlined[slot][c + output->component_offset + j] = true;
1942 } else {
1943 /* otherwise store some metadata for later */
1944 packed |= BITFIELD64_BIT(slot);
1945 packed_components[slot] += xfb_components;
1946 packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
1947 packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
1948 for (unsigned j = 0; j < xfb_components; j++)
1949 packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
1950 }
1951 }
1952 }
1953
1954 /* if this was flagged as a packed output before, and if all the components are
1955 * being output with the same stream on the same buffer with increasing offsets, this entire variable
1956 * can be consolidated into a single output to conserve locations
1957 */
1958 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1959 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1960 unsigned slot = output->location;
1961 if (is_inlined(inlined[slot], output))
1962 continue;
1963 nir_variable *var = NULL;
1964 while (!var)
1965 var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
1966 slot = output->location;
1967 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1968 if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
1969 continue;
1970 /* this is a lowered 64bit variable that can't be exported due to packing */
1971 if (var->data.is_xfb)
1972 goto out;
1973
1974 unsigned num_slots = is_clipcull_dist(var->data.location) ?
1975 glsl_array_size(var->type) / 4 :
1976 glsl_count_vec4_slots(var->type, false, false);
1977 /* for each variable, iterate over all the variable's slots and inline the outputs */
1978 for (unsigned j = 0; j < num_slots; j++) {
1979 slot = var->data.location + j;
1980 const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
1981 if (!packed_output)
1982 goto out;
1983
1984 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1985 if (!(packed & BITFIELD64_BIT(slot)) ||
1986 util_bitcount(packed_streams[slot]) != 1 ||
1987 util_bitcount(packed_buffers[slot]) != 1)
1988 goto out;
1989
1990 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1991 unsigned num_components = get_slot_components(var, slot, var->data.location);
1992 if (num_components != packed_components[slot])
1993 goto out;
1994
1995 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1996 uint32_t prev_offset = packed_offsets[packed_output->location][0];
1997 for (unsigned k = 1; k < num_components; k++) {
1998 /* if the offsets are not incrementing as expected, skip consolidation */
1999 if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
2000 goto out;
2001 prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
2002 }
2003 }
2004 /* this output can be consolidated: blast out all the data inlined */
2005 var->data.explicit_xfb_buffer = 1;
2006 var->data.xfb.buffer = output->buffer;
2007 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2008 var->data.offset = output->offset;
2009 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2010 /* mark all slot components inlined to skip subsequent loop iterations */
2011 for (unsigned j = 0; j < num_slots; j++) {
2012 slot = var->data.location + j;
2013 for (unsigned k = 0; k < packed_components[slot]; k++)
2014 inlined[slot][k] = true;
2015 packed &= ~BITFIELD64_BIT(slot);
2016 }
2017 continue;
2018 out:
2019 unreachable("xfb should be inlined by now!");
2020 }
2021 }
2022
2023 struct decompose_state {
2024 nir_variable **split;
2025 bool needs_w;
2026 };
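/* State for decompose_attribs below: it splits a vertex attribute that the
 * vertex-input state can't fetch natively into one variable per component
 * (split[0] is the original), e.g. a vec4 attribute may become several scalar
 * inputs; in the "needs_w" case component 0 keeps the original vec4 type so
 * .w can still be read from it.
 */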
2027
2028 static bool
2029 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2030 {
2031 struct decompose_state *state = data;
2032 nir_variable **split = state->split;
2033 if (instr->type != nir_instr_type_intrinsic)
2034 return false;
2035 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2036 if (intr->intrinsic != nir_intrinsic_load_deref)
2037 return false;
2038 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2039 nir_variable *var = nir_deref_instr_get_variable(deref);
2040 if (var != split[0])
2041 return false;
2042 unsigned num_components = glsl_get_vector_elements(split[0]->type);
2043 b->cursor = nir_after_instr(instr);
2044 nir_def *loads[4];
2045 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2046 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2047 if (state->needs_w) {
2048 /* oob load w component to get correct value for int/float */
2049 loads[3] = nir_channel(b, loads[0], 3);
2050 loads[0] = nir_channel(b, loads[0], 0);
2051 }
2052 nir_def *new_load = nir_vec(b, loads, num_components);
2053 nir_def_rewrite_uses(&intr->def, new_load);
2054 nir_instr_remove_v(instr);
2055 return true;
2056 }
2057
2058 static bool
2059 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2060 {
2061 uint32_t bits = 0;
2062 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2063 bits |= BITFIELD_BIT(var->data.driver_location);
2064 bits = ~bits;
2065 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2066 nir_variable *split[5];
2067 struct decompose_state state;
2068 state.split = split;
2069 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2070 assert(var);
2071 split[0] = var;
2072 bits |= BITFIELD_BIT(var->data.driver_location);
2073 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2074 unsigned num_components = glsl_get_vector_elements(var->type);
2075 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2076 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2077 split[i+1] = nir_variable_clone(var, nir);
2078 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2079 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2080 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2081 else
2082 split[i+1]->type = new_type;
2083 split[i+1]->data.driver_location = ffs(bits) - 1;
2084 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2085 nir_shader_add_variable(nir, split[i+1]);
2086 }
2087 var->data.mode = nir_var_shader_temp;
2088 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2089 }
2090 nir_fixup_deref_modes(nir);
2091 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2092 optimize_nir(nir, NULL, true);
2093 return true;
2094 }
2095
2096 static bool
2097 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2098 {
2099 struct zink_screen *screen = data;
2100 const bool has_int64 = screen->info.feats.features.shaderInt64;
2101 if (instr->type != nir_instr_type_intrinsic)
2102 return false;
2103 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2104 b->cursor = nir_before_instr(instr);
2105 switch (intr->intrinsic) {
2106 case nir_intrinsic_ssbo_atomic:
2107 case nir_intrinsic_ssbo_atomic_swap: {
2108 /* convert offset to uintN_t[idx] */
2109 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2110 nir_src_rewrite(&intr->src[1], offset);
2111 return true;
2112 }
2113 case nir_intrinsic_load_ssbo:
2114 case nir_intrinsic_load_ubo: {
2115 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2116 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2117 nir_src_is_const(intr->src[0]) &&
2118 nir_src_as_uint(intr->src[0]) == 0 &&
2119 intr->def.bit_size == 64 &&
2120 nir_intrinsic_align_offset(intr) % 8 != 0;
2121 force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2122 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2123 nir_src_rewrite(&intr->src[1], offset);
2124 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2125 if (force_2x32) {
2126 /* this is always scalarized */
2127 assert(intr->def.num_components == 1);
2128 /* rewrite as 2x32 */
2129 nir_def *load[2];
2130 for (unsigned i = 0; i < 2; i++) {
2131 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2132 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2133 else
2134 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2135 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2136 }
2137 /* cast back to 64bit */
2138 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2139 nir_def_rewrite_uses(&intr->def, casted);
2140 nir_instr_remove(instr);
2141 }
2142 return true;
2143 }
2144 case nir_intrinsic_load_scratch:
2145 case nir_intrinsic_load_shared: {
2146 b->cursor = nir_before_instr(instr);
2147 bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2148 nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2149 nir_src_rewrite(&intr->src[0], offset);
2150 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2151 if (force_2x32) {
2152 /* this is always scalarized */
2153 assert(intr->def.num_components == 1);
2154 /* rewrite as 2x32 */
2155 nir_def *load[2];
2156 for (unsigned i = 0; i < 2; i++)
2157 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2158 /* cast back to 64bit */
2159 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2160 nir_def_rewrite_uses(&intr->def, casted);
2161 nir_instr_remove(instr);
2162 return true;
2163 }
2164 break;
2165 }
2166 case nir_intrinsic_store_ssbo: {
2167 b->cursor = nir_before_instr(instr);
2168 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2169 nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2170 nir_src_rewrite(&intr->src[2], offset);
2171 /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 and pray */
2172 if (force_2x32) {
2173 /* this is always scalarized */
2174 assert(intr->src[0].ssa->num_components == 1);
2175 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2176 for (unsigned i = 0; i < 2; i++)
2177 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2178 nir_instr_remove(instr);
2179 }
2180 return true;
2181 }
2182 case nir_intrinsic_store_scratch:
2183 case nir_intrinsic_store_shared: {
2184 b->cursor = nir_before_instr(instr);
2185 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2186 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2187 nir_src_rewrite(&intr->src[1], offset);
2188 /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 and pray */
2189 if (force_2x32) {
2190 /* this is always scalarized */
2191 assert(intr->src[0].ssa->num_components == 1);
2192 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2193 for (unsigned i = 0; i < 2; i++)
2194 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2195 nir_instr_remove(instr);
2196 }
2197 return true;
2198 }
2199 default:
2200 break;
2201 }
2202 return false;
2203 }
2204
2205 static bool
2206 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2207 {
2208 return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2209 }
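/* Example of the rewrite above (assuming constant, 4-byte-aligned offsets): a
 * 32-bit load_ssbo at byte offset 16 becomes an access at element index
 * 16 / 4 == 4, and a 64-bit load without shaderInt64 support becomes two
 * 32-bit loads at indices 4 and 5 that are recombined with pack_64_2x32_split.
 */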
2210
2211 static nir_variable *
2212 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2213 {
2214 nir_variable *var, **ptr;
2215 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2216
2217 if (ssbo)
2218 ptr = &bo->ssbo[bit_size >> 4];
2219 else {
2220 if (!idx) {
2221 ptr = &bo->uniforms[bit_size >> 4];
2222 } else
2223 ptr = &bo->ubo[bit_size >> 4];
2224 }
2225 var = *ptr;
2226 if (!var) {
2227 if (ssbo)
2228 var = bo->ssbo[32 >> 4];
2229 else {
2230 if (!idx)
2231 var = bo->uniforms[32 >> 4];
2232 else
2233 var = bo->ubo[32 >> 4];
2234 }
2235 var = nir_variable_clone(var, shader);
2236 if (ssbo)
2237 var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2238 else
2239 var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2240 *ptr = var;
2241 nir_shader_add_variable(shader, var);
2242
2243 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2244 fields[0].name = ralloc_strdup(shader, "base");
2245 fields[1].name = ralloc_strdup(shader, "unsized");
2246 unsigned array_size = glsl_get_length(var->type);
2247 const struct glsl_type *bare_type = glsl_without_array(var->type);
2248 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2249 unsigned length = glsl_get_length(array_type);
2250 const struct glsl_type *type;
2251 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2252 if (bit_size > 32) {
2253 assert(bit_size == 64);
2254 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2255 } else {
2256 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2257 }
2258 fields[0].type = type;
2259 fields[1].type = unsized;
2260 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2261 var->data.driver_location = idx;
2262 }
2263 return var;
2264 }
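/* The variable created above ends up typed roughly as
 *
 *    struct { uintN_t base[len]; uintN_t unsized[]; } bo[array_size];
 *
 * i.e. a sized "base" array plus an unsized tail, re-expressed at the
 * requested bit size so the deref chains built below can index it directly.
 */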
2265
2266 static void
2267 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2268 {
2269 nir_intrinsic_op op;
2270 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2271 if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2272 op = nir_intrinsic_deref_atomic;
2273 else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2274 op = nir_intrinsic_deref_atomic_swap;
2275 else
2276 unreachable("unknown intrinsic");
2277 nir_def *offset = intr->src[1].ssa;
2278 nir_src *src = &intr->src[0];
2279 nir_variable *var = get_bo_var(b->shader, bo, true, src,
2280 intr->def.bit_size);
2281 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2282 nir_def *idx = src->ssa;
2283 if (bo->first_ssbo)
2284 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2285 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2286 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2287
2288 /* generate new atomic deref ops for every component */
2289 nir_def *result[4];
2290 unsigned num_components = intr->def.num_components;
2291 for (unsigned i = 0; i < num_components; i++) {
2292 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2293 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2294 nir_def_init(&new_instr->instr, &new_instr->def, 1,
2295 intr->def.bit_size);
2296 nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2297 new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2298 /* deref ops have no offset src, so copy the srcs after it */
2299 for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2300 new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2301 nir_builder_instr_insert(b, &new_instr->instr);
2302
2303 result[i] = &new_instr->def;
2304 offset = nir_iadd_imm(b, offset, 1);
2305 }
2306
2307 nir_def *load = nir_vec(b, result, num_components);
2308 nir_def_replace(&intr->def, load);
2309 }
2310
2311 static bool
2312 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2313 {
2314 struct bo_vars *bo = data;
2315 if (instr->type != nir_instr_type_intrinsic)
2316 return false;
2317 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2318 nir_variable *var = NULL;
2319 nir_def *offset = NULL;
2320 bool is_load = true;
2321 b->cursor = nir_before_instr(instr);
2322 nir_src *src;
2323 bool ssbo = true;
2324 switch (intr->intrinsic) {
2325 case nir_intrinsic_ssbo_atomic:
2326 case nir_intrinsic_ssbo_atomic_swap:
2327 rewrite_atomic_ssbo_instr(b, instr, bo);
2328 return true;
2329 case nir_intrinsic_store_ssbo:
2330 src = &intr->src[1];
2331 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2332 offset = intr->src[2].ssa;
2333 is_load = false;
2334 break;
2335 case nir_intrinsic_load_ssbo:
2336 src = &intr->src[0];
2337 var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2338 offset = intr->src[1].ssa;
2339 break;
2340 case nir_intrinsic_load_ubo:
2341 src = &intr->src[0];
2342 var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2343 offset = intr->src[1].ssa;
2344 ssbo = false;
2345 break;
2346 default:
2347 return false;
2348 }
2349 assert(var);
2350 assert(offset);
2351 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2352 nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2353 if (!ssbo && bo->first_ubo && var->data.driver_location)
2354 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2355 else if (ssbo && bo->first_ssbo)
2356 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2357 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2358 nir_i2iN(b, idx, deref_var->def.bit_size));
2359 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2360 assert(intr->num_components <= 2);
2361 if (is_load) {
2362 nir_def *result[2];
2363 for (unsigned i = 0; i < intr->num_components; i++) {
2364 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2365 nir_i2iN(b, offset, deref_struct->def.bit_size));
2366 result[i] = nir_load_deref(b, deref_arr);
2367 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2368 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2369 offset = nir_iadd_imm(b, offset, 1);
2370 }
2371 nir_def *load = nir_vec(b, result, intr->num_components);
2372 nir_def_rewrite_uses(&intr->def, load);
2373 } else {
2374 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2375 nir_i2iN(b, offset, deref_struct->def.bit_size));
2376 nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2377 }
2378 nir_instr_remove(instr);
2379 return true;
2380 }
2381
2382 static bool
2383 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2384 {
2385 struct bo_vars bo = get_bo_vars(zs, shader);
2386 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2387 }
2388
2389 static bool
2390 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2391 {
2392 switch (intr->intrinsic) {
2393 case nir_intrinsic_load_interpolated_input:
2394 *is_interp = true;
2395 FALLTHROUGH;
2396 case nir_intrinsic_load_input:
2397 case nir_intrinsic_load_per_vertex_input:
2398 *is_input = true;
2399 FALLTHROUGH;
2400 case nir_intrinsic_load_output:
2401 case nir_intrinsic_load_per_vertex_output:
2402 case nir_intrinsic_load_per_primitive_output:
2403 *is_load = true;
2404 FALLTHROUGH;
2405 case nir_intrinsic_store_output:
2406 case nir_intrinsic_store_per_primitive_output:
2407 case nir_intrinsic_store_per_vertex_output:
2408 break;
2409 default:
2410 return false;
2411 }
2412 return true;
2413 }
2414
2415 static bool
2416 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2417 {
2418 switch (intr->intrinsic) {
2419 case nir_intrinsic_load_per_vertex_input:
2420 case nir_intrinsic_load_per_vertex_output:
2421 case nir_intrinsic_load_per_primitive_output:
2422 case nir_intrinsic_store_per_primitive_output:
2423 case nir_intrinsic_store_per_vertex_output:
2424 return true;
2425 default:
2426 break;
2427 }
2428 return false;
2429 }
2430
2431 static bool
2432 find_var_deref(nir_shader *nir, nir_variable *var)
2433 {
2434 nir_foreach_function_impl(impl, nir) {
2435 nir_foreach_block(block, impl) {
2436 nir_foreach_instr(instr, block) {
2437 if (instr->type != nir_instr_type_deref)
2438 continue;
2439 nir_deref_instr *deref = nir_instr_as_deref(instr);
2440 if (deref->deref_type == nir_deref_type_var && deref->var == var)
2441 return true;
2442 }
2443 }
2444 }
2445 return false;
2446 }
2447
2448 static bool
2449 find_var_io(nir_shader *nir, nir_variable *var)
2450 {
2451 nir_foreach_function(function, nir) {
2452 if (!function->impl)
2453 continue;
2454
2455 nir_foreach_block(block, function->impl) {
2456 nir_foreach_instr(instr, block) {
2457 if (instr->type != nir_instr_type_intrinsic)
2458 continue;
2459 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2460 bool is_load = false;
2461 bool is_input = false;
2462 bool is_interp = false;
2463 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2464 continue;
2465 if (var->data.mode == nir_var_shader_in && !is_input)
2466 continue;
2467 if (var->data.mode == nir_var_shader_out && is_input)
2468 continue;
2469 unsigned slot_offset = 0;
2470 if (var->data.fb_fetch_output && !is_load)
2471 continue;
2472 if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2473 continue;
2474 nir_src *src_offset = nir_get_io_offset_src(intr);
2475 if (src_offset && nir_src_is_const(*src_offset))
2476 slot_offset = nir_src_as_uint(*src_offset);
2477 unsigned slot_count = get_var_slot_count(nir, var);
2478 if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2479 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2480 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2481 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2482 return true;
2483 }
2484 }
2485 }
2486 return false;
2487 }
2488
2489 struct clamp_layer_output_state {
2490 nir_variable *original;
2491 nir_variable *clamped;
2492 };
2493
2494 static void
2495 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2496 {
2497 nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2498 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2499 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2500 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2501 nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2502 nir_load_deref(b, original_deref),
2503 nir_imm_int(b, 0));
2504 nir_store_deref(b, clamped_deref, layer, 0);
2505 }
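/* The helper above effectively emits
 *
 *    layer_clamped = (framebuffer_is_layered push constant == 1) ? layer : 0;
 *
 * right before each vertex is emitted (geometry shaders) or once at the end of
 * the entrypoint (other stages); see clamp_layer_output below, which is only
 * used when the driver needs a sanitised gl_Layer.
 */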
2506
2507 static bool
2508 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2509 {
2510 struct clamp_layer_output_state *state = data;
2511 switch (instr->type) {
2512 case nir_instr_type_intrinsic: {
2513 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2514 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2515 intr->intrinsic != nir_intrinsic_emit_vertex)
2516 return false;
2517 b->cursor = nir_before_instr(instr);
2518 clamp_layer_output_emit(b, state);
2519 return true;
2520 }
2521 default: return false;
2522 }
2523 }
2524
2525 static bool
2526 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2527 {
2528 switch (vs->info.stage) {
2529 case MESA_SHADER_VERTEX:
2530 case MESA_SHADER_GEOMETRY:
2531 case MESA_SHADER_TESS_EVAL:
2532 break;
2533 default:
2534 unreachable("invalid last vertex stage!");
2535 }
2536 struct clamp_layer_output_state state = {0};
2537 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2538 if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2539 return false;
2540 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2541 state.clamped->data.location = VARYING_SLOT_LAYER;
2542 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2543 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2544 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2545 state.original->data.driver_location = (*next_location)++;
2546 if (fs_var) {
2547 fs_var->data.location = state.original->data.location;
2548 fs_var->data.driver_location = state.original->data.driver_location;
2549 }
2550 } else {
2551 if (state.original->data.explicit_xfb_buffer) {
2552 /* Will xfb the clamped output but still better than nothing */
2553 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2554 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2555 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2556 state.clamped->data.offset = state.original->data.offset;
2557 state.clamped->data.stream = state.original->data.stream;
2558 }
2559 state.original->data.mode = nir_var_shader_temp;
2560 nir_fixup_deref_modes(vs);
2561 }
2562 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2563 nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2564 } else {
2565 nir_builder b;
2566 nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2567 b = nir_builder_at(nir_after_impl(impl));
2568 assert(impl->end_block->predecessors->entries == 1);
2569 clamp_layer_output_emit(&b, &state);
2570 nir_metadata_preserve(impl, nir_metadata_dominance);
2571 }
2572 optimize_nir(vs, NULL, true);
2573 NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2574 return true;
2575 }
2576
2577 struct io_slot_map {
2578 uint64_t *patch_slot_track;
2579 uint64_t *slot_track;
2580 unsigned char *slot_map;
2581 unsigned reserved;
2582 unsigned char *patch_slot_map;
2583 unsigned patch_reserved;
2584 };
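/* slot_map / patch_slot_map translate gl varying slots into a compact range of
 * driver_locations, e.g. if only VARYING_SLOT_VAR2 and VARYING_SLOT_VAR5 are
 * used they end up at locations 0 and 1; 0xff marks a not-yet-assigned slot and
 * the *_track bitmasks record, per component, which slots are already claimed.
 */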
2585
2586 static void
2587 assign_track_slot_mask(struct io_slot_map *io, nir_variable *var, unsigned slot, unsigned num_slots)
2588 {
2589 uint64_t *track = var->data.patch ? io->patch_slot_track : io->slot_track;
2590 uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(glsl_without_array(var->type))) << var->data.location_frac;
2591 uint64_t slot_mask = BITFIELD64_RANGE(slot, num_slots);
2592 u_foreach_bit(c, mask) {
2593 assert((track[c] & slot_mask) == 0);
2594 track[c] |= slot_mask;
2595 }
2596 }
2597
2598 static void
2599 assign_slot_io(gl_shader_stage stage, struct io_slot_map *io, nir_variable *var, unsigned slot)
2600 {
2601 unsigned num_slots;
2602 if (nir_is_arrayed_io(var, stage))
2603 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2604 else
2605 num_slots = glsl_count_vec4_slots(var->type, false, false);
2606 uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2607 assign_track_slot_mask(io, var, slot, num_slots);
2608 if (slot_map[slot] != 0xff)
2609 return;
2610 unsigned *reserved = var->data.patch ? &io->patch_reserved : &io->reserved;
2611 assert(*reserved + num_slots <= MAX_VARYING);
2612 assert(*reserved < MAX_VARYING);
2613 for (unsigned i = 0; i < num_slots; i++)
2614 slot_map[slot + i] = (*reserved)++;
2615 }
2616
2617 static void
2618 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2619 {
2620 unsigned slot = var->data.location;
2621 switch (slot) {
2622 case -1:
2623 unreachable("there should be no UINT32_MAX location variables!");
2624 break;
2625 case VARYING_SLOT_POS:
2626 case VARYING_SLOT_PSIZ:
2627 case VARYING_SLOT_LAYER:
2628 case VARYING_SLOT_PRIMITIVE_ID:
2629 case VARYING_SLOT_CLIP_DIST0:
2630 case VARYING_SLOT_CULL_DIST0:
2631 case VARYING_SLOT_VIEWPORT:
2632 case VARYING_SLOT_FACE:
2633 case VARYING_SLOT_TESS_LEVEL_OUTER:
2634 case VARYING_SLOT_TESS_LEVEL_INNER:
2635 /* use a sentinel value to avoid counting later */
2636 var->data.driver_location = UINT32_MAX;
2637 return;
2638
2639 default:
2640 break;
2641 }
2642 if (var->data.patch) {
2643 assert(slot >= VARYING_SLOT_PATCH0);
2644 slot -= VARYING_SLOT_PATCH0;
2645 }
2646 assign_slot_io(stage, io, var, slot);
2647 slot = var->data.patch ? io->patch_slot_map[slot] : io->slot_map[slot];
2648 assert(slot < MAX_VARYING);
2649 var->data.driver_location = slot;
2650 }
2651
2652 ALWAYS_INLINE static bool
2653 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2654 {
2655 if (stage != MESA_SHADER_FRAGMENT)
2656 return false;
2657 return var->data.location >= VARYING_SLOT_TEX0 &&
2658 var->data.location <= VARYING_SLOT_TEX7;
2659 }
2660
2661 static bool
2662 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2663 {
2664 unsigned slot = var->data.location;
2665 switch (slot) {
2666 case VARYING_SLOT_POS:
2667 case VARYING_SLOT_PSIZ:
2668 case VARYING_SLOT_LAYER:
2669 case VARYING_SLOT_PRIMITIVE_ID:
2670 case VARYING_SLOT_CLIP_DIST0:
2671 case VARYING_SLOT_CULL_DIST0:
2672 case VARYING_SLOT_VIEWPORT:
2673 case VARYING_SLOT_FACE:
2674 case VARYING_SLOT_TESS_LEVEL_OUTER:
2675 case VARYING_SLOT_TESS_LEVEL_INNER:
2676 /* use a sentinel value to avoid counting later */
2677 var->data.driver_location = UINT_MAX;
2678 return true;
2679 default:
2680 break;
2681 }
2682 if (var->data.patch) {
2683 assert(slot >= VARYING_SLOT_PATCH0);
2684 slot -= VARYING_SLOT_PATCH0;
2685 }
2686 uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2687 if (slot_map[slot] == (unsigned char)-1) {
2688 /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2689 * so keep for now and eliminate later
2690 */
2691 if (is_texcoord(stage, var)) {
2692 var->data.driver_location = UINT32_MAX;
2693 return true;
2694 }
2695 /* patch variables may be read in the workgroup */
2696 if (stage != MESA_SHADER_TESS_CTRL)
2697 /* dead io */
2698 return false;
2699 assign_slot_io(stage, io, var, slot);
2700 }
2701 var->data.driver_location = slot_map[slot];
2702 return true;
2703 }
2704
2705
2706 static bool
2707 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2708 {
2709 nir_variable *var = data;
2710 if (instr->type != nir_instr_type_intrinsic)
2711 return false;
2712
2713 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2714 bool is_load = false;
2715 bool is_input = false;
2716 bool is_interp = false;
2717 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2718 return false;
2719 if (!is_load)
2720 return false;
2721 unsigned location = nir_intrinsic_io_semantics(intr).location;
2722 if (location != var->data.location)
2723 return false;
2724 b->cursor = nir_before_instr(instr);
2725 nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2726 intr->def.bit_size);
2727 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2728 switch (location) {
2729 case VARYING_SLOT_COL0:
2730 case VARYING_SLOT_COL1:
2731 case VARYING_SLOT_BFC0:
2732 case VARYING_SLOT_BFC1:
2733 /* default color is 0,0,0,1 */
2734 if (intr->def.num_components == 4)
2735 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2736 break;
2737 default:
2738 break;
2739 }
2740 }
2741 nir_def_replace(&intr->def, zero);
2742 return true;
2743 }
2744
2745
2746
2747 static bool
2748 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2749 {
2750 switch (intr->intrinsic) {
2751 case nir_intrinsic_store_output:
2752 case nir_intrinsic_store_per_primitive_output:
2753 case nir_intrinsic_store_per_vertex_output:
2754 break;
2755 default:
2756 return false;
2757 }
2758 if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2759 return false;
2760 if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2761 nir_instr_remove(&intr->instr);
2762 return true;
2763 }
2764 return false;
2765 }
2766
2767 static bool
2768 delete_psiz_store(nir_shader *nir, bool one)
2769 {
2770 bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2771 nir_metadata_dominance, one ? nir : NULL);
2772 if (progress)
2773 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2774 return progress;
2775 }
2776
2777 struct write_components {
2778 unsigned slot;
2779 uint32_t component_mask;
2780 };
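/* Used by the two intrinsic passes below: find_max_write_components records
 * which components the producer actually writes for a given slot, and
 * fill_zero_reads then patches consumer loads so unwritten components read
 * back 0 (or 1.0 for the .w of color inputs), e.g. a producer writing only
 * .xy of a varying while the consumer loads .xyzw gets .zw replaced with 0.
 */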
2781
2782 static bool
2783 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2784 {
2785 struct write_components *wc = data;
2786 bool is_load = false;
2787 bool is_input = false;
2788 bool is_interp = false;
2789 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2790 return false;
2791 if (!is_input)
2792 return false;
2793 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2794 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2795 return false;
2796 unsigned num_components = intr->num_components;
2797 unsigned c = nir_intrinsic_component(intr);
2798 if (intr->def.bit_size == 64)
2799 num_components *= 2;
2800 nir_src *src_offset = nir_get_io_offset_src(intr);
2801 if (!nir_src_is_const(*src_offset))
2802 return false;
2803 unsigned slot_offset = nir_src_as_uint(*src_offset);
2804 if (s.location + slot_offset != wc->slot)
2805 return false;
2806 uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2807 if (intr->def.bit_size == 64)
2808 readmask |= readmask << (intr->num_components + c);
2809 /* handle dvec3/dvec4 */
2810 if (num_components + c > 4)
2811 readmask >>= 4;
2812 if ((wc->component_mask & readmask) == readmask)
2813 return false;
2814 uint32_t rewrite_mask = readmask & ~wc->component_mask;
2815 if (!rewrite_mask)
2816 return false;
2817 b->cursor = nir_after_instr(&intr->instr);
2818 nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2819 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2820 switch (wc->slot) {
2821 case VARYING_SLOT_COL0:
2822 case VARYING_SLOT_COL1:
2823 case VARYING_SLOT_BFC0:
2824 case VARYING_SLOT_BFC1:
2825 /* default color is 0,0,0,1 */
2826 if (intr->def.num_components == 4)
2827 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2828 break;
2829 default:
2830 break;
2831 }
2832 }
2833 rewrite_mask >>= c;
2834 nir_def *dest = &intr->def;
2835 u_foreach_bit(component, rewrite_mask)
2836 dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2837 nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2838 return true;
2839 }
2840
2841 static bool
2842 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2843 {
2844 struct write_components *wc = data;
2845 bool is_load = false;
2846 bool is_input = false;
2847 bool is_interp = false;
2848 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2849 return false;
2850 if (is_input || is_load)
2851 return false;
2852 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2853 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2854 return false;
2855 unsigned location = s.location;
2856 unsigned c = nir_intrinsic_component(intr);
2857 uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2858 if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2859 unsigned num_components = intr->num_components * 2;
2860 nir_src *src_offset = nir_get_io_offset_src(intr);
2861 if (nir_src_is_const(*src_offset)) {
2862 if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2863 return false;
2864 }
2865 wrmask |= wrmask << intr->num_components;
2866 /* handle dvec3/dvec4 */
2867 if (num_components + c > 4)
2868 wrmask >>= 4;
2869 }
2870 wc->component_mask |= wrmask;
2871 return false;
2872 }
2873
2874 void
2875 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2876 {
2877 uint64_t slot_track[4] = {0};
2878 uint64_t patch_slot_track[4] = {0};
2879 unsigned char slot_map[VARYING_SLOT_MAX];
2880 memset(slot_map, -1, sizeof(slot_map));
2881 unsigned char patch_slot_map[VARYING_SLOT_MAX];
2882 memset(patch_slot_map, -1, sizeof(patch_slot_map));
2883 struct io_slot_map io = {
2884 .patch_slot_track = patch_slot_track,
2885 .slot_track = slot_track,
2886 .slot_map = slot_map,
2887 .patch_slot_map = patch_slot_map,
2888 .reserved = 0,
2889 .patch_reserved = 0,
2890 };
2891 bool do_fixup = false;
2892 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2893 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2894 if (var) {
2895 bool can_remove = false;
2896 if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2897 /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2898 if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2899 can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2900 else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2901 can_remove = !var->data.explicit_location;
2902 }
2903 /* remove injected pointsize from all but the last vertex stage */
2904 if (can_remove) {
2905 var->data.mode = nir_var_shader_temp;
2906 nir_fixup_deref_modes(producer);
2907 delete_psiz_store(producer, false);
2908 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2909 optimize_nir(producer, NULL, true);
2910 }
2911 }
2912 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2913 producer->info.has_transform_feedback_varyings = false;
2914 nir_foreach_shader_out_variable(var_out, producer)
2915 var_out->data.explicit_xfb_buffer = false;
2916 }
2917 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2918 /* never assign from tcs -> tes, always invert */
2919 nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2920 assign_producer_var_io(consumer->info.stage, var_in, &io);
2921 nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2922 if (!assign_consumer_var_io(producer->info.stage, var_out, &io))
2923 /* this is an output, nothing more needs to be done for it to be dropped */
2924 do_fixup = true;
2925 }
2926 } else {
2927 nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2928 assign_producer_var_io(producer->info.stage, var_out, &io);
2929 nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2930 if (!assign_consumer_var_io(consumer->info.stage, var_in, &io)) {
2931 do_fixup = true;
2932 /* input needs to be rewritten */
2933 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
2934 }
2935 }
2936 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_compiler_workarounds.needs_sanitised_layer)
2937 do_fixup |= clamp_layer_output(producer, consumer, &io.reserved);
2938 }
2939 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
2940 if (producer->info.io_lowered && consumer->info.io_lowered) {
2941 u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
2942 struct write_components wc = {slot, 0};
2943 nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
2944 assert(wc.component_mask);
2945 if (wc.component_mask != BITFIELD_MASK(4))
2946 do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
2947 }
2948 }
2949 if (!do_fixup)
2950 return;
2951 nir_fixup_deref_modes(nir);
2952 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2953 optimize_nir(nir, NULL, true);
2954 }
2955
2956 /* all types that hit this function contain something that is 64bit */
2957 static const struct glsl_type *
2958 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2959 {
2960 if (glsl_type_is_array(type)) {
2961 const struct glsl_type *child = glsl_get_array_element(type);
2962 unsigned elements = glsl_array_size(type);
2963 unsigned stride = glsl_get_explicit_stride(type);
2964 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2965 }
2966 /* rewrite structs recursively */
2967 if (glsl_type_is_struct_or_ifc(type)) {
2968 unsigned nmembers = glsl_get_length(type);
2969 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2970 unsigned xfb_offset = 0;
2971 for (unsigned i = 0; i < nmembers; i++) {
2972 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2973 fields[i] = *f;
2974 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2975 if (i < nmembers - 1 && xfb_offset % 8 &&
2976 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2977 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2978 var->data.is_xfb = true;
2979 }
2980 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2981 }
2982 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2983 }
2984 if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2985 return type;
2986 if (doubles_only && glsl_type_is_vector_or_scalar(type))
2987 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2988 enum glsl_base_type base_type;
2989 switch (glsl_get_base_type(type)) {
2990 case GLSL_TYPE_UINT64:
2991 base_type = GLSL_TYPE_UINT;
2992 break;
2993 case GLSL_TYPE_INT64:
2994 base_type = GLSL_TYPE_INT;
2995 break;
2996 case GLSL_TYPE_DOUBLE:
2997 base_type = GLSL_TYPE_FLOAT;
2998 break;
2999 default:
3000 unreachable("unknown 64-bit vertex attribute format!");
3001 }
3002 if (glsl_type_is_scalar(type))
3003 return glsl_vector_type(base_type, 2);
3004 unsigned num_components;
3005 if (glsl_type_is_matrix(type)) {
3006 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
3007 unsigned vec_components = glsl_get_vector_elements(type);
3008 if (vec_components == 3)
3009 vec_components = 4;
3010 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
3011 } else {
3012 num_components = glsl_get_vector_elements(type) * 2;
3013 if (num_components <= 4)
3014 return glsl_vector_type(base_type, num_components);
3015 }
3016 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
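/* e.g. dvec3 (6 32-bit components) becomes struct { vec4, vec2 } and
 * dvec4 (8 32-bit components) becomes struct { vec4, vec4 } */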
3017 struct glsl_struct_field fields[8] = {0};
3018 unsigned remaining = num_components;
3019 unsigned nfields = 0;
3020 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
3021 assert(i < ARRAY_SIZE(fields));
3022 fields[i].name = "";
3023 fields[i].offset = i * 16;
3024 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
3025 }
3026 char buf[64];
3027 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
3028 return glsl_struct_type(fields, nfields, buf, true);
3029 }
3030
3031 static const struct glsl_type *
3032 deref_is_matrix(nir_deref_instr *deref)
3033 {
3034 if (glsl_type_is_matrix(deref->type))
3035 return deref->type;
3036 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3037 if (parent)
3038 return deref_is_matrix(parent);
3039 return NULL;
3040 }
3041
3042 static bool
3043 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
3044 struct hash_table *derefs, struct set *deletes, bool doubles_only)
3045 {
3046 bool func_progress = false;
3047 nir_builder b = nir_builder_create(impl);
3048 nir_foreach_block(block, impl) {
3049 nir_foreach_instr_safe(instr, block) {
3050 switch (instr->type) {
3051 case nir_instr_type_deref: {
3052 nir_deref_instr *deref = nir_instr_as_deref(instr);
3053 if (!(deref->modes & var->data.mode))
3054 continue;
3055 if (nir_deref_instr_get_variable(deref) != var)
3056 continue;
3057
3058 /* matrix types are special: store the original deref type for later use */
3059 const struct glsl_type *matrix = deref_is_matrix(deref);
3060 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3061 if (!matrix) {
3062 /* if this isn't a direct matrix deref, it might be a matrix row deref */
3063 hash_table_foreach(derefs, he) {
3064 /* propagate parent matrix type to row deref */
3065 if (he->key == parent)
3066 matrix = he->data;
3067 }
3068 }
3069 if (matrix)
3070 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
3071 if (deref->deref_type == nir_deref_type_var)
3072 deref->type = var->type;
3073 else
3074 deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
3075 }
3076 break;
3077 case nir_instr_type_intrinsic: {
3078 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3079 if (intr->intrinsic != nir_intrinsic_store_deref &&
3080 intr->intrinsic != nir_intrinsic_load_deref)
3081 break;
3082 if (nir_intrinsic_get_var(intr, 0) != var)
3083 break;
3084 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
3085 (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
3086 break;
3087 b.cursor = nir_before_instr(instr);
3088 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3089 unsigned num_components = intr->num_components * 2;
3090 nir_def *comp[NIR_MAX_VEC_COMPONENTS];
3091 /* this is the stored matrix type from the deref */
3092 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
3093 const struct glsl_type *matrix = he ? he->data : NULL;
3094 if (doubles_only && !matrix)
3095 break;
3096 func_progress = true;
3097 if (intr->intrinsic == nir_intrinsic_store_deref) {
3098 /* first, unpack the src data to 32bit vec2 components */
3099 for (unsigned i = 0; i < intr->num_components; i++) {
3100 nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
3101 comp[i * 2] = nir_channel(&b, ssa, 0);
3102 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
3103 }
3104 unsigned wrmask = nir_intrinsic_write_mask(intr);
3105 unsigned mask = 0;
3106 /* expand writemask for doubled components */
3107 for (unsigned i = 0; i < intr->num_components; i++) {
3108 if (wrmask & BITFIELD_BIT(i))
3109 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
3110 }
3111 if (matrix) {
3112 /* matrix types always come from array (row) derefs */
3113 assert(deref->deref_type == nir_deref_type_array);
3114 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3115 /* let optimization clean up consts later */
3116 nir_def *index = deref->arr.index.ssa;
3117 /* this might be an indirect array index:
3118 * - iterate over matrix columns
3119 * - add if blocks for each column
3120 * - perform the store in the block
3121 */
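/* roughly: if (index == 0) { store column 0 } if (index == 1) { store column 1 } ... */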
3122 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
3123 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3124 unsigned vec_components = glsl_get_vector_elements(matrix);
3125 /* always clamp dvec3 to 4 components */
3126 if (vec_components == 3)
3127 vec_components = 4;
3128 unsigned start_component = idx * vec_components * 2;
3129 /* struct member */
3130 unsigned member = start_component / 4;
3131 /* number of components remaining */
3132 unsigned remaining = num_components;
3133 for (unsigned i = 0; i < num_components; member++) {
3134 if (!(mask & BITFIELD_BIT(i)))
3135 continue;
3136 assert(member < glsl_get_length(var_deref->type));
3137 /* deref the rewritten struct to the appropriate vec4/vec2 */
3138 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3139 unsigned incr = MIN2(remaining, 4);
3140 /* assemble the write component vec */
3141 nir_def *val = nir_vec(&b, &comp[i], incr);
3142 /* use the number of components being written as the writemask */
3143 if (glsl_get_vector_elements(strct->type) > val->num_components)
3144 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3145 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3146 remaining -= incr;
3147 i += incr;
3148 }
3149 nir_pop_if(&b, NULL);
3150 }
3151 _mesa_set_add(deletes, &deref->instr);
3152 } else if (num_components <= 4) {
3153 /* simple store case: just write out the components */
3154 nir_def *dest = nir_vec(&b, comp, num_components);
3155 nir_store_deref(&b, deref, dest, mask);
3156 } else {
3157 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3158 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3159 if (!(mask & BITFIELD_MASK(4)))
3160 continue;
3161 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3162 nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3163 if (glsl_get_vector_elements(strct->type) > dest->num_components)
3164 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3165 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3166 mask >>= 4;
3167 }
3168 }
3169 } else {
3170 nir_def *dest = NULL;
3171 if (matrix) {
3172 /* matrix types always come from array (row) derefs */
3173 assert(deref->deref_type == nir_deref_type_array);
3174 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3175 /* let optimization clean up consts later */
3176 nir_def *index = deref->arr.index.ssa;
3177 /* this might be an indirect array index:
3178 * - iterate over matrix columns
3179 * - add if blocks for each column
3180 * - phi the loads using the array index
3181 */
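/* roughly: if (index == 0) col = load(column 0); else if (index == 1) col = load(column 1); ...
 * with the per-branch results joined back together by the phis below */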
3182 unsigned cols = glsl_get_matrix_columns(matrix);
3183 nir_def *dests[4];
3184 for (unsigned idx = 0; idx < cols; idx++) {
3185 /* don't add an if for the final row: this will be handled in the else */
3186 if (idx < cols - 1)
3187 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3188 unsigned vec_components = glsl_get_vector_elements(matrix);
3189 /* always clamp dvec3 to 4 components */
3190 if (vec_components == 3)
3191 vec_components = 4;
3192 unsigned start_component = idx * vec_components * 2;
3193 /* struct member */
3194 unsigned member = start_component / 4;
3195 /* number of components remaining */
3196 unsigned remaining = num_components;
3197 /* component index */
3198 unsigned comp_idx = 0;
3199 for (unsigned i = 0; i < num_components; member++) {
3200 assert(member < glsl_get_length(var_deref->type));
3201 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3202 nir_def *load = nir_load_deref(&b, strct);
3203 unsigned incr = MIN2(remaining, 4);
3204 /* repack the loads to 64bit */
3205 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3206 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3207 remaining -= incr;
3208 i += incr;
3209 }
3210 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3211 if (idx < cols - 1)
3212 nir_push_else(&b, NULL);
3213 }
3214 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3215 for (unsigned idx = cols - 1; idx >= 1; idx--) {
3216 nir_pop_if(&b, NULL);
3217 dest = nir_if_phi(&b, dests[idx - 1], dest);
3218 }
3219 _mesa_set_add(deletes, &deref->instr);
3220 } else if (num_components <= 4) {
3221 /* simple load case */
3222 nir_def *load = nir_load_deref(&b, deref);
3223 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3224 for (unsigned i = 0; i < intr->num_components; i++) {
3225 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3226 }
3227 dest = nir_vec(&b, comp, intr->num_components);
3228 } else {
3229 /* loading > 4 components: access the struct and load the appropriate vec4 members */
3230 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3231 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3232 nir_def *load = nir_load_deref(&b, strct);
3233 comp[i * 2] = nir_pack_64_2x32(&b,
3234 nir_trim_vector(&b, load, 2));
3235 if (num_components > 2)
3236 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3237 }
3238 dest = nir_vec(&b, comp, intr->num_components);
3239 }
3240 nir_def_rewrite_uses_after(&intr->def, dest, instr);
3241 }
3242 _mesa_set_add(deletes, instr);
3243 break;
3244 }
3245 break;
3246 default: break;
3247 }
3248 }
3249 }
3250 if (func_progress)
3251 nir_metadata_preserve(impl, nir_metadata_none);
3252 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3253 set_foreach_remove(deletes, he)
3254 nir_instr_remove((void*)he->key);
3255 return func_progress;
3256 }
3257
3258 static bool
3259 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3260 struct set *deletes, bool doubles_only)
3261 {
3262 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3263 return false;
3264 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3265 /* once type is rewritten, rewrite all loads and stores */
3266 nir_foreach_function_impl(impl, shader)
3267 lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3268 return true;
3269 }
3270
3271 /* rewrite all input/output variables using 32bit types and load/stores */
3272 static bool
3273 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3274 {
3275 bool progress = false;
3276 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3277 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3278 nir_foreach_function_impl(impl, shader) {
3279 nir_foreach_function_temp_variable(var, impl) {
3280 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3281 continue;
3282 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3283 progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3284 }
3285 }
3286 ralloc_free(deletes);
3287 ralloc_free(derefs);
3288 if (progress) {
3289 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3290 nir_lower_phis_to_scalar(shader, false);
3291 optimize_nir(shader, NULL, true);
3292 }
3293 return progress;
3294 }
3295
3296 static void
3297 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3298 {
3299 FILE *fp = fopen(file, "wb");
3300 if (fp) {
3301 fwrite(words, 1, size, fp);
3302 fclose(fp);
3303 fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3304 }
3305 }
3306
3307 static VkShaderStageFlagBits
3308 zink_get_next_stage(gl_shader_stage stage)
3309 {
3310 switch (stage) {
3311 case MESA_SHADER_VERTEX:
3312 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3313 VK_SHADER_STAGE_GEOMETRY_BIT |
3314 VK_SHADER_STAGE_FRAGMENT_BIT;
3315 case MESA_SHADER_TESS_CTRL:
3316 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3317 case MESA_SHADER_TESS_EVAL:
3318 return VK_SHADER_STAGE_GEOMETRY_BIT |
3319 VK_SHADER_STAGE_FRAGMENT_BIT;
3320 case MESA_SHADER_GEOMETRY:
3321 return VK_SHADER_STAGE_FRAGMENT_BIT;
3322 case MESA_SHADER_FRAGMENT:
3323 case MESA_SHADER_COMPUTE:
3324 case MESA_SHADER_KERNEL:
3325 return 0;
3326 default:
3327 unreachable("invalid shader stage");
3328 }
3329 }
3330
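/* wrap the SPIR-V words in either a VkShaderModule or, when EXT_shader_object is usable,
 * a VkShaderEXT; with ZINK_DEBUG=spirv the binary is also dumped to dumpNN.spv */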
3331 struct zink_shader_object
3332 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3333 {
3334 VkShaderModuleCreateInfo smci = {0};
3335 VkShaderCreateInfoEXT sci = {0};
3336
3337 if (!spirv)
3338 spirv = zs->spirv;
3339
3340 if (zink_debug & ZINK_DEBUG_SPIRV) {
3341 char buf[256];
3342 static int i;
3343 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3344 zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3345 }
3346
3347 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3348 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3349 sci.nextStage = zink_get_next_stage(zs->info.stage);
3350 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3351 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3352 sci.pCode = spirv->words;
3353 sci.pName = "main";
3354 VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3355 if (pg) {
3356 sci.setLayoutCount = pg->num_dsl;
3357 sci.pSetLayouts = pg->dsl;
3358 } else {
3359 sci.setLayoutCount = zs->info.stage + 1;
3360 dsl[zs->info.stage] = zs->precompile.dsl;
3361 sci.pSetLayouts = dsl;
3362 }
3363 VkPushConstantRange pcr;
3364 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3365 pcr.offset = 0;
3366 pcr.size = sizeof(struct zink_gfx_push_constant);
3367 sci.pushConstantRangeCount = 1;
3368 sci.pPushConstantRanges = &pcr;
3369
3370 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3371 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3372 smci.pCode = spirv->words;
3373
3374 #ifndef NDEBUG
3375 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3376 static const struct spirv_to_nir_options spirv_options = {
3377 .environment = NIR_SPIRV_VULKAN,
3378 .capabilities = NULL,
3379 .ubo_addr_format = nir_address_format_32bit_index_offset,
3380 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3381 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3382 .push_const_addr_format = nir_address_format_logical,
3383 .shared_addr_format = nir_address_format_32bit_offset,
3384 };
3385 uint32_t num_spec_entries = 0;
3386 struct nir_spirv_specialization *spec_entries = NULL;
3387 VkSpecializationInfo sinfo = {0};
3388 VkSpecializationMapEntry me[3];
3389 uint32_t size[3] = {1,1,1};
3390 if (!zs->info.workgroup_size[0]) {
3391 sinfo.mapEntryCount = 3;
3392 sinfo.pMapEntries = &me[0];
3393 sinfo.dataSize = sizeof(uint32_t) * 3;
3394 sinfo.pData = size;
3395 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3396 for (int i = 0; i < 3; i++) {
3397 me[i].size = sizeof(uint32_t);
3398 me[i].constantID = ids[i];
3399 me[i].offset = i * sizeof(uint32_t);
3400 }
3401 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3402 }
3403 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3404 spec_entries, num_spec_entries,
3405 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3406 assert(nir);
3407 ralloc_free(nir);
3408 free(spec_entries);
3409 }
3410 #endif
3411
3412 VkResult ret;
3413 struct zink_shader_object obj = {0};
3414 if (!can_shobj || !screen->info.have_EXT_shader_object)
3415 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3416 else
3417 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3418 ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3419 assert(success);
3420 return obj;
3421 }
3422
3423 static void
3424 prune_io(nir_shader *nir)
3425 {
3426 nir_foreach_shader_in_variable_safe(var, nir) {
3427 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3428 var->data.mode = nir_var_shader_temp;
3429 }
3430 nir_foreach_shader_out_variable_safe(var, nir) {
3431 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3432 var->data.mode = nir_var_shader_temp;
3433 }
3434 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3435 }
3436
3437 static void
3438 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3439 {
3440 assert(var->data.driver_location < 32); //bitfield size for tracking
3441 zs->fs.legacy_shadow_mask |= BITFIELD_BIT(var->data.driver_location);
3442 }
3443
3444 static void
3445 flag_shadow_tex_instr(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3446 {
3447 assert(var);
3448 unsigned num_components = tex->def.num_components;
3449 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3450 if (rewrite_depth && nir_def_components_read(&tex->def) & ~1) {
3451 /* this needs recompiles */
3452 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3453 flag_shadow_tex(var, zs);
3454 else
3455 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3456 }
3457 }
3458
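/* adjust a tex result to the sampler's declared return type (converting back to the
 * original bit size for existing uses) and collapse legacy vec4 shadow results down to
 * the scalar form SPIR-V expects */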
3459 static nir_def *
3460 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3461 {
3462 assert(var);
3463 const struct glsl_type *type = glsl_without_array(var->type);
3464 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3465 bool is_int = glsl_base_type_is_integer(ret_type);
3466 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3467 unsigned dest_size = tex->def.bit_size;
3468 b->cursor = nir_after_instr(&tex->instr);
3469 unsigned num_components = tex->def.num_components;
3470 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3471 if (bit_size == dest_size && !rewrite_depth)
3472 return NULL;
3473 nir_def *dest = &tex->def;
3474 if (rewrite_depth && zs) {
3475 if (nir_def_components_read(dest) & ~1) {
3476 /* handled above */
3477 return NULL;
3478 }
3479 /* If only .x is used in the NIR, then it's effectively not a legacy depth
3480 * sample anyway and we don't want to ask for shader recompiles. This is
3481 * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3482 * LUMINANCE, so apps just use the first channel.
3483 */
3484 tex->def.num_components = 1;
3485 tex->is_new_style_shadow = true;
3486 }
3487 if (bit_size != dest_size) {
3488 tex->def.bit_size = bit_size;
3489 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3490
3491 if (is_int) {
3492 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3493 dest = nir_u2uN(b, &tex->def, dest_size);
3494 else
3495 dest = nir_i2iN(b, &tex->def, dest_size);
3496 } else {
3497 dest = nir_f2fN(b, &tex->def, dest_size);
3498 }
3499 if (!rewrite_depth)
3500 nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3501 }
3502 return dest;
3503 }
3504
3505 struct lower_zs_swizzle_state {
3506 bool shadow_only;
3507 unsigned base_sampler_id;
3508 const struct zink_zs_swizzle_key *swizzle;
3509 };
3510
3511 static bool
3512 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3513 {
3514 struct lower_zs_swizzle_state *state = data;
3515 const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3516 assert(state->shadow_only || swizzle_key);
3517 if (instr->type != nir_instr_type_tex)
3518 return false;
3519 nir_tex_instr *tex = nir_instr_as_tex(instr);
3520 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3521 (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3522 return false;
3523 if (tex->is_shadow && tex->op == nir_texop_tg4)
3524 /* Will not even try to emulate the shadow comparison */
3525 return false;
3526 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3527 nir_variable *var = NULL;
3528 if (handle != -1)
3529 /* gtfo bindless depth texture mode */
3530 return false;
3531 var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
3532 assert(var);
3533 uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3534 const struct glsl_type *type = glsl_without_array(var->type);
3535 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3536 bool is_int = glsl_base_type_is_integer(ret_type);
3537 unsigned num_components = tex->def.num_components;
3538 if (tex->is_shadow)
3539 tex->is_new_style_shadow = true;
3540 nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3541 assert(dest || !state->shadow_only);
3542 if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3543 return false;
3544 else if (!dest)
3545 dest = &tex->def;
3546 else
3547 tex->def.num_components = 1;
3548 if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3549 /* these require manual swizzles */
3550 if (tex->op == nir_texop_tg4) {
3551 assert(!tex->is_shadow);
3552 nir_def *swizzle;
3553 switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3554 case PIPE_SWIZZLE_0:
3555 swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3556 break;
3557 case PIPE_SWIZZLE_1:
3558 if (is_int)
3559 swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3560 else
3561 swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3562 break;
3563 default:
3564 if (!tex->component)
3565 return false;
3566 tex->component = 0;
3567 return true;
3568 }
3569 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3570 return true;
3571 }
3572 nir_def *vec[4];
3573 for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3574 switch (swizzle_key->swizzle[sampler_id].s[i]) {
3575 case PIPE_SWIZZLE_0:
3576 vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3577 break;
3578 case PIPE_SWIZZLE_1:
3579 if (is_int)
3580 vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3581 else
3582 vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3583 break;
3584 default:
3585 vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3586 break;
3587 }
3588 }
3589 nir_def *swizzle = nir_vec(b, vec, num_components);
3590 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3591 } else {
3592 assert(tex->is_shadow);
3593 nir_def *vec[4] = {dest, dest, dest, dest};
3594 nir_def *splat = nir_vec(b, vec, num_components);
3595 nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3596 }
3597 return true;
3598 }
3599
3600 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3601 *
3602 * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3603 * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3604 * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3605 * shader to expand out to vec4. Since this depends on sampler state, it's a
3606 * draw-time shader recompile to do so.
3607 *
3608 * We may also need to apply shader swizzles for
3609 * driver_compiler_workarounds.needs_zs_shader_swizzle.
3610 */
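/* e.g. a legacy vec4 shadow result is rebuilt from the scalar depth value d as (d, d, d, d),
 * or as per-channel 0/1/d selections when a swizzle key (GL_DEPTH_TEXTURE_MODE) is present */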
3611 static bool
3612 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3613 {
3614 /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3615 unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3616 struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3617 return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr,
3618 nir_metadata_control_flow,
3619 (void*)&state);
3620 }
3621
3622 static bool
3623 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3624 void *data)
3625 {
3626 if (intr->intrinsic != nir_intrinsic_load_point_coord)
3627 return false;
3628 b->cursor = nir_after_instr(&intr->instr);
3629 nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3630 nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3631 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3632 return true;
3633 }
3634
3635 static bool
3636 invert_point_coord(nir_shader *nir)
3637 {
3638 if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3639 return false;
3640 return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3641 nir_metadata_dominance, NULL);
3642 }
3643
3644 static bool
3645 lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data)
3646 {
3647 b->cursor = nir_after_instr(instr);
3648
3649 switch (instr->type) {
3650 case nir_instr_type_tex: {
3651 nir_tex_instr *tex = nir_instr_as_tex(instr);
3652 if (!tex->is_sparse)
3653 return false;
3654
3655 nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def));
3656 nir_def *vec = nir_vector_insert_imm(b, &tex->def, res,
3657 tex->def.num_components - 1);
3658 nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr);
3659 return true;
3660 }
3661
3662 case nir_instr_type_intrinsic: {
3663 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3664 switch (intrin->intrinsic) {
3665 case nir_intrinsic_image_deref_sparse_load: {
3666 nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def));
3667 nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4);
3668 nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
3669 return true;
3670 }
3671
3672 case nir_intrinsic_sparse_residency_code_and: {
3673 nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa);
3674 nir_def_rewrite_uses(&intrin->def, res);
3675 return true;
3676 }
3677
3678 case nir_intrinsic_is_sparse_texels_resident: {
3679 nir_def *res = nir_i2b(b, intrin->src[0].ssa);
3680 nir_def_rewrite_uses(&intrin->def, res);
3681 return true;
3682 }
3683
3684 default:
3685 return false;
3686 }
3687 }
3688
3689 default:
3690 return false;
3691 }
3692 }
3693
3694 static bool
3695 lower_sparse(nir_shader *shader)
3696 {
3697 return nir_shader_instructions_pass(shader, lower_sparse_instr,
3698 nir_metadata_dominance, NULL);
3699 }
3700
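/* effectively the reverse of explicit IO lowering: rewrite load_input/store_output/interp
 * intrinsics back into variable derefs on the matching shader_in/shader_out variables */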
3701 static bool
3702 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3703 {
3704 bool is_load = false;
3705 bool is_input = false;
3706 bool is_interp = false;
3707 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3708 return false;
3709 bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) ||
3710 (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input);
3711 unsigned loc = nir_intrinsic_io_semantics(intr).location;
3712 nir_src *src_offset = nir_get_io_offset_src(intr);
3713 const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3714 unsigned location = loc + slot_offset;
3715 unsigned frac = nir_intrinsic_component(intr);
3716 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3717 /* set c aligned/rounded down to dword */
3718 unsigned c = frac;
3719 if (frac && bit_size < 32)
3720 c = frac * bit_size / 32;
3721 /* loop over all the variables and rewrite corresponding access */
3722 nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3723 const struct glsl_type *type = var->type;
3724 if (nir_is_arrayed_io(var, b->shader->info.stage))
3725 type = glsl_get_array_element(type);
3726 unsigned slot_count = get_var_slot_count(b->shader, var);
3727 /* filter access that isn't specific to this variable */
3728 if (var->data.location > location || var->data.location + slot_count <= location)
3729 continue;
3730 if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3731 continue;
3732 if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3733 continue;
3734
3735 unsigned size = 0;
3736 bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3737 if (is_struct)
3738 size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3739 else if (!is_special_io && var->data.compact)
3740 size = glsl_get_aoa_size(type);
3741 else
3742 size = glsl_get_vector_elements(glsl_without_array(type));
3743 assert(size);
3744 if (glsl_type_is_64bit(glsl_without_array(var->type)))
3745 size *= 2;
3746 if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3747 /* adjust for dvec3-type slot overflow */
3748 assert(location > var->data.location);
3749 size -= (location - var->data.location) * 4;
3750 }
3751 assert(size);
3752 if (var->data.location_frac + size <= c || var->data.location_frac > c)
3753 continue;
3754
3755 b->cursor = nir_before_instr(&intr->instr);
3756 nir_deref_instr *deref = nir_build_deref_var(b, var);
3757 if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3758 assert(intr->intrinsic != nir_intrinsic_store_output);
3759 deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3760 }
3761 if (glsl_type_is_array(type)) {
3762 /* unroll array derefs */
3763 unsigned idx = var->data.compact ? (frac - var->data.location_frac) : 0;
3764 assert(src_offset);
3765 if (var->data.location < VARYING_SLOT_VAR0) {
3766 if (src_offset) {
3767 /* clip/cull dist and tess levels use different array offset semantics */
3768 bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3769 is_clipcull_dist(var->data.location);
3770 bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3771 var->data.location >= VARYING_SLOT_TESS_LEVEL_INNER && var->data.location >= VARYING_SLOT_TESS_LEVEL_OUTER;
3772 bool is_builtin_array = is_clipdist || is_tess_level;
3773 /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3774 if (nir_src_is_const(*src_offset)) {
3775 unsigned offset = slot_offset;
3776 if (is_builtin_array)
3777 offset *= 4;
3778 if (is_clipdist) {
3779 if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1)
3780 offset += 4;
3781 }
3782 deref = nir_build_deref_array_imm(b, deref, offset + idx);
3783 } else {
3784 nir_def *offset = src_offset->ssa;
3785 if (is_builtin_array)
3786 offset = nir_imul_imm(b, offset, 4);
3787 deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
3788 }
3789 } else {
3790 deref = nir_build_deref_array_imm(b, deref, idx);
3791 }
3792 type = glsl_get_array_element(type);
3793 } else {
3794 idx += location - var->data.location;
3795 /* need to convert possible N*M to [N][M] */
3796 nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3797 while (glsl_type_is_array(type)) {
3798 const struct glsl_type *elem = glsl_get_array_element(type);
3799 unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3800 nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3801 if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3802 n = nir_udiv_imm(b, n, 2);
3803 deref = nir_build_deref_array(b, deref, n);
3804 nm = nir_umod_imm(b, nm, type_size);
3805 type = glsl_get_array_element(type);
3806 }
3807 }
3808 } else if (glsl_type_is_struct(type)) {
3809 deref = nir_build_deref_struct(b, deref, slot_offset);
3810 }
3811 assert(!glsl_type_is_array(type));
3812 unsigned num_components = glsl_get_vector_elements(type);
3813 if (is_load) {
3814 nir_def *load;
3815 if (is_interp) {
3816 nir_def *interp = intr->src[0].ssa;
3817 nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3818 assert(interp_intr);
3819 var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3820 switch (interp_intr->intrinsic) {
3821 case nir_intrinsic_load_barycentric_centroid:
3822 load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def);
3823 break;
3824 case nir_intrinsic_load_barycentric_sample:
3825 var->data.sample = 1;
3826 load = nir_load_deref(b, deref);
3827 break;
3828 case nir_intrinsic_load_barycentric_pixel:
3829 load = nir_load_deref(b, deref);
3830 break;
3831 case nir_intrinsic_load_barycentric_at_sample:
3832 load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3833 break;
3834 case nir_intrinsic_load_barycentric_at_offset:
3835 load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3836 break;
3837 default:
3838 unreachable("unhandled interp!");
3839 }
3840 } else {
3841 load = nir_load_deref(b, deref);
3842 }
3843 /* filter needed components */
3844 if (intr->num_components < load->num_components)
3845 load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3846 nir_def_rewrite_uses(&intr->def, load);
3847 } else {
3848 nir_def *store = intr->src[0].ssa;
3849 /* pad/filter components to match deref type */
3850 if (intr->num_components < num_components) {
3851 nir_def *zero = nir_imm_zero(b, 1, bit_size);
3852 nir_def *vec[4] = {zero, zero, zero, zero};
3853 u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3854 vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3855 store = nir_vec(b, vec, num_components);
3856 } if (store->num_components > num_components) {
3857 store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3858 }
3859 if (store->bit_size != glsl_get_bit_size(type)) {
3860 /* this should be some weird bindless io conversion */
3861 assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3862 assert(num_components != store->num_components);
3863 store = nir_unpack_64_2x32(b, store);
3864 }
3865 nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3866 }
3867 nir_instr_remove(&intr->instr);
3868 return true;
3869 }
3870 unreachable("failed to find variable for explicit io!");
3871 return true;
3872 }
3873
3874 static bool
3875 add_derefs(nir_shader *nir)
3876 {
3877 return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3878 nir_metadata_dominance, NULL);
3879 }
3880
3881 static struct zink_shader_object
3882 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3883 {
3884 struct zink_shader_info *sinfo = &zs->sinfo;
3885 prune_io(nir);
3886
3887 switch (nir->info.stage) {
3888 case MESA_SHADER_VERTEX:
3889 case MESA_SHADER_TESS_EVAL:
3890 case MESA_SHADER_GEOMETRY:
3891 NIR_PASS_V(nir, nir_divergence_analysis);
3892 break;
3893 default: break;
3894 }
3895 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3896
3897 if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3898 nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3899 if (zink_debug & ZINK_DEBUG_NIR) {
3900 fprintf(stderr, "NIR shader:\n---8<---\n");
3901 nir_print_shader(nir, stderr);
3902 fprintf(stderr, "---8<---\n");
3903 }
3904
3905 struct zink_shader_object obj = {0};
3906 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen);
3907 if (spirv)
3908 obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3909
3910 /* TODO: determine if there's any reason to cache spirv output? */
3911 if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3912 zs->spirv = spirv;
3913 else
3914 obj.spirv = spirv;
3915 return obj;
3916 }
3917
3918 static bool
3919 remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data)
3920 {
3921 if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample)
3922 return false;
3923
3924 b->cursor = nir_before_instr(&interp->instr);
3925 nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0]));
3926 nir_def_rewrite_uses(&interp->def, res);
3927
3928 return true;
3929 }
3930
3931 struct zink_shader_object
3932 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
3933 nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3934 {
3935 bool need_optimize = true;
3936 bool inlined_uniforms = false;
3937
3938 NIR_PASS_V(nir, add_derefs);
3939 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
3940 if (key) {
3941 if (key->inline_uniforms) {
3942 NIR_PASS_V(nir, nir_inline_uniforms,
3943 nir->info.num_inlinable_uniforms,
3944 key->base.inlined_uniform_values,
3945 nir->info.inlinable_uniform_dw_offsets);
3946
3947 inlined_uniforms = true;
3948 }
3949
3950 /* TODO: use a separate mem ctx here for ralloc */
3951
3952 if (!screen->optimal_keys) {
3953 switch (zs->info.stage) {
3954 case MESA_SHADER_VERTEX: {
3955 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3956 const struct zink_vs_key *vs_key = zink_vs_key(key);
3957 switch (vs_key->size) {
3958 case 4:
3959 decomposed_attrs = vs_key->u32.decomposed_attrs;
3960 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3961 break;
3962 case 2:
3963 decomposed_attrs = vs_key->u16.decomposed_attrs;
3964 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3965 break;
3966 case 1:
3967 decomposed_attrs = vs_key->u8.decomposed_attrs;
3968 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3969 break;
3970 default: break;
3971 }
3972 if (decomposed_attrs || decomposed_attrs_without_w)
3973 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3974 break;
3975 }
3976
3977 case MESA_SHADER_GEOMETRY:
3978 if (zink_gs_key(key)->lower_line_stipple) {
3979 NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3980 NIR_PASS_V(nir, nir_lower_var_copies);
3981 need_optimize = true;
3982 }
3983
3984 if (zink_gs_key(key)->lower_line_smooth) {
3985 NIR_PASS_V(nir, lower_line_smooth_gs);
3986 NIR_PASS_V(nir, nir_lower_var_copies);
3987 need_optimize = true;
3988 }
3989
3990 if (zink_gs_key(key)->lower_gl_point) {
3991 NIR_PASS_V(nir, lower_gl_point_gs);
3992 need_optimize = true;
3993 }
3994
3995 if (zink_gs_key(key)->lower_pv_mode) {
3996 NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3997 need_optimize = true; //TODO verify that this is required
3998 }
3999 break;
4000
4001 default:
4002 break;
4003 }
4004 }
4005
4006 switch (zs->info.stage) {
4007 case MESA_SHADER_VERTEX:
4008 case MESA_SHADER_TESS_EVAL:
4009 case MESA_SHADER_GEOMETRY:
4010 if (zink_vs_key_base(key)->last_vertex_stage) {
4011 if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
4012 NIR_PASS_V(nir, nir_lower_clip_halfz);
4013 }
4014 if (zink_vs_key_base(key)->push_drawid) {
4015 NIR_PASS_V(nir, lower_drawid);
4016 }
4017 } else {
4018 nir->xfb_info = NULL;
4019 }
4020 if (zink_vs_key_base(key)->robust_access)
4021 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4022 break;
4023 case MESA_SHADER_FRAGMENT:
4024 if (zink_fs_key(key)->lower_line_smooth) {
4025 NIR_PASS_V(nir, lower_line_smooth_fs,
4026 zink_fs_key(key)->lower_line_stipple);
4027 need_optimize = true;
4028 } else if (zink_fs_key(key)->lower_line_stipple)
4029 NIR_PASS_V(nir, lower_line_stipple_fs);
4030
4031 if (zink_fs_key(key)->lower_point_smooth) {
4032 NIR_PASS_V(nir, nir_lower_point_smooth, false);
4033 NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
4034 nir->info.fs.uses_discard = true;
4035 need_optimize = true;
4036 }
4037
4038 if (zink_fs_key(key)->robust_access)
4039 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4040
4041 if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) {
4042 /* VK will always use gl_SampleMask[] values even if sample count is 0,
4043 * so we need to skip this write here to mimic GL's behavior of ignoring it
4044 */
4045 nir_foreach_shader_out_variable(var, nir) {
4046 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
4047 var->data.mode = nir_var_shader_temp;
4048 }
4049 nir_fixup_deref_modes(nir);
4050 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4051 NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample,
4052 nir_metadata_control_flow, NULL);
4053
4054 need_optimize = true;
4055 }
4056 if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
4057 NIR_PASS_V(nir, lower_dual_blend);
4058 }
4059 if (zink_fs_key_base(key)->coord_replace_bits)
4060 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
4061 if (zink_fs_key_base(key)->point_coord_yinvert)
4062 NIR_PASS_V(nir, invert_point_coord);
4063 if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
4064 nir_foreach_shader_in_variable(var, nir)
4065 var->data.sample = true;
4066 nir->info.fs.uses_sample_qualifier = true;
4067 nir->info.fs.uses_sample_shading = true;
4068 }
4069 if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
4070 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
4071 if (nir->info.fs.uses_fbfetch_output) {
4072 nir_variable *fbfetch = NULL;
4073 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
4074 /* old variable must be deleted to avoid spirv errors */
4075 fbfetch->data.mode = nir_var_shader_temp;
4076 nir_fixup_deref_modes(nir);
4077 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4078 need_optimize = true;
4079 }
4080 nir_foreach_shader_in_variable_safe(var, nir) {
4081 if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
4082 continue;
4083 nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
4084 var->data.mode = nir_var_shader_temp;
4085 nir_fixup_deref_modes(nir);
4086 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4087 need_optimize = true;
4088 }
4089 break;
4090 case MESA_SHADER_COMPUTE:
4091 if (zink_cs_key(key)->robust_access)
4092 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4093 break;
4094 default: break;
4095 }
4096 if (key->base.needs_zs_shader_swizzle) {
4097 assert(extra_data);
4098 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4099 }
4100 if (key->base.nonseamless_cube_mask) {
4101 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4102 need_optimize = true;
4103 }
4104 }
4105 if (screen->driconf.inline_uniforms) {
4106 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4107 NIR_PASS_V(nir, rewrite_bo_access, screen);
4108 NIR_PASS_V(nir, remove_bo_access, zs);
4109 need_optimize = true;
4110 }
4111 if (inlined_uniforms) {
4112 optimize_nir(nir, zs, true);
4113
4114 /* This must be done again. */
4115 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4116 nir_var_shader_out);
4117
4118 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4119 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4120 zs->can_inline = false;
4121 } else if (need_optimize)
4122 optimize_nir(nir, zs, true);
4123 bool has_sparse = false;
4124 NIR_PASS(has_sparse, nir, lower_sparse);
4125 if (has_sparse)
4126 optimize_nir(nir, zs, false);
4127
4128 struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4129 ralloc_free(nir);
4130 return obj;
4131 }
4132
4133 struct zink_shader_object
4134 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4135 {
4136 nir_shader *nir = zs->nir;
4137 /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4138 int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4139 if (screen->info.have_EXT_shader_object)
4140 set = zs->info.stage;
4141 unsigned offsets[4];
4142 zink_descriptor_shader_get_binding_offsets(zs, offsets);
4143 nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4144 if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4145 continue;
4146 var->data.descriptor_set = set;
4147 switch (var->data.mode) {
4148 case nir_var_mem_ubo:
4149 var->data.binding = !!var->data.driver_location;
4150 break;
4151 case nir_var_uniform:
4152 if (glsl_type_is_sampler(glsl_without_array(var->type)))
4153 var->data.binding += offsets[1];
4154 break;
4155 case nir_var_mem_ssbo:
4156 var->data.binding += offsets[2];
4157 break;
4158 case nir_var_image:
4159 var->data.binding += offsets[3];
4160 break;
4161 default: break;
4162 }
4163 }
4164 NIR_PASS_V(nir, add_derefs);
4165 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4166 if (screen->driconf.inline_uniforms) {
4167 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4168 NIR_PASS_V(nir, rewrite_bo_access, screen);
4169 NIR_PASS_V(nir, remove_bo_access, zs);
4170 }
4171 optimize_nir(nir, zs, true);
4172 zink_descriptor_shader_init(screen, zs);
4173 nir_shader *nir_clone = NULL;
4174 if (screen->info.have_EXT_shader_object)
4175 nir_clone = nir_shader_clone(nir, nir);
4176 struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4177 if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4178 /* always try to pre-generate a tcs in case it's needed */
4179 if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4180 nir_shader *nir_tcs = NULL;
4181 /* use max pcp for compat */
4182 zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, 32);
4183 zink_shader_tcs_init(screen, zs->non_fs.generated_tcs, nir_clone, &nir_tcs);
4184 nir_tcs->info.separate_shader = true;
4185 zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4186 ralloc_free(nir_tcs);
4187 zs->non_fs.generated_tcs->nir = NULL;
4188 }
4189 }
4190 spirv_shader_delete(obj.spirv);
4191 obj.spirv = NULL;
4192 return obj;
4193 }
4194
4195 static bool
4196 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4197 void *data)
4198 {
4199 if (intr->intrinsic != nir_intrinsic_load_instance_id)
4200 return false;
4201 b->cursor = nir_after_instr(&intr->instr);
4202 nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4203 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4204 return true;
4205 }
4206
4207 static bool
4208 lower_baseinstance(nir_shader *shader)
4209 {
4210 if (shader->info.stage != MESA_SHADER_VERTEX)
4211 return false;
4212 return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4213 nir_metadata_dominance, NULL);
4214 }
4215
4216 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4217 * so instead we delete all those broken variables and just make new ones
4218 */
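/* e.g. all user UBOs collapse to a single variable shaped roughly like
 *   struct { uint base[max_ubo_size * 4]; } ubos[num_ubos];   // "ubos@32"
 * (similarly "uniform_0@32" and "ssbos@32"), with driver_location giving the first bound slot */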
4219 static bool
4220 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4221 {
4222 uint64_t max_ssbo_size = 0;
4223 uint64_t max_ubo_size = 0;
4224 uint64_t max_uniform_size = 0;
4225
4226 if (!shader->info.num_ssbos && !shader->info.num_ubos)
4227 return false;
4228
4229 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4230 const struct glsl_type *type = glsl_without_array(var->type);
4231 if (type_is_counter(type))
4232 continue;
4233 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4234 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4235 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4236 if (interface_type) {
4237 unsigned block_size = glsl_get_explicit_size(interface_type, true);
4238 if (glsl_get_length(interface_type) == 1) {
4239 /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4240 const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4241 if (glsl_type_is_array(f) && !glsl_array_size(f))
4242 block_size = 0;
4243 }
4244 if (block_size) {
4245 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4246 size = MAX2(size, block_size);
4247 }
4248 }
4249 if (var->data.mode == nir_var_mem_ubo) {
4250 if (var->data.driver_location)
4251 max_ubo_size = MAX2(max_ubo_size, size);
4252 else
4253 max_uniform_size = MAX2(max_uniform_size, size);
4254 } else {
4255 max_ssbo_size = MAX2(max_ssbo_size, size);
4256 if (interface_type) {
4257 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4258 needs_size = true;
4259 }
4260 }
4261 var->data.mode = nir_var_shader_temp;
4262 }
4263 nir_fixup_deref_modes(shader);
4264 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4265 optimize_nir(shader, NULL, true);
4266
4267 struct glsl_struct_field field = {0};
4268 field.name = ralloc_strdup(shader, "base");
4269 if (shader->info.num_ubos) {
4270 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4271 field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4272 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4273 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4274 "uniform_0@32");
4275 var->interface_type = var->type;
4276 var->data.mode = nir_var_mem_ubo;
4277 var->data.driver_location = 0;
4278 }
4279
4280 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4281 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4282 if (num_ubos && ubos_used) {
4283 field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4284 /* shrink array as much as possible */
4285 unsigned first_ubo = ffs(ubos_used) - 2;
4286 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4287 num_ubos -= first_ubo;
4288 assert(num_ubos);
4289 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4290 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4291 "ubos@32");
4292 var->interface_type = var->type;
4293 var->data.mode = nir_var_mem_ubo;
4294 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4295 }
4296 }
4297 if (shader->info.num_ssbos && zs->ssbos_used) {
4298 /* shrink array as much as possible */
4299 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4300 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4301 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4302 assert(num_ssbos);
4303 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4304 field.type = ssbo_type;
4305 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4306 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4307 "ssbos@32");
4308 var->interface_type = var->type;
4309 var->data.mode = nir_var_mem_ssbo;
4310 var->data.driver_location = first_ssbo;
4311 }
4312 return true;
4313 }
4314
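/* compute which buffer slots an access can touch: a constant index selects a
 * single slot, while an indirect index conservatively marks every slot
 * (minus the uniform_0 block for UBOs).
 */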
4315 static uint32_t
4316 get_src_mask_ssbo(unsigned total, nir_src src)
4317 {
4318 if (nir_src_is_const(src))
4319 return BITFIELD_BIT(nir_src_as_uint(src));
4320 return BITFIELD_MASK(total);
4321 }
4322
4323 static uint32_t
4324 get_src_mask_ubo(unsigned total, nir_src src)
4325 {
4326 if (nir_src_is_const(src))
4327 return BITFIELD_BIT(nir_src_as_uint(src));
4328 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4329 }
4330
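/* scan buffer and texture access to populate zs->ssbos_used / zs->ubos_used
 * and info.textures_used; returns true if get_ssbo_size is used, i.e. the
 * rewritten SSBO variables need to keep an unsized trailing array.
 */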
4331 static bool
4332 analyze_io(struct zink_shader *zs, nir_shader *shader)
4333 {
4334 bool ret = false;
4335 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4336 nir_foreach_block(block, impl) {
4337 nir_foreach_instr(instr, block) {
4338 if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4339 /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4340 nir_tex_instr *tex = nir_instr_as_tex(instr);
4341 int deref_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
4342 if (deref_idx >= 0) {
4343 nir_variable *img = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[deref_idx].src.ssa->parent_instr));
4344 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4345 BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4346 }
4347 continue;
4348 }
4349 if (instr->type != nir_instr_type_intrinsic)
4350 continue;
4351
4352 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4353 switch (intrin->intrinsic) {
4354 case nir_intrinsic_store_ssbo:
4355 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4356 break;
4357
4358 case nir_intrinsic_get_ssbo_size: {
4359 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4360 ret = true;
4361 break;
4362 }
4363 case nir_intrinsic_ssbo_atomic:
4364 case nir_intrinsic_ssbo_atomic_swap:
4365 case nir_intrinsic_load_ssbo:
4366 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4367 break;
4368 case nir_intrinsic_load_ubo:
4369 case nir_intrinsic_load_ubo_vec4:
4370 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4371 break;
4372 default:
4373 break;
4374 }
4375 }
4376 }
4377 return ret;
4378 }
4379
4380 struct zink_bindless_info {
4381 nir_variable *bindless[4];
4382 unsigned bindless_set;
4383 };
4384
4385 /* this is a "default" bindless texture used if the shader has no texture variables */
4386 static nir_variable *
4387 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4388 {
4389 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4390 nir_variable *var;
4391
4392 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4393 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4394 var->data.descriptor_set = descriptor_set;
4395 var->data.driver_location = var->data.binding = binding;
4396 return var;
4397 }
4398
4399 /* this is a "default" bindless image used if the shader has no image variables */
4400 static nir_variable *
4401 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4402 {
4403 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4404 nir_variable *var;
4405
4406 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4407 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4408 var->data.descriptor_set = descriptor_set;
4409 var->data.driver_location = var->data.binding = binding;
4410 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4411 return var;
4412 }
4413
4414 /* rewrite bindless instructions as array deref instructions */
4415 static bool
4416 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4417 {
4418 struct zink_bindless_info *bindless = data;
4419
4420 if (in->type == nir_instr_type_tex) {
4421 nir_tex_instr *tex = nir_instr_as_tex(in);
4422 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4423 if (idx == -1)
4424 return false;
4425
4426 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4427 if (!var) {
4428 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4429 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4430 bindless->bindless[1] = var;
4431 else
4432 bindless->bindless[0] = var;
4433 }
4434 b->cursor = nir_before_instr(in);
4435 nir_deref_instr *deref = nir_build_deref_var(b, var);
4436 if (glsl_type_is_array(var->type))
4437 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4438 nir_src_rewrite(&tex->src[idx].src, &deref->def);
4439
4440 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4441 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4442 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4443 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4444 *
4445 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4446 * - Warhammer 40k: Dawn of War III
4447 */
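      /* e.g. (sketch): a vec2 coord passed through a sampler2DArray handle is
       * padded to vec3 below; the extra component is undefined but satisfies
       * the variable's type.
       */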
4448 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4449 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4450 unsigned coord_components = nir_src_num_components(tex->src[c].src);
4451 if (coord_components < needed_components) {
4452 nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4453 nir_src_rewrite(&tex->src[c].src, def);
4454 tex->coord_components = needed_components;
4455 }
4456 return true;
4457 }
4458 if (in->type != nir_instr_type_intrinsic)
4459 return false;
4460 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4461
4462 nir_intrinsic_op op;
4463 #define OP_SWAP(OP) \
4464 case nir_intrinsic_bindless_image_##OP: \
4465 op = nir_intrinsic_image_deref_##OP; \
4466 break;
4467
4468
4469 /* convert bindless intrinsics to deref intrinsics */
4470 switch (instr->intrinsic) {
4471 OP_SWAP(atomic)
4472 OP_SWAP(atomic_swap)
4473 OP_SWAP(format)
4474 OP_SWAP(load)
4475 OP_SWAP(order)
4476 OP_SWAP(samples)
4477 OP_SWAP(size)
4478 OP_SWAP(store)
4479 default:
4480 return false;
4481 }
4482
4483 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4484 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4485 if (!var)
4486 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4487 instr->intrinsic = op;
4488 b->cursor = nir_before_instr(in);
4489 nir_deref_instr *deref = nir_build_deref_var(b, var);
4490 if (glsl_type_is_array(var->type))
4491 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4492 nir_src_rewrite(&instr->src[0], &deref->def);
4493 return true;
4494 }
4495
4496 static bool
4497 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4498 {
4499 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4500 return false;
4501 nir_fixup_deref_modes(shader);
4502 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4503 optimize_nir(shader, NULL, true);
4504 return true;
4505 }
4506
4507 /* convert shader image/texture i/o variables to 64-bit handles (stored as an ivec2) for bindless indexing */
4508 static bool
4509 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4510 void *data)
4511 {
4512 bool is_load = false;
4513 bool is_input = false;
4514 bool is_interp = false;
4515 if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4516 return false;
4517
4518 nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4519 if (var->data.bindless)
4520 return false;
4521 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4522 return false;
4523 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4524 return false;
4525
4526 var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4527 var->data.bindless = 1;
4528 return true;
4529 }
4530
4531 static bool
4532 lower_bindless_io(nir_shader *shader)
4533 {
4534 return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4535 nir_metadata_dominance, NULL);
4536 }
4537
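/* flatten (stage, descriptor type, index) into a unique binding number inside
 * the per-type descriptor set; e.g. a fragment-stage UBO at index 1 maps to
 * binding MESA_SHADER_FRAGMENT * 2 + 1 (worked from the switch below).
 */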
4538 static uint32_t
4539 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4540 {
4541 if (stage == MESA_SHADER_NONE) {
4542 unreachable("not supported");
4543 } else {
4544 unsigned base = stage;
4545 /* clamp compute bindings for better driver efficiency */
4546 if (gl_shader_stage_is_compute(stage))
4547 base = 0;
4548 switch (type) {
4549 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4550 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4551 return base * 2 + !!index;
4552
4553 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4554 assert(stage == MESA_SHADER_KERNEL);
4555 FALLTHROUGH;
4556 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4557 if (stage == MESA_SHADER_KERNEL) {
4558 assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4559 return index + PIPE_MAX_SAMPLERS;
4560 }
4561 FALLTHROUGH;
4562 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4563 assert(index < PIPE_MAX_SAMPLERS);
4564 assert(stage != MESA_SHADER_KERNEL);
4565 return (base * PIPE_MAX_SAMPLERS) + index;
4566
4567 case VK_DESCRIPTOR_TYPE_SAMPLER:
4568 assert(index < PIPE_MAX_SAMPLERS);
4569 assert(stage == MESA_SHADER_KERNEL);
4570 return index;
4571
4572 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4573 return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4574
4575 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4576 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4577 assert(index < ZINK_MAX_SHADER_IMAGES);
4578 if (stage == MESA_SHADER_KERNEL)
4579 return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4580 return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4581
4582 default:
4583 unreachable("unexpected type");
4584 }
4585 }
4586 }
4587
4588 static void
4589 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4590 {
4591 if (glsl_type_is_struct(type)) {
4592 for (unsigned i = 0; i < glsl_get_length(type); i++)
4593 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4594 return;
4595 }
4596
4597 /* just a random scalar in a struct */
4598 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4599 return;
4600
4601 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4602 unsigned binding;
4603 switch (vktype) {
4604 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4605 binding = 0;
4606 break;
4607 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4608 binding = 1;
4609 break;
4610 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4611 binding = 2;
4612 break;
4613 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4614 binding = 3;
4615 break;
4616 default:
4617 unreachable("unknown");
4618 }
4619 if (!bindless->bindless[binding]) {
4620 bindless->bindless[binding] = nir_variable_clone(var, nir);
4621 bindless->bindless[binding]->data.bindless = 0;
4622 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4623 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4624 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4625 if (!bindless->bindless[binding]->data.image.format)
4626 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4627 nir_shader_add_variable(nir, bindless->bindless[binding]);
4628 } else {
4629 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4630 }
4631 var->data.mode = nir_var_shader_temp;
4632 }
4633
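/* presumably some Vulkan implementations can't do depth-compare sampling on 1D
 * images, so shadow sampler1D types and their tex ops are promoted to 2D with
 * a zeroed second coordinate (see lower_1d_shadow below).
 */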
4634 static bool
4635 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4636 {
4637 struct zink_screen *screen = data;
4638 if (instr->type != nir_instr_type_tex)
4639 return false;
4640 nir_tex_instr *tex = nir_instr_as_tex(instr);
4641 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4642 return false;
4643 if (tex->is_sparse && screen->need_2D_sparse) {
4644 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4645 mesa_loge("unhandled/unsupported 1D sparse texture!");
4646 abort();
4647 }
4648 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4649 b->cursor = nir_before_instr(instr);
4650 tex->coord_components++;
4651 unsigned srcs[] = {
4652 nir_tex_src_coord,
4653 nir_tex_src_offset,
4654 nir_tex_src_ddx,
4655 nir_tex_src_ddy,
4656 };
4657 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4658 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4659 if (c == -1)
4660 continue;
4661 if (tex->src[c].src.ssa->num_components == tex->coord_components)
4662 continue;
4663 nir_def *def;
4664 nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4665 if (tex->src[c].src.ssa->num_components == 1)
4666 def = nir_vec2(b, tex->src[c].src.ssa, zero);
4667 else
4668 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4669 nir_src_rewrite(&tex->src[c].src, def);
4670 }
4671 b->cursor = nir_after_instr(instr);
4672 unsigned needed_components = nir_tex_instr_dest_size(tex);
4673 unsigned num_components = tex->def.num_components;
4674 if (needed_components > num_components) {
4675 tex->def.num_components = needed_components;
4676 assert(num_components < 3);
4677 /* take either xz or just x since this is promoted to 2D from 1D */
4678 uint32_t mask = num_components == 2 ? (1|4) : 1;
4679 nir_def *dst = nir_channels(b, &tex->def, mask);
4680 nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4681 }
4682 return true;
4683 }
4684
4685 static bool
4686 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4687 {
4688 bool found = false;
4689 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4690 const struct glsl_type *type = glsl_without_array(var->type);
4691 unsigned length = glsl_get_length(var->type);
4692 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4693 continue;
4694 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4695 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4696
4697 found = true;
4698 }
4699 if (found) {
4700 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4701 nir_fixup_deref_types(shader);
4702 }
4703 return found;
4704 }
4705
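/* one-time shader scan: records sparse residency usage, image binding ranges,
 * arrayed i/o locations, and clip/cull distance array sizes, and warns once
 * when float image atomic adds are used without VK_EXT_shader_atomic_float.
 */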
4706 static void
4707 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4708 {
4709 nir_foreach_function_impl(impl, shader) {
4710 nir_foreach_block_safe(block, impl) {
4711 nir_foreach_instr_safe(instr, block) {
4712 if (instr->type == nir_instr_type_tex) {
4713 nir_tex_instr *tex = nir_instr_as_tex(instr);
4714 zs->sinfo.have_sparse |= tex->is_sparse;
4715 }
4716 if (instr->type != nir_instr_type_intrinsic)
4717 continue;
4718 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4719 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4720 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4721 intr->intrinsic == nir_intrinsic_image_deref_store ||
4722 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4723 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4724 intr->intrinsic == nir_intrinsic_image_deref_size ||
4725 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4726 intr->intrinsic == nir_intrinsic_image_deref_format ||
4727 intr->intrinsic == nir_intrinsic_image_deref_order) {
4728
4729 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4730
4731 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4732 const unsigned size =
4733 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4734 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4735 var->data.binding + (MAX2(size, 1) - 1));
4736 }
4737 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4738 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4739 zs->sinfo.have_sparse = true;
4740
4741 bool is_load = false;
4742 bool is_input = false;
4743 bool is_interp = false;
4744 if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) {
4745 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
4746 if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) {
4747 if (is_input)
4748 zs->arrayed_inputs |= BITFIELD64_BIT(s.location);
4749 else
4750 zs->arrayed_outputs |= BITFIELD64_BIT(s.location);
4751 }
4752 /* TODO: delete this once #10826 is fixed */
4753 if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) {
4754 if (is_clipcull_dist(s.location)) {
4755 unsigned frac = nir_intrinsic_component(intr) + 1;
4756 if (s.location < VARYING_SLOT_CULL_DIST0) {
4757 if (s.location == VARYING_SLOT_CLIP_DIST1)
4758 frac += 4;
4759 shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots);
4760 } else {
4761 if (s.location == VARYING_SLOT_CULL_DIST1)
4762 frac += 4;
4763 shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots);
4764 }
4765 }
4766 }
4767 }
4768
4769 static bool warned = false;
4770 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4771 switch (intr->intrinsic) {
4772 case nir_intrinsic_image_deref_atomic: {
4773 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4774 if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4775 util_format_is_float(var->data.image.format))
4776 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4777 break;
4778 }
4779 default:
4780 break;
4781 }
4782 }
4783 }
4784 }
4785 }
4786 }
4787
4788 static bool
4789 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data, bool pre)
4790 {
4791 if (in->type != nir_instr_type_tex)
4792 return false;
4793 nir_tex_instr *tex = nir_instr_as_tex(in);
4794 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4795 return false;
4796 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4797 nir_variable *var = NULL;
4798 if (handle != -1) {
4799 if (pre)
4800 return false;
4801 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4802 } else {
4803 var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
4804 }
4805 if (pre) {
4806 flag_shadow_tex_instr(b, tex, var, data);
4807 return false;
4808 }
4809 return !!rewrite_tex_dest(b, tex, var, data);
4810 }
4811
4812 static bool
4813 match_tex_dests_instr_pre(nir_builder *b, nir_instr *in, void *data)
4814 {
4815 return match_tex_dests_instr(b, in, data, true);
4816 }
4817
4818 static bool
4819 match_tex_dests_instr_post(nir_builder *b, nir_instr *in, void *data)
4820 {
4821 return match_tex_dests_instr(b, in, data, false);
4822 }
4823
4824 static bool
4825 match_tex_dests(nir_shader *shader, struct zink_shader *zs, bool pre_mangle)
4826 {
4827 return nir_shader_instructions_pass(shader, pre_mangle ? match_tex_dests_instr_pre : match_tex_dests_instr_post, nir_metadata_dominance, zs);
4828 }
4829
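/* scalarize vector bitfield ops: each component may carry its own offset/count
 * through swizzles, which (as far as ntv's SPIR-V emission is concerned)
 * doesn't map to a single op, so emit one scalar op per component instead.
 */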
4830 static bool
4831 split_bitfields_instr(nir_builder *b, nir_alu_instr *alu, void *data)
4832 {
4833 switch (alu->op) {
4834 case nir_op_ubitfield_extract:
4835 case nir_op_ibitfield_extract:
4836 case nir_op_bitfield_insert:
4837 break;
4838 default:
4839 return false;
4840 }
4841 unsigned num_components = alu->def.num_components;
4842 if (num_components == 1)
4843 return false;
4844 b->cursor = nir_before_instr(&alu->instr);
4845 nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4846 for (unsigned i = 0; i < num_components; i++) {
4847 if (alu->op == nir_op_bitfield_insert)
4848 dests[i] = nir_bitfield_insert(b,
4849 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4850 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4851 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4852 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4853 else if (alu->op == nir_op_ubitfield_extract)
4854 dests[i] = nir_ubitfield_extract(b,
4855 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4856 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4857 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4858 else
4859 dests[i] = nir_ibitfield_extract(b,
4860 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4861 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4862 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4863 }
4864 nir_def *dest = nir_vec(b, dests, num_components);
4865 nir_def_rewrite_uses_after(&alu->def, dest, &alu->instr);
4866 nir_instr_remove(&alu->instr);
4867 return true;
4868 }
4869
4870
4871 static bool
4872 split_bitfields(nir_shader *shader)
4873 {
4874 return nir_shader_alu_pass(shader, split_bitfields_instr,
4875 nir_metadata_dominance, NULL);
4876 }
4877
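/* demote multisampled image variables to 2D: deref types are rewritten,
 * image_deref_samples is folded to a constant 0, and the image dim on
 * load/store/atomic intrinsics is patched to GLSL_SAMPLER_DIM_2D.
 */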
4878 static bool
4879 strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data)
4880 {
4881 if (in->type != nir_instr_type_intrinsic)
4882 return false;
4883 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in);
4884 switch (intr->intrinsic) {
4885 case nir_intrinsic_image_deref_samples:
4886 b->cursor = nir_before_instr(in);
4887 nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in);
4888 nir_instr_remove(in);
4889 break;
4890 case nir_intrinsic_image_deref_store:
4891 case nir_intrinsic_image_deref_load:
4892 case nir_intrinsic_image_deref_atomic:
4893 case nir_intrinsic_image_deref_atomic_swap:
4894 break;
4895 default:
4896 return false;
4897 }
4898 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
4899 if (dim != GLSL_SAMPLER_DIM_MS)
4900 return false;
4901
4902 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4903 nir_variable *var = nir_deref_instr_get_variable(deref);
4904 nir_deref_instr *parent = nir_deref_instr_parent(deref);
4905 if (parent) {
4906 parent->type = var->type;
4907 deref->type = glsl_without_array(var->type);
4908 } else {
4909 deref->type = var->type;
4910 }
4911 nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
4912 return true;
4913 }
4914
4915
4916 static bool
4917 strip_tex_ms(nir_shader *shader)
4918 {
4919 bool progress = false;
4920 nir_foreach_image_variable(var, shader) {
4921 const struct glsl_type *bare_type = glsl_without_array(var->type);
4922 if (glsl_get_sampler_dim(bare_type) != GLSL_SAMPLER_DIM_MS)
4923 continue;
4924 unsigned array_size = 0;
4925 if (glsl_type_is_array(var->type))
4926 array_size = glsl_array_size(var->type);
4927
4928 const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type));
4929 if (array_size)
4930 new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type));
4931 var->type = new_type;
4932 progress = true;
4933 }
4934 if (!progress)
4935 return false;
4936 return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL);
4937 }
4938
4939 static void
4940 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4941 {
4942 nir_foreach_function_impl(impl, nir) {
4943 nir_foreach_block(block, impl) {
4944 nir_foreach_instr_safe(instr, block) {
4945 if (instr->type != nir_instr_type_deref)
4946 continue;
4947 nir_deref_instr *deref = nir_instr_as_deref(instr);
4948 nir_variable *img = nir_deref_instr_get_variable(deref);
4949 if (img != var)
4950 continue;
4951 if (glsl_type_is_array(var->type)) {
4952 if (deref->deref_type == nir_deref_type_array)
4953 deref->type = glsl_without_array(var->type);
4954 else
4955 deref->type = var->type;
4956 } else {
4957 deref->type = var->type;
4958 }
4959 }
4960 }
4961 }
4962 }
4963
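/* images that arrive without a sampled type (GLSL_TYPE_VOID, e.g. from CL
 * kernels) get a concrete type inferred from how they are actually
 * loaded/stored, fall back to uint for size-only queries, and are demoted to
 * shader_temp (and later removed) if never accessed at all.
 */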
4964 static void
4965 type_image(nir_shader *nir, nir_variable *var)
4966 {
4967 nir_foreach_function_impl(impl, nir) {
4968 nir_foreach_block(block, impl) {
4969 nir_foreach_instr_safe(instr, block) {
4970 if (instr->type != nir_instr_type_intrinsic)
4971 continue;
4972 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4973 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4974 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4975 intr->intrinsic == nir_intrinsic_image_deref_store ||
4976 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4977 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4978 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4979 intr->intrinsic == nir_intrinsic_image_deref_format ||
4980 intr->intrinsic == nir_intrinsic_image_deref_order) {
4981 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4982 nir_variable *img = nir_deref_instr_get_variable(deref);
4983 if (img != var)
4984 continue;
4985
4986 nir_alu_type alu_type;
4987 if (nir_intrinsic_has_src_type(intr))
4988 alu_type = nir_intrinsic_src_type(intr);
4989 else
4990 alu_type = nir_intrinsic_dest_type(intr);
4991
4992 const struct glsl_type *type = glsl_without_array(var->type);
4993 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4994 assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4995 continue;
4996 }
4997 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4998 if (glsl_type_is_array(var->type))
4999 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5000 var->type = img_type;
5001 rewrite_cl_derefs(nir, var);
5002 return;
5003 }
5004 }
5005 }
5006 }
5007 nir_foreach_function_impl(impl, nir) {
5008 nir_foreach_block(block, impl) {
5009 nir_foreach_instr_safe(instr, block) {
5010 if (instr->type != nir_instr_type_intrinsic)
5011 continue;
5012 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5013 if (intr->intrinsic != nir_intrinsic_image_deref_size)
5014 continue;
5015 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5016 nir_variable *img = nir_deref_instr_get_variable(deref);
5017 if (img != var)
5018 continue;
5019 nir_alu_type alu_type = nir_type_uint32;
5020 const struct glsl_type *type = glsl_without_array(var->type);
5021 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5022 continue;
5023 }
5024 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5025 if (glsl_type_is_array(var->type))
5026 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5027 var->type = img_type;
5028 rewrite_cl_derefs(nir, var);
5029 return;
5030 }
5031 }
5032 }
5033 var->data.mode = nir_var_shader_temp;
5034 }
5035
5036 static bool
5037 type_sampler_vars(nir_shader *nir)
5038 {
5039 bool progress = false;
5040 nir_foreach_function_impl(impl, nir) {
5041 nir_foreach_block(block, impl) {
5042 nir_foreach_instr(instr, block) {
5043 if (instr->type != nir_instr_type_tex)
5044 continue;
5045 nir_tex_instr *tex = nir_instr_as_tex(instr);
5046 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
5047 assert(var);
5048 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
5049 nir_tex_instr_is_query(tex))
5050 continue;
5051 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
5052 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
5053 if (size > 1)
5054 img_type = glsl_array_type(img_type, size, 0);
5055 var->type = img_type;
5056 progress = true;
5057 }
5058 }
5059 }
5060 return progress;
5061 }
5062
5063 static bool
5064 type_images(nir_shader *nir)
5065 {
5066 bool progress = false;
5067 progress |= type_sampler_vars(nir);
5068 nir_foreach_variable_with_modes(var, nir, nir_var_image) {
5069 type_image(nir, var);
5070 progress = true;
5071 }
5072 if (progress) {
5073 nir_fixup_deref_types(nir);
5074 nir_fixup_deref_modes(nir);
5075 }
5076 return progress;
5077 }
5078
5079 /* attempt to assign io for separate shaders */
5080 static bool
5081 fixup_io_locations(nir_shader *nir)
5082 {
5083 nir_variable_mode modes;
5084 if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
5085 modes = nir_var_shader_in | nir_var_shader_out;
5086 else
5087 modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
5088 u_foreach_bit(mode, modes) {
5089 nir_variable_mode m = BITFIELD_BIT(mode);
5090 if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
5091 (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
5092 /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
5093 * - i/o interface blocks don't need to match
5094 * - any location can be present or not
5095 * - it just has to work
5096 *
5097 * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
5098 * since it's a builtin and yolo it with all the other legacy crap
5099 */
5100 nir_foreach_variable_with_modes(var, nir, m) {
5101 if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
5102 continue;
5103 if (var->data.location == VARYING_SLOT_VAR0)
5104 var->data.driver_location = 0;
5105 else if (var->data.patch)
5106 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5107 else
5108 var->data.driver_location = var->data.location;
5109 }
5110 continue;
5111 }
5112 /* i/o interface blocks are required to be EXACT matches between stages:
5113 * iterate over all locations and set locations incrementally
5114 */
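      /* e.g. (sketch): a VS writing POS, VAR0, and VAR2 but not VAR1 assigns
       * driver_locations 0 and 2 to VAR0/VAR2 and skips 1, so an FS compiled
       * separately that reads only VAR2 still lands on location 2.
       */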
5115 unsigned slot = 0;
5116 for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) {
5117 if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
5118 continue;
5119 bool found = false;
5120 unsigned size = 0;
5121 nir_foreach_variable_with_modes(var, nir, m) {
5122 if (var->data.location != i)
5123 continue;
5124 /* only add slots for non-component vars or first-time component vars */
5125 if (!var->data.location_frac || !size) {
5126 /* ensure variable is given enough slots */
5127 if (nir_is_arrayed_io(var, nir->info.stage))
5128 size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
5129 else
5130 size += glsl_count_vec4_slots(var->type, false, false);
5131 }
5132 if (var->data.patch)
5133 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5134 else
5135 var->data.driver_location = slot;
5136 found = true;
5137 }
5138 slot += size;
5139 if (found) {
5140 /* ensure the consumed slots aren't double iterated */
5141 i += size - 1;
5142 } else {
5143 /* locations used between stages are not required to be contiguous */
5144 if (i >= VARYING_SLOT_VAR0)
5145 slot++;
5146 }
5147 }
5148 }
5149 return true;
5150 }
5151
5152 static uint64_t
5153 zink_flat_flags(struct nir_shader *shader)
5154 {
5155 uint64_t flat_flags = 0;
5156 nir_foreach_shader_in_variable(var, shader) {
5157 if (var->data.interpolation == INTERP_MODE_FLAT)
5158 flat_flags |= BITFIELD64_BIT(var->data.location);
5159 }
5160
5161 return flat_flags;
5162 }
5163
5164 struct rework_io_state {
5165 /* these are search criteria */
5166 bool indirect_only;
5167 unsigned location;
5168 nir_variable_mode mode;
5169 gl_shader_stage stage;
5170 nir_shader *nir;
5171 const char *name;
5172
5173 /* these are found by scanning */
5174 bool arrayed_io;
5175 bool medium_precision;
5176 bool fb_fetch_output;
5177 bool dual_source_blend_index;
5178 uint32_t component_mask;
5179 uint32_t ignored_component_mask;
5180 unsigned array_size;
5181 unsigned bit_size;
5182 unsigned base;
5183 nir_alu_type type;
5184 /* must be last */
5185 char *newname;
5186 };
5187
5188 /* match an existing variable against the rework state */
5189 static nir_variable *
5190 find_rework_var(nir_shader *nir, struct rework_io_state *ris)
5191 {
5192 nir_foreach_variable_with_modes(var, nir, ris->mode) {
5193 const struct glsl_type *type = var->type;
5194 if (nir_is_arrayed_io(var, nir->info.stage))
5195 type = glsl_get_array_element(type);
5196 if (var->data.fb_fetch_output != ris->fb_fetch_output)
5197 continue;
5198 if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
5199 continue;
5200 unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5201 if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
5202 continue;
5203 unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5204 assert(!glsl_type_contains_64bit(type));
5205 uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
5206 if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
5207 return var;
5208 }
5209 return NULL;
5210 }
5211
5212 static void
5213 update_io_var_name(struct rework_io_state *ris, const char *name)
5214 {
5215 if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
5216 return;
5217 if (!name)
5218 return;
5219 if (ris->name && !strcmp(ris->name, name))
5220 return;
5221 if (ris->newname && !strcmp(ris->newname, name))
5222 return;
5223 if (ris->newname) {
5224 ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
5225 } else if (ris->name) {
5226 ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
5227 } else {
5228 ris->newname = ralloc_strdup(ris->nir, name);
5229 }
5230 }
5231
5232 /* check/update tracking state for variable info */
5233 static void
5234 update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
5235 {
5236 bool is_load = false;
5237 bool is_input = false;
5238 bool is_interp = false;
5239 filter_io_instr(intr, &is_load, &is_input, &is_interp);
5240 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5241 unsigned frac = nir_intrinsic_component(intr);
5242 /* the mask of components for the instruction */
5243 uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
5244
5245 /* always check for existing variables first */
5246 struct rework_io_state test = {
5247 .location = ris->location,
5248 .mode = ris->mode,
5249 .stage = ris->stage,
5250 .arrayed_io = io_instr_is_arrayed(intr),
5251 .medium_precision = sem.medium_precision,
5252 .fb_fetch_output = sem.fb_fetch_output,
5253 .dual_source_blend_index = sem.dual_source_blend_index,
5254 .component_mask = cmask,
5255 .array_size = sem.num_slots > 1 ? sem.num_slots : 0,
5256 };
5257 if (find_rework_var(ris->nir, &test))
5258 return;
5259
5260 /* filter ignored components to scan later:
5261 * - ignore no-overlapping-components case
5262 * - always match fbfetch and dual src blend
5263 */
5264 if (ris->component_mask &&
5265 (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
5266 ris->ignored_component_mask |= cmask;
5267 return;
5268 }
5269
5270 assert(!ris->indirect_only || sem.num_slots > 1);
5271 if (sem.num_slots > 1)
5272 ris->array_size = MAX2(ris->array_size, sem.num_slots);
5273
5274 assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
5275 ris->arrayed_io = io_instr_is_arrayed(intr);
5276
5277 ris->component_mask |= cmask;
5278
5279 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5280 assert(!ris->bit_size || ris->bit_size == bit_size);
5281 ris->bit_size = bit_size;
5282
5283 nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5284 if (ris->type) {
5285 /* in the case of clashing types, this heuristic guarantees some semblance of a match */
5286 if (ris->type & nir_type_float || type & nir_type_float) {
5287 ris->type = nir_type_float | bit_size;
5288 } else if (ris->type & nir_type_int || type & nir_type_int) {
5289 ris->type = nir_type_int | bit_size;
5290 } else if (ris->type & nir_type_uint || type & nir_type_uint) {
5291 ris->type = nir_type_uint | bit_size;
5292 } else {
5293 assert(bit_size == 1);
5294 ris->type = nir_type_bool;
5295 }
5296 } else {
5297 ris->type = type;
5298 }
5299
5300 update_io_var_name(ris, intr->name);
5301
5302 ris->medium_precision |= sem.medium_precision;
5303 ris->fb_fetch_output |= sem.fb_fetch_output;
5304 ris->dual_source_blend_index |= sem.dual_source_blend_index;
5305 if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5306 ris->base = nir_intrinsic_base(intr);
5307 }
5308
5309 /* instruction-level scanning for variable data */
5310 static bool
5311 scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5312 {
5313 struct rework_io_state *ris = data;
5314 bool is_load = false;
5315 bool is_input = false;
5316 bool is_interp = false;
5317 /* mode-based filtering */
5318 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5319 return false;
5320 if (ris->mode == nir_var_shader_in) {
5321 if (!is_input)
5322 return false;
5323 } else {
5324 if (is_input)
5325 return false;
5326 }
5327 /* location-based filtering */
5328 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5329 if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
5330 return false;
5331
5332 /* only scan indirect i/o when indirect_only is set */
5333 nir_src *src_offset = nir_get_io_offset_src(intr);
5334 if (!nir_src_is_const(*src_offset)) {
5335 if (!ris->indirect_only)
5336 return false;
5337 update_io_var_state(intr, ris);
5338 return false;
5339 }
5340
5341 /* don't scan direct i/o when indirect_only is set */
5342 if (ris->indirect_only)
5343 return false;
5344
5345 update_io_var_state(intr, ris);
5346 return false;
5347 }
5348
5349 /* scan a given i/o slot for state info */
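/* the scan below iterates to a fixed point: merging one access can widen the
 * tracked component mask, which lets accesses that were previously filtered
 * out as non-overlapping match on the next pass.
 */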
5350 static struct rework_io_state
5351 scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
5352 {
5353 struct rework_io_state ris = {
5354 .location = location,
5355 .mode = mode,
5356 .stage = nir->info.stage,
5357 .nir = nir,
5358 };
5359
5360 struct rework_io_state test;
5361 do {
5362 update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
5363 test = ris;
5364 /* always run indirect scan first to detect potential overlaps */
5365 if (scan_indirects) {
5366 ris.indirect_only = true;
5367 nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5368 }
5369 ris.indirect_only = false;
5370 nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5371 /* keep scanning until no changes found */
5372 } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
5373 return ris;
5374 }
5375
5376 /* create a variable using explicit/scan info */
5377 static void
5378 create_io_var(nir_shader *nir, struct rework_io_state *ris)
5379 {
5380 char name[1024];
5381 assert(ris->component_mask);
5382 if (ris->newname || ris->name) {
5383 snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
5384 /* always use builtin name where possible */
5385 } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
5386 snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
5387 } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
5388 snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
5389 } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
5390 snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
5391 } else {
5392 int c = ffs(ris->component_mask) - 1;
5393 if (c)
5394 snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
5395 else
5396 snprintf(name, sizeof(name), "slot_%u", ris->location);
5397 }
5398 /* calculate vec/array type */
5399 int frac = ffs(ris->component_mask) - 1;
5400 int num_components = util_last_bit(ris->component_mask) - frac;
5401 assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
5402 const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
5403 if (ris->array_size)
5404 vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
5405 if (ris->arrayed_io) {
5406 /* tess size may be unknown with generated tcs */
5407 unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
5408 nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */;
5409 vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
5410 }
5411 nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
5412 var->data.location_frac = frac;
5413 var->data.location = ris->location;
5414 /* gallium vertex inputs use intrinsic 'base' indexing */
5415 if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5416 var->data.driver_location = ris->base;
5417 var->data.patch = ris->location >= VARYING_SLOT_PATCH0 ||
5418 ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5419 (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER));
5420 /* set flat by default: add_derefs will fill this in later after more shader passes */
5421 if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
5422 var->data.interpolation = INTERP_MODE_FLAT;
5423 var->data.fb_fetch_output = ris->fb_fetch_output;
5424 var->data.index = ris->dual_source_blend_index;
5425 var->data.precision = ris->medium_precision;
5426 /* only clip/cull dist and tess levels are compact */
5427 if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
5428 var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
5429 }
5430
5431 /* loop the i/o mask and generate variables for specified locations */
5432 static void
5433 loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
5434 {
5435 ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
5436 u_foreach_bit64(slot, mask) {
5437 if (patch)
5438 slot += VARYING_SLOT_PATCH0;
5439
5440 /* this should've been handled explicitly */
5441 assert(is_vertex_input || !is_clipcull_dist(slot));
5442
5443 unsigned remaining = 0;
5444 do {
5445 /* scan the slot for usage */
5446 struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect);
5447 /* one of these must be true or things have gone very wrong */
5448 assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
5449 /* release builds only */
5450 if (!ris.component_mask)
5451 break;
5452
5453 /* whatever reaches this point is either enough info to create a variable or an existing variable */
5454 if (!find_rework_var(nir, &ris))
5455 create_io_var(nir, &ris);
5456 /* scanning may detect multiple potential variables per location at component offsets: process again */
5457 remaining = ris.ignored_component_mask;
5458 } while (remaining);
5459 }
5460 }
5461
5462 /* for a given mode, generate variables */
5463 static void
5464 rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
5465 {
5466 assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5467 assert(util_bitcount(mode) == 1);
5468 bool found = false;
5469 /* if no i/o, skip */
5470 if (mode == nir_var_shader_out)
5471 found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
5472 else
5473 found = nir->info.inputs_read || nir->info.patch_inputs_read;
5474 if (!found)
5475 return;
5476
5477 /* use local copies to enable incremental processing */
5478 uint64_t inputs_read = nir->info.inputs_read;
5479 uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
5480 uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
5481 uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly;
5482
5483 /* fragment outputs are special: handle separately */
5484 if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5485 assert(!outputs_accessed_indirectly);
5486 u_foreach_bit64(slot, outputs_accessed) {
5487 struct rework_io_state ris = {
5488 .location = slot,
5489 .mode = mode,
5490 .stage = nir->info.stage,
5491 };
5492 /* explicitly handle builtins */
5493 switch (slot) {
5494 case FRAG_RESULT_DEPTH:
5495 case FRAG_RESULT_STENCIL:
5496 case FRAG_RESULT_SAMPLE_MASK:
5497 ris.bit_size = 32;
5498 ris.component_mask = 0x1;
5499 ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
5500 create_io_var(nir, &ris);
5501 outputs_accessed &= ~BITFIELD64_BIT(slot);
5502 break;
5503 default:
5504 break;
5505 }
5506 }
5507 /* the rest of the outputs can be generated normally */
5508 loop_io_var_mask(nir, mode, false, false, outputs_accessed);
5509 return;
5510 }
5511
5512 /* vertex inputs are special: handle separately */
5513 if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5514 assert(!inputs_read_indirectly);
5515 u_foreach_bit64(slot, inputs_read) {
5516 /* explicitly handle builtins */
5517 if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
5518 continue;
5519
5520 uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
5521 struct rework_io_state ris = {
5522 .location = slot,
5523 .mode = mode,
5524 .stage = nir->info.stage,
5525 .bit_size = 32,
5526 .component_mask = component_mask,
5527 .type = nir_type_float32,
5528 .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
5529 };
5530 create_io_var(nir, &ris);
5531 inputs_read &= ~BITFIELD64_BIT(slot);
5532 }
5533 /* the rest of the inputs can be generated normally */
5534 loop_io_var_mask(nir, mode, false, false, inputs_read);
5535 return;
5536 }
5537
5538 /* these are the masks to process based on the mode: nothing "special" as above */
5539 uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
5540 uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
5541 u_foreach_bit64(slot, mask) {
5542 struct rework_io_state ris = {
5543 .location = slot,
5544 .mode = mode,
5545 .stage = nir->info.stage,
5546 .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
5547 };
5548 /* explicitly handle builtins */
5549 unsigned max_components = 0;
5550 switch (slot) {
5551 case VARYING_SLOT_FOGC:
5552          /* component count comes from the intrinsics via the generic scan below */
5553 break;
5554 case VARYING_SLOT_POS:
5555 case VARYING_SLOT_CLIP_VERTEX:
5556 case VARYING_SLOT_PNTC:
5557 case VARYING_SLOT_BOUNDING_BOX0:
5558 case VARYING_SLOT_BOUNDING_BOX1:
5559 max_components = 4;
5560 ris.type = nir_type_float32;
5561 break;
5562 case VARYING_SLOT_CLIP_DIST0:
5563 max_components = nir->info.clip_distance_array_size;
5564 assert(max_components);
5565 ris.type = nir_type_float32;
5566 break;
5567 case VARYING_SLOT_CULL_DIST0:
5568 max_components = nir->info.cull_distance_array_size;
5569 assert(max_components);
5570 ris.type = nir_type_float32;
5571 break;
5572 case VARYING_SLOT_CLIP_DIST1:
5573 case VARYING_SLOT_CULL_DIST1:
5574 mask &= ~BITFIELD64_BIT(slot);
5575 indirect_mask &= ~BITFIELD64_BIT(slot);
5576 continue;
5577 case VARYING_SLOT_TESS_LEVEL_OUTER:
5578 max_components = 4;
5579 ris.type = nir_type_float32;
5580 break;
5581 case VARYING_SLOT_TESS_LEVEL_INNER:
5582 max_components = 2;
5583 ris.type = nir_type_float32;
5584 break;
5585 case VARYING_SLOT_PRIMITIVE_ID:
5586 case VARYING_SLOT_LAYER:
5587 case VARYING_SLOT_VIEWPORT:
5588 case VARYING_SLOT_FACE:
5589 case VARYING_SLOT_VIEW_INDEX:
5590 case VARYING_SLOT_VIEWPORT_MASK:
5591 ris.type = nir_type_int32;
5592 max_components = 1;
5593 break;
5594 case VARYING_SLOT_PSIZ:
5595 max_components = 1;
5596 ris.type = nir_type_float32;
5597 break;
5598 default:
5599 break;
5600 }
5601 if (!max_components)
5602 continue;
5603 switch (slot) {
5604 case VARYING_SLOT_CLIP_DIST0:
5605 case VARYING_SLOT_CLIP_DIST1:
5606 case VARYING_SLOT_CULL_DIST0:
5607 case VARYING_SLOT_CULL_DIST1:
5608 case VARYING_SLOT_TESS_LEVEL_OUTER:
5609 case VARYING_SLOT_TESS_LEVEL_INNER:
5610 /* compact arrays */
5611 ris.component_mask = 0x1;
5612 ris.array_size = max_components;
5613 break;
5614 default:
5615 ris.component_mask = BITFIELD_MASK(max_components);
5616 break;
5617 }
5618 ris.bit_size = 32;
5619 create_io_var(nir, &ris);
5620 mask &= ~BITFIELD64_BIT(slot);
5621 /* eliminate clip/cull distance scanning early */
5622 indirect_mask &= ~BITFIELD64_BIT(slot);
5623 }
5624
5625 /* patch i/o */
5626 if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
5627 (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
5628 uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
5629 uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly;
5630 uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
5631
5632 loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
5633 loop_io_var_mask(nir, mode, false, true, patch_mask);
5634 }
5635
5636 /* regular i/o */
5637 loop_io_var_mask(nir, mode, true, false, indirect_mask);
5638 loop_io_var_mask(nir, mode, false, false, mask);
5639 }
5640
5641 static int
5642 zink_type_size(const struct glsl_type *type, bool bindless)
5643 {
5644 return glsl_count_attribute_slots(type, false);
5645 }
5646
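/* e.g. a 16-byte load with a combined alignment of 4 becomes a 4-component
 * 32-bit access, while a sufficiently aligned load keeps its bit size and is
 * only capped at 4 components.
 */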
5647 static nir_mem_access_size_align
5648 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5649 uint8_t bit_size, uint32_t align,
5650 uint32_t align_offset, bool offset_is_const,
5651 enum gl_access_qualifier access, const void *cb_data)
5652 {
5653 align = nir_combined_align(align, align_offset);
5654
5655 assert(util_is_power_of_two_nonzero(align));
5656
5657 /* simply drop the bit_size for unaligned loads/stores */
5658 if (align < (bit_size / 8)) {
5659 return (nir_mem_access_size_align){
5660 .num_components = MIN2(bytes / align, 4),
5661 .bit_size = align * 8,
5662 .align = align,
5663 .shift = nir_mem_access_shift_method_scalar,
5664 };
5665 } else {
5666 return (nir_mem_access_size_align){
5667 .num_components = MIN2(bytes / (bit_size / 8), 4),
5668 .bit_size = bit_size,
5669 .align = bit_size / 8,
5670 .shift = nir_mem_access_shift_method_scalar,
5671 };
5672 }
5673 }
5674
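/* scratch variant of the callback above: cb_data carries the minimum bit size found
 * across all scratch loads/stores, and every access is forced to that size so scratch
 * accesses of different widths can alias.
 */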
5675 static nir_mem_access_size_align
5676 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5677 uint8_t bit_size, uint32_t align,
5678 uint32_t align_offset, bool offset_is_const,
5679 enum gl_access_qualifier access, const void *cb_data)
5680 {
5681 bit_size = *(const uint8_t *)cb_data;
5682 align = nir_combined_align(align, align_offset);
5683
5684 assert(util_is_power_of_two_nonzero(align));
5685
5686 return (nir_mem_access_size_align){
5687 .num_components = MIN2(bytes / (bit_size / 8), 4),
5688 .bit_size = bit_size,
5689 .align = bit_size / 8,
5690 .shift = nir_mem_access_shift_method_scalar,
5691 };
5692 }
5693
5694 static bool
5695 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5696 {
5697 uint8_t *bit_size = data;
5698 switch (instr->intrinsic) {
5699 case nir_intrinsic_load_scratch:
5700 *bit_size = MIN2(*bit_size, instr->def.bit_size);
5701 return false;
5702 case nir_intrinsic_store_scratch:
5703 *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5704 return false;
5705 default:
5706 return false;
5707 }
5708 }
5709
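/* scan for the smallest bit size used by any scratch load/store, then lower all scratch
 * access to that size so differently-sized scratch slots can alias
 */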
5710 static bool
5711 alias_scratch_memory(nir_shader *nir)
5712 {
5713 uint8_t bit_size = 64;
5714
5715 nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5716 nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5717 .modes = nir_var_function_temp,
5718 .may_lower_unaligned_stores_to_atomics = true,
5719 .callback = mem_access_scratch_size_align_cb,
5720 .cb_data = &bit_size,
5721 };
5722 return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5723 }
5724
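/* nir_lower_alu_width callback: split vec8/vec16 ALU ops down to at most vec4 */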
5725 static uint8_t
5726 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5727 {
5728 return 4;
5729 }
5730
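/* nir_lower_bit_size callback: lower bit_count/find_lsb/ifind_msb/ufind_msb to 32-bit
 * whenever the source is some other width; everything else is left untouched
 */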
5731 static unsigned
5732 zink_lower_bit_size_cb(const nir_instr *instr, void *data)
5733 {
5734 switch (instr->type) {
5735 case nir_instr_type_alu: {
5736 nir_alu_instr *alu = nir_instr_as_alu(instr);
5737 switch (alu->op) {
5738 case nir_op_bit_count:
5739 case nir_op_find_lsb:
5740 case nir_op_ifind_msb:
5741 case nir_op_ufind_msb:
5742 return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32;
5743 default:
5744 return 0;
5745 }
5746 }
5747 default:
5748 return 0;
5749 }
5750 }
5751
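/* for generic vertex attribute inputs, force the io semantics location to
 * VERT_ATTRIB_GENERIC0 + base so the semantic location and the driver base agree
 */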
5752 static bool
5753 fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5754 {
5755 bool is_load = false;
5756 bool is_input = false;
5757 bool is_interp = false;
5758 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input)
5759 return false;
5760
5761 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5762 if (sem.location < VERT_ATTRIB_GENERIC0)
5763 return false;
5764 sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr);
5765 nir_intrinsic_set_io_semantics(intr, sem);
5766 return true;
5767 }
5768
5769 static bool
5770 fix_vertex_input_locations(nir_shader *nir)
5771 {
5772 if (nir->info.stage != MESA_SHADER_VERTEX)
5773 return false;
5774
5775 return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
5776 }
5777
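/* scratch state for the trivial i/o revectorize pass below:
 * - base is the scalar intrinsic being grown into a vectorized access
 * - merge[] maps each component to the intrinsic that will be folded into base
 * - deletions collects folded intrinsics so they can be removed after the pass
 */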
5778 struct trivial_revectorize_state {
5779 bool has_xfb;
5780 uint32_t component_mask;
5781 nir_intrinsic_instr *base;
5782 nir_intrinsic_instr *next_emit_vertex;
5783 nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
5784 struct set *deletions;
5785 };
5786
5787 /* always skip xfb; scalarized xfb is preferred */
5788 static bool
5789 intr_has_xfb(nir_intrinsic_instr *intr)
5790 {
5791 if (!nir_intrinsic_has_io_xfb(intr))
5792 return false;
5793 for (unsigned i = 0; i < 2; i++) {
5794 if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
5795 return true;
5796 }
5797 }
5798 return false;
5799 }
5800
5801 /* helper to avoid vectorizing i/o for different vertices */
5802 static nir_intrinsic_instr *
5803 find_next_emit_vertex(nir_intrinsic_instr *intr)
5804 {
5805 bool found = false;
5806 nir_foreach_instr_safe(instr, intr->instr.block) {
5807 if (instr->type == nir_instr_type_intrinsic) {
5808 nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5809 if (!found && test_intr != intr)
5810 continue;
5811 if (!found) {
5812 assert(intr == test_intr);
5813 found = true;
5814 continue;
5815 }
5816 if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5817 return test_intr;
5818 }
5819 }
5820 return NULL;
5821 }
5822
5823 /* scan for vectorizable instrs on a given location */
5824 static bool
5825 trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
5826 {
5827 nir_intrinsic_instr *base = state->base;
5828
5829 if (intr == base)
5830 return false;
5831
5832 if (intr->intrinsic != base->intrinsic)
5833 return false;
5834
5835 if (_mesa_set_search(state->deletions, intr))
5836 return false;
5837
5838 bool is_load = false;
5839 bool is_input = false;
5840 bool is_interp = false;
5841 filter_io_instr(intr, &is_load, &is_input, &is_interp);
5842
5843 nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
5844 nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
5845 nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
5846 nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5847 int c = nir_intrinsic_component(intr);
5848 /* already detected */
5849 if (state->component_mask & BITFIELD_BIT(c))
5850 return false;
5851 /* not a match */
5852 if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
5853 return false;
5854 /* only vectorize when all srcs match */
5855 for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
5856 if (!nir_srcs_equal(intr->src[i], base->src[i]))
5857 return false;
5858 }
5859 /* never match xfb */
5860 state->has_xfb |= intr_has_xfb(intr);
5861 if (state->has_xfb)
5862 return false;
5863 if (nir->info.stage == MESA_SHADER_GEOMETRY) {
5864 /* only match same vertex */
5865 if (state->next_emit_vertex != find_next_emit_vertex(intr))
5866 return false;
5867 }
5868 uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
5869 state->component_mask |= mask;
5870 u_foreach_bit(component, mask)
5871 state->merge[component] = intr;
5872
5873 return true;
5874 }
5875
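/* per-intrinsic entry point: starting from a scalar load/store, collect the other scalar
 * accesses to the same location/type in the block, then rewrite each run of contiguous
 * components into a single vectorized load/store and queue the leftovers for deletion
 */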
5876 static bool
5877 trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
5878 {
5879 bool is_load = false;
5880 bool is_input = false;
5881 bool is_interp = false;
5882 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5883 return false;
5884 if (intr->num_components != 1)
5885 return false;
5886 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5887 if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
5888 /* always ignore compact arrays */
5889 switch (sem.location) {
5890 case VARYING_SLOT_CLIP_DIST0:
5891 case VARYING_SLOT_CLIP_DIST1:
5892 case VARYING_SLOT_CULL_DIST0:
5893 case VARYING_SLOT_CULL_DIST1:
5894 case VARYING_SLOT_TESS_LEVEL_INNER:
5895 case VARYING_SLOT_TESS_LEVEL_OUTER:
5896 return false;
5897 default: break;
5898 }
5899 }
5900 /* always ignore to-be-deleted instrs */
5901 if (_mesa_set_search(data, intr))
5902 return false;
5903
5904 /* never vectorize xfb */
5905 if (intr_has_xfb(intr))
5906 return false;
5907
5908 int ic = nir_intrinsic_component(intr);
5909 uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
5910 /* already vectorized */
5911 if (util_bitcount(mask) == 4)
5912 return false;
5913 struct trivial_revectorize_state state = {
5914 .component_mask = mask,
5915 .base = intr,
5916 /* avoid clobbering i/o for different vertices */
5917 .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
5918 .deletions = data,
5919 };
5920 u_foreach_bit(bit, mask)
5921 state.merge[bit] = intr;
5922 bool progress = false;
5923 nir_foreach_instr(instr, intr->instr.block) {
5924 if (instr->type != nir_instr_type_intrinsic)
5925 continue;
5926 nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5927 /* no matching across vertex emission */
5928 if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5929 break;
5930 progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
5931 }
5932 if (!progress || state.has_xfb)
5933 return false;
5934
5935 /* sanity check: the merge produced a nonzero, xfb-free component set */
5936 assert(state.component_mask);
5937 for (unsigned i = 0; i < 4; i++) {
5938 assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
5939 }
5940
5941 unsigned first_component = ffs(state.component_mask) - 1;
5942 unsigned num_components = util_bitcount(state.component_mask);
5943 unsigned num_contiguous = 0;
5944 uint32_t contiguous_mask = 0;
5945 for (unsigned i = 0; i < num_components; i++) {
5946 unsigned c = i + first_component;
5947 /* calc mask of contiguous components to vectorize */
5948 if (state.component_mask & BITFIELD_BIT(c)) {
5949 num_contiguous++;
5950 contiguous_mask |= BITFIELD_BIT(c);
5951 }
5952 /* on the first gap or the last component, vectorize */
5953 if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
5954 if (num_contiguous > 1) {
5955 /* reindex to enable easy src/dest index comparison */
5956 nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
5957 /* determine the first/last instr to use for the base (vectorized) load/store */
5958 unsigned first_c = ffs(contiguous_mask) - 1;
5959 nir_intrinsic_instr *base = NULL;
5960 unsigned test_idx = is_load ? UINT32_MAX : 0;
5961 for (unsigned j = 0; j < num_contiguous; j++) {
5962 unsigned merge_c = j + first_c;
5963 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
5964 /* avoid breaking ssa ordering by using:
5965 * - first instr for vectorized load
5966 * - last instr for vectorized store
5967 * this guarantees all srcs have been seen
5968 */
5969 if ((is_load && merge_intr->def.index < test_idx) ||
5970 (!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
5971 test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
5972 base = merge_intr;
5973 }
5974 }
5975 assert(base);
5976 /* update instr components */
5977 nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
5978 unsigned orig_components = base->num_components;
5979 base->num_components = num_contiguous;
5980 /* do rewrites after loads and before stores */
5981 b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
5982 if (is_load) {
5983 base->def.num_components = num_contiguous;
5984 /* iterate the contiguous loaded components and rewrite merged dests */
5985 for (unsigned j = 0; j < num_contiguous; j++) {
5986 unsigned merge_c = j + first_c;
5987 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
5988 /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
5989 unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
5990 nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
5991 nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? swiz->parent_instr : &merge_intr->instr);
5992 j += use_components - 1;
5993 }
5994 } else {
5995 nir_def *comp[NIR_MAX_VEC_COMPONENTS];
5996 /* generate swizzled vec of store components and rewrite store src */
5997 for (unsigned j = 0; j < num_contiguous; j++) {
5998 unsigned merge_c = j + first_c;
5999 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6000 /* detect if the merged instr stored multiple components and extract them for rewrite */
6001 unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
6002 for (unsigned k = 0; k < use_components; k++)
6003 comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
6004 j += use_components - 1;
6005 }
6006 nir_def *val = nir_vec(b, comp, num_contiguous);
6007 nir_src_rewrite(&base->src[0], val);
6008 nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
6009 }
6010 /* deleting instructions while iterating the block would break the foreach, so defer deletion */
6011 for (unsigned j = 0; j < num_contiguous; j++) {
6012 unsigned merge_c = j + first_c;
6013 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6014 if (merge_intr != base)
6015 _mesa_set_add(data, &merge_intr->instr);
6016 }
6017 }
6018 contiguous_mask = 0;
6019 num_contiguous = 0;
6020 }
6021 }
6022
6023 return true;
6024 }
6025
6026 /* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
6027 static bool
6028 trivial_revectorize(nir_shader *nir)
6029 {
6030 struct set deletions;
6031
6032 if (nir->info.stage > MESA_SHADER_FRAGMENT)
6033 return false;
6034
6035 _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6036 bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions);
6037 /* now it's safe to delete */
6038 set_foreach_remove(&deletions, entry) {
6039 nir_instr *instr = (void*)entry->key;
6040 nir_instr_remove(instr);
6041 }
6042 ralloc_free(deletions.table);
6043 return progress;
6044 }
6045
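/* rewrite a two-level array deref of a sampler/image array-of-arrays into a single
 * flat index (outer * inner_size + inner); the variable type itself is flattened in
 * the caller before this runs
 */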
6046 static bool
6047 flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data)
6048 {
6049 if (instr->type != nir_instr_type_deref)
6050 return false;
6051
6052 nir_deref_instr *deref = nir_instr_as_deref(instr);
6053 if (deref->deref_type != nir_deref_type_array)
6054 return false;
6055 nir_deref_instr *parent = nir_deref_instr_parent(deref);
6056 if (!parent || parent->deref_type != nir_deref_type_array)
6057 return false;
6058 nir_variable *var = nir_deref_instr_get_variable(deref);
6059 const struct glsl_type *type = glsl_without_array(var->type);
6060 if (type == var->type || (!glsl_type_is_sampler(type) && !glsl_type_is_image(type)))
6061 return false;
6062
6063 nir_deref_instr *parent_parent = nir_deref_instr_parent(parent);
6064 int parent_size = glsl_array_size(parent->type);
6065 b->cursor = nir_after_instr(instr);
6066 nir_deref_instr *new_deref = nir_build_deref_array(b, parent_parent, nir_iadd(b, nir_imul_imm(b, parent->arr.index.ssa, parent_size), deref->arr.index.ssa));
6067 nir_def_rewrite_uses_after(&deref->def, &new_deref->def, &new_deref->instr);
6068 _mesa_set_add(data, instr);
6069 _mesa_set_add(data, &parent->instr);
6070 return true;
6071 }
6072
6073 static bool
6074 flatten_image_arrays(nir_shader *nir)
6075 {
6076 bool progress = false;
6077 nir_foreach_variable_with_modes(var, nir, nir_var_uniform | nir_var_image) {
6078 const struct glsl_type *type = glsl_without_array(var->type);
6079 if (!glsl_type_is_sampler(type) && !glsl_type_is_image(type))
6080 continue;
6081 if (type == var->type)
6082 continue;
6083 var->type = glsl_array_type(type, glsl_get_aoa_size(var->type), sizeof(void*));
6084 progress = true;
6085 }
6086 struct set *deletions = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6087 progress |= nir_shader_instructions_pass(nir, flatten_image_arrays_intr, nir_metadata_dominance, deletions);
6088 set_foreach_remove(deletions, he) {
6089 nir_instr *instr = (void*)he->key;
6090 nir_instr_remove_v(instr);
6091 }
6092 _mesa_set_destroy(deletions, NULL);
6093 if (progress)
6094 nir_fixup_deref_types(nir);
6095 return progress;
6096 }
6097
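/* clamp constant array indices that exceed the array size to 0, presumably to keep
 * out-of-range indexing from reaching past the descriptor array
 */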
6098 static bool
6099 bound_image_arrays_instr(struct nir_builder *b, nir_instr *instr, void *data)
6100 {
6101 if (instr->type != nir_instr_type_deref)
6102 return false;
6103
6104 nir_deref_instr *deref = nir_instr_as_deref(instr);
6105 if (deref->deref_type != nir_deref_type_array)
6106 return false;
6107
6108 if (!nir_src_is_const(deref->arr.index))
6109 return false;
6110 nir_deref_instr *parent = nir_deref_instr_parent(deref);
6111 int parent_size = glsl_array_size(parent->type);
6112 unsigned idx = nir_src_as_uint(deref->arr.index);
6113 if (idx >= parent_size) {
6114 b->cursor = nir_before_instr(instr);
6115 nir_src_rewrite(&deref->arr.index, nir_imm_zero(b, 1, 32));
6116 return true;
6117 }
6118 return false;
6119 }
6120
6121 static bool
6122 bound_image_arrays(nir_shader *nir)
6123 {
6124 return nir_shader_instructions_pass(nir, bound_image_arrays_instr, nir_metadata_dominance, NULL);
6125 }
6126
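/* allocate the zink_shader wrapper for a gallium-supplied nir shader and snapshot the
 * relevant screen capabilities (memory model, float controls, bindless set index) into
 * sinfo; the heavier nir processing happens later in zink_shader_init
 */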
6127 struct zink_shader *
6128 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
6129 {
6130 struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6131
6132 zs->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
6133 nir->info.outputs_written & VARYING_BIT_EDGE;
6134
6135 zs->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
6136 zs->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
6137 zs->sinfo.broken_arbitary_type_const = screen->driver_compiler_workarounds.broken_const;
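   /* fold the VK float controls properties into per-width bitmasks:
    * bit 0 = fp16, bit 1 = fp32, bit 2 = fp64
    */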
6138 if (screen->info.have_KHR_shader_float_controls) {
6139 if (screen->info.props12.shaderDenormFlushToZeroFloat16)
6140 zs->sinfo.float_controls.flush_denorms |= 0x1;
6141 if (screen->info.props12.shaderDenormFlushToZeroFloat32)
6142 zs->sinfo.float_controls.flush_denorms |= 0x2;
6143 if (screen->info.props12.shaderDenormFlushToZeroFloat64)
6144 zs->sinfo.float_controls.flush_denorms |= 0x4;
6145
6146 if (screen->info.props12.shaderDenormPreserveFloat16)
6147 zs->sinfo.float_controls.preserve_denorms |= 0x1;
6148 if (screen->info.props12.shaderDenormPreserveFloat32)
6149 zs->sinfo.float_controls.preserve_denorms |= 0x2;
6150 if (screen->info.props12.shaderDenormPreserveFloat64)
6151 zs->sinfo.float_controls.preserve_denorms |= 0x4;
6152
6153 zs->sinfo.float_controls.denorms_all_independence =
6154 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
6155
6156 zs->sinfo.float_controls.denorms_32_bit_independence =
6157 zs->sinfo.float_controls.denorms_all_independence ||
6158 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
6159 }
6160 zs->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6161
6162 util_queue_fence_init(&zs->precompile.fence);
6163 util_dynarray_init(&zs->pipeline_libs, zs);
6164 zs->hash = _mesa_hash_pointer(zs);
6165
6166 zs->programs = _mesa_pointer_set_create(NULL);
6167 simple_mtx_init(&zs->lock, mtx_plain);
6168 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6169 zs->info.name = ralloc_strdup(zs, nir->info.name);
6170
6171 zs->can_inline = true;
6172 zs->nir = nir;
6173
6174 if (nir->info.stage != MESA_SHADER_KERNEL)
6175 match_tex_dests(nir, zs, true);
6176
6177 return zs;
6178 }
6179
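/* run the main lowering pipeline on a freshly created shader: scalarize and rework i/o,
 * lower subgroups/discard/bo access, assign descriptor sets and bindings for every
 * resource variable, then serialize the result into zs->blob
 */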
6180 void
6181 zink_shader_init(struct zink_screen *screen, struct zink_shader *zs)
6182 {
6183 bool have_psiz = false;
6184 nir_shader *nir = zs->nir;
6185
6186 if (nir->info.stage == MESA_SHADER_KERNEL) {
6187 nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
6188 .modes = nir_var_all ^ nir_var_function_temp,
6189 .may_lower_unaligned_stores_to_atomics = true,
6190 .callback = mem_access_size_align_cb,
6191 .cb_data = screen,
6192 };
6193 NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
6194 NIR_PASS_V(nir, nir_lower_bit_size, zink_lower_bit_size_cb, NULL);
6195 NIR_PASS_V(nir, alias_scratch_memory);
6196 NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
6197 NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
6198 }
6199
6200 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
6201 optimize_nir(nir, NULL, true);
6202 NIR_PASS_V(nir, bound_image_arrays);
6203 NIR_PASS_V(nir, flatten_image_arrays);
6204 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
6205 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
6206 NIR_PASS_V(nir, lower_bindless_io);
6207 break;
6208 }
6209 }
6210 if (nir->info.stage < MESA_SHADER_FRAGMENT)
6211 nir_gather_xfb_info_from_intrinsics(nir);
6212 NIR_PASS_V(nir, fix_vertex_input_locations);
6213 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6214 scan_nir(screen, nir, zs);
6215 NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL);
6216 NIR_PASS_V(nir, trivial_revectorize);
6217 if (nir->info.io_lowered) {
6218 rework_io_vars(nir, nir_var_shader_in, zs);
6219 rework_io_vars(nir, nir_var_shader_out, zs);
6220 nir_sort_variables_by_location(nir, nir_var_shader_in);
6221 nir_sort_variables_by_location(nir, nir_var_shader_out);
6222 }
6223
6224 if (nir->info.stage < MESA_SHADER_COMPUTE)
6225 create_gfx_pushconst(nir);
6226
6227 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
6228 nir->info.stage == MESA_SHADER_TESS_EVAL)
6229 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
6230
6231 if (nir->info.stage < MESA_SHADER_FRAGMENT)
6232 have_psiz = check_psiz(nir);
6233 if (nir->info.stage == MESA_SHADER_FRAGMENT)
6234 zs->flat_flags = zink_flat_flags(nir);
6235
6236 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
6237 NIR_PASS_V(nir, fixup_io_locations);
6238
6239 NIR_PASS_V(nir, lower_basevertex);
6240 NIR_PASS_V(nir, lower_baseinstance);
6241 NIR_PASS_V(nir, split_bitfields);
6242 if (!screen->info.feats.features.shaderStorageImageMultisample)
6243 NIR_PASS_V(nir, strip_tex_ms);
6244 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
6245
6246 if (screen->need_2D_zs)
6247 NIR_PASS_V(nir, lower_1d_shadow, screen);
6248
6249 {
6250 nir_lower_subgroups_options subgroup_options = {0};
6251 subgroup_options.lower_to_scalar = true;
6252 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
6253 subgroup_options.ballot_bit_size = 32;
6254 subgroup_options.ballot_components = 4;
6255 subgroup_options.lower_subgroup_masks = true;
6256 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
6257 subgroup_options.subgroup_size = 1;
6258 subgroup_options.lower_vote_trivial = true;
6259 }
6260 subgroup_options.lower_inverse_ballot = true;
6261 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
6262 }
6263
6264 optimize_nir(nir, NULL, true);
6265 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6266 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
6267 nir_lower_demote_if_to_cf |
6268 nir_lower_terminate_if_to_cf));
6269
6270 bool needs_size = analyze_io(zs, nir);
6271 NIR_PASS_V(nir, unbreak_bos, zs, needs_size);
6272 /* if uniforms could be inlined, run these passes at compile time instead */
6273 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
6274 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
6275 NIR_PASS_V(nir, rewrite_bo_access, screen);
6276 NIR_PASS_V(nir, remove_bo_access, zs);
6277 }
6278
6279 struct zink_bindless_info bindless = {0};
6280 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6281 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
6282 var->data.is_xfb = false;
6283
6284 optimize_nir(nir, NULL, true);
6285 prune_io(nir);
6286
6287 if (nir->info.stage == MESA_SHADER_KERNEL) {
6288 NIR_PASS_V(nir, type_images);
6289 }
6290
6291 unsigned ubo_binding_mask = 0;
6292 unsigned ssbo_binding_mask = 0;
6293 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
6294 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
6295 nir_var_image |
6296 nir_var_mem_ubo |
6297 nir_var_mem_ssbo)) {
6298 enum zink_descriptor_type ztype;
6299 const struct glsl_type *type = glsl_without_array(var->type);
6300 if (var->data.mode == nir_var_mem_ubo) {
6301 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
6302 /* buffer 0 is a push descriptor */
6303 var->data.descriptor_set = !!var->data.driver_location;
6304 var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
6305 zink_binding(nir->info.stage,
6306 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
6307 var->data.driver_location,
6308 screen->compact_descriptors);
6309 assert(var->data.driver_location || var->data.binding < 10);
6310 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6311 int binding = var->data.binding;
6312
6313 if (!var->data.driver_location) {
6314 zs->has_uniforms = true;
6315 } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
6316 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6317 zs->bindings[ztype][zs->num_bindings[ztype]].binding = binding;
6318 zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6319 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6320 assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6321 zs->num_bindings[ztype]++;
6322 ubo_binding_mask |= BITFIELD_BIT(binding);
6323 }
6324 } else if (var->data.mode == nir_var_mem_ssbo) {
6325 ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
6326 var->data.descriptor_set = screen->desc_set_id[ztype];
6327 var->data.binding = zink_binding(clamp_stage(&nir->info),
6328 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
6329 var->data.driver_location,
6330 screen->compact_descriptors);
6331 if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
6332 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6333 zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6334 zs->bindings[ztype][zs->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
6335 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6336 assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6337 zs->num_bindings[ztype]++;
6338 ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
6339 }
6340 } else {
6341 assert(var->data.mode == nir_var_uniform ||
6342 var->data.mode == nir_var_image);
6343 if (var->data.bindless) {
6344 zs->bindless = true;
6345 handle_bindless_var(nir, var, type, &bindless);
6346 } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
6347 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : glsl_type_is_bare_sampler(type) ? VK_DESCRIPTOR_TYPE_SAMPLER : zink_sampler_type(type);
6348 if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
6349 vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
6350 ztype = zink_desc_type_from_vktype(vktype);
6351 var->data.driver_location = var->data.binding;
6352 var->data.descriptor_set = screen->desc_set_id[ztype];
6353 var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
6354 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6355 zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6356 zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6357 if (glsl_type_is_array(var->type))
6358 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
6359 else
6360 zs->bindings[ztype][zs->num_bindings[ztype]].size = 1;
6361 zs->num_bindings[ztype]++;
6362 } else if (var->data.mode == nir_var_uniform) {
6363 /* this is a dead uniform */
6364 var->data.mode = 0;
6365 exec_node_remove(&var->node);
6366 }
6367 }
6368 }
6369 }
6370 bool bindless_lowered = false;
6371 NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
6372 zs->bindless |= bindless_lowered;
6373
6374 if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
6375 NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
6376 if (nir->info.stage != MESA_SHADER_KERNEL)
6377 NIR_PASS_V(nir, match_tex_dests, zs, false);
6378
6379 if (!nir->info.internal)
6380 nir_foreach_shader_out_variable(var, nir)
6381 var->data.explicit_xfb_buffer = 0;
6382 if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
6383 update_so_info(zs, nir, nir->info.outputs_written, have_psiz);
6384 zink_shader_serialize_blob(nir, &zs->blob);
6385 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6386 }
6387
6388 char *
6389 zink_shader_finalize(struct pipe_screen *pscreen, struct nir_shader *nir)
6390 {
6391 struct zink_screen *screen = zink_screen(pscreen);
6392
6393 nir_lower_tex_options tex_opts = {
6394 .lower_invalid_implicit_lod = true,
6395 };
6396 /*
6397 Sampled Image must be an object whose type is OpTypeSampledImage.
6398 The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
6399 or Rect, and the Arrayed and MS operands must be 0.
6400 - SPIRV, OpImageSampleProj* opcodes
6401 */
6402 tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
6403 BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
6404 tex_opts.lower_txp_array = true;
6405 if (!screen->info.feats.features.shaderImageGatherExtended)
6406 tex_opts.lower_tg4_offsets = true;
6407 NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
6408 optimize_nir(nir, NULL, false);
6409 if (nir->info.stage == MESA_SHADER_VERTEX)
6410 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6411 if (screen->driconf.inline_uniforms)
6412 nir_find_inlinable_uniforms(nir);
6413
6414 return NULL;
6415 }
6416
6417 void
6418 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6419 {
6420 _mesa_set_destroy(shader->programs, NULL);
6421 util_queue_fence_wait(&shader->precompile.fence);
6422 util_queue_fence_destroy(&shader->precompile.fence);
6423 zink_descriptor_shader_deinit(screen, shader);
6424 if (screen->info.have_EXT_shader_object) {
6425 VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
6426 } else {
6427 if (shader->precompile.obj.mod)
6428 VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
6429 if (shader->precompile.gpl)
6430 VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
6431 }
6432 blob_finish(&shader->blob);
6433 ralloc_free(shader->spirv);
6434 free(shader->precompile.bindings);
6435 ralloc_free(shader);
6436 }
6437
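/* detach one gfx program from this shader's program set, removing it from the context's
 * program cache and dropping its reference; returns false once the set is empty
 */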
6438 static bool
6439 gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader)
6440 {
6441 /* this shader may still be precompiling, so access here must be locked and singular */
6442 simple_mtx_lock(&shader->lock);
6443 struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL);
6444 struct zink_gfx_program *prog = (void*)(entry ? entry->key : NULL);
6445 if (entry)
6446 _mesa_set_remove(shader->programs, entry);
6447 simple_mtx_unlock(&shader->lock);
6448 if (!prog)
6449 return false;
6450 gl_shader_stage stage = shader->info.stage;
6451 assert(stage < ZINK_GFX_SHADER_COUNT);
6452 util_queue_fence_wait(&prog->base.cache_fence);
6453 unsigned stages_present = prog->stages_present;
6454 if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
6455 prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
6456 stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
6457 unsigned idx = zink_program_cache_stages(stages_present);
6458 if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
6459 (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
6460 struct hash_table *ht = &prog->base.ctx->program_cache[idx];
6461 simple_mtx_lock(&prog->base.ctx->program_lock[idx]);
6462 struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
6463 assert(he && he->data == prog);
6464 _mesa_hash_table_remove(ht, he);
6465 prog->base.removed = true;
6466 simple_mtx_unlock(&prog->base.ctx->program_lock[idx]);
6467
6468 for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
6469 for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
6470 hash_table_foreach(&prog->pipelines[r][i], table_entry) {
6471 struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
6472
6473 util_queue_fence_wait(&pc_entry->fence);
6474 }
6475 }
6476 }
6477 }
6478 if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
6479 prog->shaders[stage] = NULL;
6480 prog->stages_remaining &= ~BITFIELD_BIT(stage);
6481 }
6482 /* only remove generated tcs during parent tes destruction */
6483 if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
6484 prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
6485 if (stage != MESA_SHADER_FRAGMENT &&
6486 prog->shaders[MESA_SHADER_GEOMETRY] &&
6487 prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
6488 shader) {
6489 prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
6490 }
6491 zink_gfx_program_reference(screen, &prog, NULL);
6492 return true;
6493 }
6494
6495 void
6496 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6497 {
6498 assert(shader->info.stage != MESA_SHADER_COMPUTE);
6499 util_queue_fence_wait(&shader->precompile.fence);
6500
6501 /* if the shader is still precompiling, the program set must be pruned under lock */
6502 while (gfx_shader_prune(screen, shader));
6503
6504 while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
6505 struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
6506 if (!libs->removed) {
6507 libs->removed = true;
6508 unsigned idx = zink_program_cache_stages(libs->stages_present);
6509 simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
6510 _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
6511 simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
6512 }
6513 zink_gfx_lib_cache_unref(screen, libs);
6514 }
6515 if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
6516 shader->non_fs.generated_tcs) {
6517 /* automatically destroy generated tcs shaders when tes is destroyed */
6518 zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
6519 shader->non_fs.generated_tcs = NULL;
6520 }
6521 if (shader->info.stage != MESA_SHADER_FRAGMENT) {
6522 for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
6523 for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
6524 if (shader->non_fs.generated_gs[i][j]) {
6525 /* automatically destroy generated gs shaders when owner is destroyed */
6526 zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
6527 shader->non_fs.generated_gs[i][j] = NULL;
6528 }
6529 }
6530 }
6531 }
6532 zink_shader_free(screen, shader);
6533 }
6534
6535
6536 struct zink_shader_object
6537 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
6538 {
6539 assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
6540 /* shortcut all the nir passes since we just have to change this one word */
6541 zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
6542 return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
6543 }
6544
6545 /* creating a passthrough tcs shader that's roughly:
6546
6547 #version 150
6548 #extension GL_ARB_tessellation_shader : require
6549
6550 in vec4 some_var[gl_MaxPatchVertices];
6551 out vec4 some_var_out;
6552
6553 layout(push_constant) uniform tcsPushConstants {
6554 layout(offset = 0) float TessLevelInner[2];
6555 layout(offset = 8) float TessLevelOuter[4];
6556 } u_tcsPushConstants;
6557 layout(vertices = $vertices_per_patch) out;
6558 void main()
6559 {
6560 gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
6561 gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
6562 some_var_out = some_var[gl_InvocationID];
6563 }
6564
6565 */
6566 void
6567 zink_shader_tcs_init(struct zink_screen *screen, struct zink_shader *zs, nir_shader *tes, nir_shader **nir_ret)
6568 {
6569 nir_shader *nir = zs->nir;
6570
6571 nir_builder b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir)));
6572
6573 nir_def *invocation_id = nir_load_invocation_id(&b);
6574
6575 nir_foreach_shader_in_variable(var, tes) {
6576 if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
6577 continue;
6578 const struct glsl_type *in_type = var->type;
6579 const struct glsl_type *out_type = var->type;
6580 char buf[1024];
6581 snprintf(buf, sizeof(buf), "%s_out", var->name);
6582 if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
6583 const struct glsl_type *type = var->type;
6584 in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
6585 out_type = glsl_array_type(type, nir->info.tess.tcs_vertices_out, 0);
6586 }
6587
6588 nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
6589 nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
6590 out->data.location = in->data.location = var->data.location;
6591 out->data.location_frac = in->data.location_frac = var->data.location_frac;
6592
6593 /* gl_in[] receives values from equivalent built-in output
6594 variables written by the vertex shader (section 2.14.7). Each array
6595 element of gl_in[] is a structure holding values for a specific vertex of
6596 the input patch. The length of gl_in[] is equal to the
6597 implementation-dependent maximum patch size (gl_MaxPatchVertices).
6598 - ARB_tessellation_shader
6599 */
6600 /* load the invocation-specific value of the vertex output and store it to the matching per-vertex tcs output */
6601 nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
6602 nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
6603 copy_vars(&b, out_value, in_value);
6604 }
6605 nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
6606 gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
6607 gl_TessLevelInner->data.patch = 1;
6608 nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
6609 gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
6610 gl_TessLevelOuter->data.patch = 1;
6611
6612 create_gfx_pushconst(nir);
6613
6614 nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
6615 nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
6616 nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
6617 nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
6618
6619 for (unsigned i = 0; i < 2; i++) {
6620 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
6621 nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
6622 }
6623 for (unsigned i = 0; i < 4; i++) {
6624 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
6625 nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
6626 }
6627
6628 nir_validate_shader(nir, "created");
6629
6630 optimize_nir(nir, NULL, true);
6631 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6632 NIR_PASS_V(nir, nir_convert_from_ssa, true);
6633
6634 *nir_ret = nir;
6635 zink_shader_serialize_blob(nir, &zs->blob);
6636 }
6637
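/* create an empty tcs shell for a given patch size; the passthrough body is generated
 * later by zink_shader_tcs_init once the paired tes is known
 */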
6638 struct zink_shader *
6639 zink_shader_tcs_create(struct zink_screen *screen, unsigned vertices_per_patch)
6640 {
6641 struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6642 util_queue_fence_init(&zs->precompile.fence);
6643 zs->hash = _mesa_hash_pointer(zs);
6644 zs->programs = _mesa_pointer_set_create(NULL);
6645 simple_mtx_init(&zs->lock, mtx_plain);
6646
6647 nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
6648 nir_function *fn = nir_function_create(nir, "main");
6649 fn->is_entrypoint = true;
6650 nir_function_impl_create(fn);
6651 zs->nir = nir;
6652
6653 nir->info.tess.tcs_vertices_out = vertices_per_patch;
6654 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6655 zs->non_fs.is_generated = true;
6656 return zs;
6657 }
6658
6659 bool
6660 zink_shader_has_cubes(nir_shader *nir)
6661 {
6662 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
6663 const struct glsl_type *type = glsl_without_array(var->type);
6664 if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
6665 return true;
6666 }
6667 return false;
6668 }
6669
6670 nir_shader *
6671 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
6672 {
6673 struct blob_reader blob_reader;
6674 blob_reader_init(&blob_reader, blob->data, blob->size);
6675 return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
6676 }
6677
6678 nir_shader *
6679 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
6680 {
6681 return zink_shader_blob_deserialize(screen, &zs->blob);
6682 }
6683
6684 void
6685 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
6686 {
6687 blob_init(blob);
6688 #ifndef NDEBUG
6689 bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
6690 #else
6691 bool strip = false;
6692 #endif
6693 nir_serialize(blob, nir, strip);
6694 }
6695
6696 void
6697 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
6698 {
6699 nir_shader *nir = zink_shader_deserialize(screen, zs);
6700 nir_print_shader(nir, fp);
6701 ralloc_free(nir);
6702 }
6703