1 /*
2 * Copyright 2018 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
31
32 #include "pipe/p_state.h"
33
34 #include "nir.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
40
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43
44 #include "util/u_memory.h"
45
46 #include "compiler/spirv/nir_spirv.h"
47 #include "vulkan/util/vk_util.h"
48
49 bool
50 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
51
52
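/* Recursively copy one deref into another of the same bare type: structs and
 * arrays/matrices are walked element-by-element until a scalar/vector leaf is
 * reached, where a load+store with a full write mask is emitted.
 */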
53 static void
54 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
55 {
56 assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
57 if (glsl_type_is_struct_or_ifc(dst->type)) {
58 for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
59 copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
60 }
61 } else if (glsl_type_is_array_or_matrix(dst->type)) {
62 unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
63 for (unsigned i = 0; i < count; i++) {
64 copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
65 }
66 } else {
67 nir_def *load = nir_load_deref(b, src);
68 nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
69 }
70 }
71
72 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
73
74 static void
75 create_gfx_pushconst(nir_shader *nir)
76 {
77 #define PUSHCONST_MEMBER(member_idx, field) \
78 fields[member_idx].type = \
79 glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
80 fields[member_idx].name = ralloc_asprintf(nir, #field); \
81 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
82
83 nir_variable *pushconst;
84 /* create compatible layout for the ntv push constant loader */
85 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
86 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
87 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
88 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
89 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
90 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
91 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
92 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
93 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
94
95 pushconst = nir_variable_create(nir, nir_var_mem_push_const,
96 glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
97 "gfx_pushconst");
98 pushconst->data.location = INT_MAX; //doesn't really matter
99
100 #undef PUSHCONST_MEMBER
101 }
102
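/* gl_BaseVertex must read as 0 for non-indexed draws: load the
 * draw_mode_is_indexed push constant and select between the real base vertex
 * and 0 accordingly.
 */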
103 static bool
104 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
105 {
106 if (instr->intrinsic != nir_intrinsic_load_base_vertex)
107 return false;
108
109 b->cursor = nir_after_instr(&instr->instr);
110 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
111 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
112 load->num_components = 1;
113 nir_def_init(&load->instr, &load->def, 1, 32);
114 nir_builder_instr_insert(b, &load->instr);
115
116 nir_def *composite = nir_build_alu(b, nir_op_bcsel,
117 nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
118 &instr->def,
119 nir_imm_int(b, 0),
120 NULL);
121
122 nir_def_rewrite_uses_after(&instr->def, composite,
123 composite->parent_instr);
124 return true;
125 }
126
127 static bool
128 lower_basevertex(nir_shader *shader)
129 {
130 if (shader->info.stage != MESA_SHADER_VERTEX)
131 return false;
132
133 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
134 return false;
135
136 return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
137 nir_metadata_dominance, NULL);
138 }
139
140
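/* Replace gl_DrawID reads with a load from the ZINK_GFX_PUSHCONST_DRAW_ID
 * push constant slot.
 */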
141 static bool
142 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
143 {
144 if (instr->intrinsic != nir_intrinsic_load_draw_id)
145 return false;
146
147 b->cursor = nir_before_instr(&instr->instr);
148 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
149 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
150 load->num_components = 1;
151 nir_def_init(&load->instr, &load->def, 1, 32);
152 nir_builder_instr_insert(b, &load->instr);
153
154 nir_def_rewrite_uses(&instr->def, &load->def);
155
156 return true;
157 }
158
159 static bool
160 lower_drawid(nir_shader *shader)
161 {
162 if (shader->info.stage != MESA_SHADER_VERTEX)
163 return false;
164
165 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
166 return false;
167
168 return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
169 nir_metadata_dominance, NULL);
170 }
171
172 struct lower_gl_point_state {
173 nir_variable *gl_pos_out;
174 nir_variable *gl_point_size;
175 };
176
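/* For each EmitVertex() on stream 0, expand the point into a 4-vertex
 * triangle strip centered on gl_Position, extending by +/- gl_PointSize/2 in
 * clip space (scaled by the viewport-scale push constant and gl_Position.w),
 * and drop the original emit/end-primitive instructions.
 */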
177 static bool
178 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
179 {
180 struct lower_gl_point_state *state = data;
181 nir_def *vp_scale, *pos;
182
183 if (instr->type != nir_instr_type_intrinsic)
184 return false;
185
186 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
187 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
188 intrin->intrinsic != nir_intrinsic_emit_vertex)
189 return false;
190
191 if (nir_intrinsic_stream_id(intrin) != 0)
192 return false;
193
194 if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
195 intrin->intrinsic == nir_intrinsic_end_primitive) {
196 nir_instr_remove(&intrin->instr);
197 return true;
198 }
199
200 b->cursor = nir_before_instr(instr);
201
202 // load the viewport scale from the push constants
203 nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
204 vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
205
206 // Load point info values
207 nir_def *point_size = nir_load_var(b, state->gl_point_size);
208 nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
209
210 // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
211 nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
212 w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
213 // half_w_delta = w_delta / 2
214 nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
215
216 // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
217 nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
218 h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
219 // half_h_delta = h_delta / 2
220 nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
221
222 nir_def *point_dir[4][2] = {
223 { nir_imm_float(b, -1), nir_imm_float(b, -1) },
224 { nir_imm_float(b, -1), nir_imm_float(b, 1) },
225 { nir_imm_float(b, 1), nir_imm_float(b, -1) },
226 { nir_imm_float(b, 1), nir_imm_float(b, 1) }
227 };
228
229 nir_def *point_pos_x = nir_channel(b, point_pos, 0);
230 nir_def *point_pos_y = nir_channel(b, point_pos, 1);
231
232 for (size_t i = 0; i < 4; i++) {
233 pos = nir_vec4(b,
234 nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
235 nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
236 nir_channel(b, point_pos, 2),
237 nir_channel(b, point_pos, 3));
238
239 nir_store_var(b, state->gl_pos_out, pos, 0xf);
240
241 nir_emit_vertex(b);
242 }
243
244 nir_end_primitive(b);
245
246 nir_instr_remove(&intrin->instr);
247
248 return true;
249 }
250
251 static bool
252 lower_gl_point_gs(nir_shader *shader)
253 {
254 struct lower_gl_point_state state;
255
256 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
257 shader->info.gs.vertices_out *= 4;
258
259 // Gets the gl_Position in and out
260 state.gl_pos_out =
261 nir_find_variable_with_location(shader, nir_var_shader_out,
262 VARYING_SLOT_POS);
263 state.gl_point_size =
264 nir_find_variable_with_location(shader, nir_var_shader_out,
265 VARYING_SLOT_PSIZ);
266
267 // if position in or gl_PointSize aren't written, we have nothing to do
268 if (!state.gl_pos_out || !state.gl_point_size)
269 return false;
270
271 return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
272 nir_metadata_dominance, &state);
273 }
274
275 struct lower_pv_mode_state {
276 nir_variable *varyings[VARYING_SLOT_MAX][4];
277 nir_variable *pos_counter;
278 nir_variable *out_pos_counter;
279 nir_variable *ring_offset;
280 unsigned ring_size;
281 unsigned primitive_vert_count;
282 unsigned prim;
283 };
284
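/* Map a logical vertex index onto the ring buffer of saved varyings: add the
 * current ring_offset and wrap by ring_size. For example, with ring_size = 4
 * and ring_offset = 3, indices 0, 1, 2 map to slots 3, 0, 1.
 */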
285 static nir_def*
286 lower_pv_mode_gs_ring_index(nir_builder *b,
287 struct lower_pv_mode_state *state,
288 nir_def *index)
289 {
290 nir_def *ring_offset = nir_load_var(b, state->ring_offset);
291 return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
292 state->ring_size);
293 }
294
295 /* Given the final deref in a chain of derefs, this function will walk up the chain
296 * until it finds a var deref.
297 *
298 * It will then recreate an identical chain that ends with the provided deref.
299 */
300 static nir_deref_instr*
301 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
302 {
303 nir_deref_instr *parent = nir_deref_instr_parent(old);
304 if (!parent)
305 return new;
306 switch(old->deref_type) {
307 case nir_deref_type_var:
308 return new;
309 case nir_deref_type_array:
310 return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
311 case nir_deref_type_struct:
312 return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
313 case nir_deref_type_array_wildcard:
314 case nir_deref_type_ptr_as_array:
315 case nir_deref_type_cast:
316 unreachable("unexpected deref type");
317 }
318 unreachable("impossible deref type");
319 }
320
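/* Redirect stores to shader_out variables into the matching ring-buffer
 * temporary, indexed by the current pos_counter, so the vertex can be
 * replayed later with a rotated provoking vertex.
 */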
321 static bool
322 lower_pv_mode_gs_store(nir_builder *b,
323 nir_intrinsic_instr *intrin,
324 struct lower_pv_mode_state *state)
325 {
326 b->cursor = nir_before_instr(&intrin->instr);
327 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
328 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
329 nir_variable *var = nir_deref_instr_get_variable(deref);
330
331 gl_varying_slot location = var->data.location;
332 unsigned location_frac = var->data.location_frac;
333 assert(state->varyings[location][location_frac]);
334 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
335 nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
336 nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
337 nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
338 // recreate the chain of derefs that led to the store.
339 nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
340 nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
341 nir_instr_remove(&intrin->instr);
342 return true;
343 }
344
345 return false;
346 }
347
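/* Re-emit one buffered primitive starting at current_vertex, picking each
 * vertex out of the ring buffer with an index rotation so the last vertex of
 * the input primitive is emitted first.
 */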
348 static void
349 lower_pv_mode_emit_rotated_prim(nir_builder *b,
350 struct lower_pv_mode_state *state,
351 nir_def *current_vertex)
352 {
353 nir_def *two = nir_imm_int(b, 2);
354 nir_def *three = nir_imm_int(b, 3);
355 bool is_triangle = state->primitive_vert_count == 3;
356 /* This shader will always see the last three vertices emitted by the user gs.
357 * The following table is used to rotate primitives within a strip generated
358 * by the user gs such that the last vertex becomes the first.
359 *
360 * [lines, tris][even/odd index][vertex mod 3]
361 */
362 static const unsigned vert_maps[2][2][3] = {
363 {{1, 0, 0}, {1, 0, 0}},
364 {{2, 0, 1}, {2, 1, 0}}
365 };
366 /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
367 * is either the last or the second, depending on whether the triangle is at an odd
368 * or even position within the strip.
369 *
370 * odd or even primitive within draw
371 */
372 nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
373 for (unsigned i = 0; i < state->primitive_vert_count; i++) {
374 /* odd or even triangle within strip emitted by user GS
375 * this is handled using the table
376 */
377 nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
378 unsigned offset_even = vert_maps[is_triangle][0][i];
379 unsigned offset_odd = vert_maps[is_triangle][1][i];
380 nir_def *offset_even_value = nir_imm_int(b, offset_even);
381 nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
382 nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
383 offset_odd_value, offset_even_value);
384 /* Here we account for how triangles are provided to the gs from a strip.
385 * For even primitives we rotate by 3, meaning we do nothing.
386 * For odd primitives we rotate by 2, combined with the previous rotation this
387 * means the second vertex becomes the last.
388 */
389 if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
390 rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
391 nir_isub(b, three,
392 odd_prim)),
393 three);
394 /* Triangles that come from fans are provided to the gs the same way as
395 * odd triangles from a strip so always rotate by 2.
396 */
397 else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
398 rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
399 three);
400 rotated_i = nir_iadd(b, rotated_i, current_vertex);
401 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
402 gl_varying_slot location = var->data.location;
403 unsigned location_frac = var->data.location_frac;
404 if (state->varyings[location][location_frac]) {
405 nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
406 nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
407 copy_vars(b, nir_build_deref_var(b, var), value);
408 }
409 }
410 nir_emit_vertex(b);
411 }
412 }
413
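/* EmitVertex() here only advances pos_counter; the per-vertex outputs were
 * already captured into the ring buffer by lower_pv_mode_gs_store, and the
 * actual emission is deferred to the EndPrimitive() lowering.
 */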
414 static bool
415 lower_pv_mode_gs_emit_vertex(nir_builder *b,
416 nir_intrinsic_instr *intrin,
417 struct lower_pv_mode_state *state)
418 {
419 b->cursor = nir_before_instr(&intrin->instr);
420
421 // increment pos_counter
422 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
423 nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
424
425 nir_instr_remove(&intrin->instr);
426 return true;
427 }
428
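/* EndPrimitive() drains the buffered vertices: while a full primitive's worth
 * remains, emit a rotated copy of it, then reset the counters and point
 * ring_offset at the most recently written vertex so index 0 reads it back.
 */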
429 static bool
430 lower_pv_mode_gs_end_primitive(nir_builder *b,
431 nir_intrinsic_instr *intrin,
432 struct lower_pv_mode_state *state)
433 {
434 b->cursor = nir_before_instr(&intrin->instr);
435
436 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
437 nir_push_loop(b);
438 {
439 nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
440 nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
441 nir_imm_int(b, state->primitive_vert_count)));
442 nir_jump(b, nir_jump_break);
443 nir_pop_if(b, NULL);
444
445 lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
446 nir_end_primitive(b);
447
448 nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
449 }
450 nir_pop_loop(b, NULL);
451 /* Set the ring offset such that when position 0 is
452 * read we get the last value written
453 */
454 nir_store_var(b, state->ring_offset, pos_counter, 1);
455 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
456 nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
457
458 nir_instr_remove(&intrin->instr);
459 return true;
460 }
461
462 static bool
463 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
464 {
465 if (instr->type != nir_instr_type_intrinsic)
466 return false;
467
468 struct lower_pv_mode_state *state = data;
469 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
470
471 switch (intrin->intrinsic) {
472 case nir_intrinsic_store_deref:
473 return lower_pv_mode_gs_store(b, intrin, state);
474 case nir_intrinsic_copy_deref:
475 unreachable("should be lowered");
476 case nir_intrinsic_emit_vertex_with_counter:
477 case nir_intrinsic_emit_vertex:
478 return lower_pv_mode_gs_emit_vertex(b, intrin, state);
479 case nir_intrinsic_end_primitive:
480 case nir_intrinsic_end_primitive_with_counter:
481 return lower_pv_mode_gs_end_primitive(b, intrin, state);
482 default:
483 return false;
484 }
485 }
486
487 static bool
488 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
489 {
490 nir_builder b;
491 struct lower_pv_mode_state state;
492 memset(state.varyings, 0, sizeof(state.varyings));
493
494 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
495 b = nir_builder_at(nir_before_impl(entry));
496
497 state.primitive_vert_count =
498 mesa_vertices_per_prim(shader->info.gs.output_primitive);
499 state.ring_size = shader->info.gs.vertices_out;
500
501 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
502 gl_varying_slot location = var->data.location;
503 unsigned location_frac = var->data.location_frac;
504
505 char name[100];
506 snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
507 state.varyings[location][location_frac] =
508 nir_local_variable_create(entry,
509 glsl_array_type(var->type,
510 state.ring_size,
511 false),
512 name);
513 }
514
515 state.pos_counter = nir_local_variable_create(entry,
516 glsl_uint_type(),
517 "__pos_counter");
518
519 state.out_pos_counter = nir_local_variable_create(entry,
520 glsl_uint_type(),
521 "__out_pos_counter");
522
523 state.ring_offset = nir_local_variable_create(entry,
524 glsl_uint_type(),
525 "__ring_offset");
526
527 state.prim = prim;
528
529 // initialize pos_counter and out_pos_counter
530 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
531 nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
532 nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
533
534 shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
535 (state.primitive_vert_count - 1)) *
536 state.primitive_vert_count;
537 return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
538 nir_metadata_dominance, &state);
539 }
540
541 struct lower_line_stipple_state {
542 nir_variable *pos_out;
543 nir_variable *stipple_out;
544 nir_variable *prev_pos;
545 nir_variable *pos_counter;
546 nir_variable *stipple_counter;
547 bool line_rectangular;
548 };
549
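/* Perspective-divide a clip-space position and scale the resulting NDC x/y by
 * the viewport scale, yielding window-space coordinates for screen-space
 * length calculations.
 */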
550 static nir_def *
551 viewport_map(nir_builder *b, nir_def *vert,
552 nir_def *scale)
553 {
554 nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
555 nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
556 w_recip);
557 return nir_fmul(b, ndc_point, scale);
558 }
559
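/* At every emitted vertex after the first, accumulate the screen-space length
 * of the segment from the previous vertex into stipple_counter and forward it
 * to the fragment shader through the noperspective __stipple output.
 */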
560 static bool
561 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
562 {
563 struct lower_line_stipple_state *state = data;
564 if (instr->type != nir_instr_type_intrinsic)
565 return false;
566
567 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
568 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
569 intrin->intrinsic != nir_intrinsic_emit_vertex)
570 return false;
571
572 b->cursor = nir_before_instr(instr);
573
574 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
575 // viewport-map endpoints
576 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
577 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
578 nir_def *prev = nir_load_var(b, state->prev_pos);
579 nir_def *curr = nir_load_var(b, state->pos_out);
580 prev = viewport_map(b, prev, vp_scale);
581 curr = viewport_map(b, curr, vp_scale);
582
583 // calculate length of line
584 nir_def *len;
585 if (state->line_rectangular)
586 len = nir_fast_distance(b, prev, curr);
587 else {
588 nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
589 len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
590 }
591 // update stipple_counter
592 nir_store_var(b, state->stipple_counter,
593 nir_fadd(b, nir_load_var(b, state->stipple_counter),
594 len), 1);
595 nir_pop_if(b, NULL);
596 // emit stipple out
597 nir_copy_var(b, state->stipple_out, state->stipple_counter);
598 nir_copy_var(b, state->prev_pos, state->pos_out);
599
600 // update prev_pos and pos_counter for next vertex
601 b->cursor = nir_after_instr(instr);
602 nir_store_var(b, state->pos_counter,
603 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
604 1), 1);
605
606 return true;
607 }
608
609 static bool
610 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
611 {
612 nir_builder b;
613 struct lower_line_stipple_state state;
614
615 state.pos_out =
616 nir_find_variable_with_location(shader, nir_var_shader_out,
617 VARYING_SLOT_POS);
618
619 // if position isn't written, we have nothing to do
620 if (!state.pos_out)
621 return false;
622
623 state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
624 glsl_float_type(),
625 "__stipple");
626 state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
627 state.stipple_out->data.driver_location = shader->num_outputs++;
628 state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
629 shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
630
631 // create temp variables
632 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
633 glsl_vec4_type(),
634 "__prev_pos");
635 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
636 glsl_uint_type(),
637 "__pos_counter");
638 state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
639 glsl_float_type(),
640 "__stipple_counter");
641
642 state.line_rectangular = line_rectangular;
643 // initialize pos_counter and stipple_counter
644 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
645 b = nir_builder_at(nir_before_impl(entry));
646 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
647 nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
648
649 return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
650 nir_metadata_dominance, &state);
651 }
652
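/* Apply line stippling per sample: walk the input sample mask, interpolate the
 * __stipple coordinate at each covered sample, test it against the 16-bit
 * pattern (the repeat factor lives in the upper half of the push constant),
 * and clear failing samples from the output sample mask.
 */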
653 static bool
654 lower_line_stipple_fs(nir_shader *shader)
655 {
656 nir_builder b;
657 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
658 b = nir_builder_at(nir_after_impl(entry));
659
660 // create stipple counter
661 nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
662 glsl_float_type(),
663 "__stipple");
664 stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
665 stipple->data.driver_location = shader->num_inputs++;
666 stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
667 shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
668
669 nir_variable *sample_mask_out =
670 nir_find_variable_with_location(shader, nir_var_shader_out,
671 FRAG_RESULT_SAMPLE_MASK);
672 if (!sample_mask_out) {
673 sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
674 glsl_uint_type(), "sample_mask");
675 sample_mask_out->data.driver_location = shader->num_outputs++;
676 sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
677 }
678
679 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
680 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
681 nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
682 pattern = nir_iand_imm(&b, pattern, 0xffff);
683
684 nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
685 nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
686 nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
687 nir_store_var(&b, v, sample_mask_in, 1);
688 nir_store_var(&b, sample_mask, sample_mask_in, 1);
689 nir_push_loop(&b);
690 {
691 nir_def *value = nir_load_var(&b, v);
692 nir_def *index = nir_ufind_msb(&b, value);
693 nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
694 nir_def *new_value = nir_ixor(&b, value, index_mask);
695 nir_store_var(&b, v, new_value, 1);
696 nir_push_if(&b, nir_ieq_imm(&b, value, 0));
697 nir_jump(&b, nir_jump_break);
698 nir_pop_if(&b, NULL);
699
700 nir_def *stipple_pos =
701 nir_interp_deref_at_sample(&b, 1, 32,
702 &nir_build_deref_var(&b, stipple)->def, index);
703 stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
704 nir_imm_float(&b, 16.0));
705 stipple_pos = nir_f2i32(&b, stipple_pos);
706 nir_def *bit =
707 nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
708 nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
709 {
710 nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
711 sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
712 nir_store_var(&b, sample_mask, sample_mask_value, 1);
713 }
714 nir_pop_if(&b, NULL);
715 }
716 nir_pop_loop(&b, NULL);
717 nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
718
719 return true;
720 }
721
722 struct lower_line_smooth_state {
723 nir_variable *pos_out;
724 nir_variable *line_coord_out;
725 nir_variable *prev_pos;
726 nir_variable *pos_counter;
727 nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
728 *varyings[VARYING_SLOT_MAX][4]; // location_frac
729 };
730
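/* Stores to non-position outputs are captured into shader_temp copies so they
 * can be replayed for every vertex of the generated quads; gl_Position itself
 * is handled by the emit-vertex lowering.
 */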
731 static bool
732 lower_line_smooth_gs_store(nir_builder *b,
733 nir_intrinsic_instr *intrin,
734 struct lower_line_smooth_state *state)
735 {
736 b->cursor = nir_before_instr(&intrin->instr);
737 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
738 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
739 nir_variable *var = nir_deref_instr_get_variable(deref);
740
741 // we take care of position elsewhere
742 gl_varying_slot location = var->data.location;
743 unsigned location_frac = var->data.location_frac;
744 if (location != VARYING_SLOT_POS) {
745 assert(state->varyings[location]);
746 nir_store_var(b, state->varyings[location][location_frac],
747 intrin->src[1].ssa,
748 nir_intrinsic_write_mask(intrin));
749 nir_instr_remove(&intrin->instr);
750 return true;
751 }
752 }
753
754 return false;
755 }
756
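/* Starting with the second vertex of a line, emit an 8-vertex triangle strip
 * covering the widened segment plus its end caps: gl_Position is offset by
 * tangent/direction vectors derived from the line-width and viewport-scale
 * push constants, and a __line_coord varying is written for the FS to shade
 * coverage.
 */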
757 static bool
758 lower_line_smooth_gs_emit_vertex(nir_builder *b,
759 nir_intrinsic_instr *intrin,
760 struct lower_line_smooth_state *state)
761 {
762 b->cursor = nir_before_instr(&intrin->instr);
763
764 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
765 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
766 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
767 nir_def *prev = nir_load_var(b, state->prev_pos);
768 nir_def *curr = nir_load_var(b, state->pos_out);
769 nir_def *prev_vp = viewport_map(b, prev, vp_scale);
770 nir_def *curr_vp = viewport_map(b, curr, vp_scale);
771
772 nir_def *width = nir_load_push_constant_zink(b, 1, 32,
773 nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
774 nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
775
776 const unsigned yx[2] = { 1, 0 };
777 nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
778 nir_def *len = nir_fast_length(b, vec);
779 nir_def *dir = nir_normalize(b, vec);
780 nir_def *half_length = nir_fmul_imm(b, len, 0.5);
781 half_length = nir_fadd_imm(b, half_length, 0.5);
782
783 nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
784 nir_def *tangent =
785 nir_fmul(b,
786 nir_fmul(b,
787 nir_swizzle(b, dir, yx, 2),
788 nir_imm_vec2(b, 1.0, -1.0)),
789 vp_scale_rcp);
790 tangent = nir_fmul(b, tangent, half_width);
791 tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
792 dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
793
794 nir_def *line_offets[8] = {
795 nir_fadd(b, tangent, nir_fneg(b, dir)),
796 nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
797 tangent,
798 nir_fneg(b, tangent),
799 tangent,
800 nir_fneg(b, tangent),
801 nir_fadd(b, tangent, dir),
802 nir_fadd(b, nir_fneg(b, tangent), dir),
803 };
804 nir_def *line_coord =
805 nir_vec4(b, half_width, half_width, half_length, half_length);
806 nir_def *line_coords[8] = {
807 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
808 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
809 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
810 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
811 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
812 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
813 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
814 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
815 };
816
817 /* emit first end-cap, and start line */
818 for (int i = 0; i < 4; ++i) {
819 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
820 gl_varying_slot location = var->data.location;
821 unsigned location_frac = var->data.location_frac;
822 if (state->prev_varyings[location][location_frac])
823 nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
824 }
825 nir_store_var(b, state->pos_out,
826 nir_fadd(b, prev, nir_fmul(b, line_offets[i],
827 nir_channel(b, prev, 3))), 0xf);
828 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
829 nir_emit_vertex(b);
830 }
831
832 /* finish line and emit last end-cap */
833 for (int i = 4; i < 8; ++i) {
834 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
835 gl_varying_slot location = var->data.location;
836 unsigned location_frac = var->data.location_frac;
837 if (state->varyings[location][location_frac])
838 nir_copy_var(b, var, state->varyings[location][location_frac]);
839 }
840 nir_store_var(b, state->pos_out,
841 nir_fadd(b, curr, nir_fmul(b, line_offets[i],
842 nir_channel(b, curr, 3))), 0xf);
843 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
844 nir_emit_vertex(b);
845 }
846 nir_end_primitive(b);
847
848 nir_pop_if(b, NULL);
849
850 nir_copy_var(b, state->prev_pos, state->pos_out);
851 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
852 gl_varying_slot location = var->data.location;
853 unsigned location_frac = var->data.location_frac;
854 if (state->varyings[location][location_frac])
855 nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
856 }
857
858 // update prev_pos and pos_counter for next vertex
859 b->cursor = nir_after_instr(&intrin->instr);
860 nir_store_var(b, state->pos_counter,
861 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
862 1), 1);
863
864 nir_instr_remove(&intrin->instr);
865 return true;
866 }
867
868 static bool
869 lower_line_smooth_gs_end_primitive(nir_builder *b,
870 nir_intrinsic_instr *intrin,
871 struct lower_line_smooth_state *state)
872 {
873 b->cursor = nir_before_instr(&intrin->instr);
874
875 // reset line counter
876 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
877
878 nir_instr_remove(&intrin->instr);
879 return true;
880 }
881
882 static bool
883 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
884 {
885 if (instr->type != nir_instr_type_intrinsic)
886 return false;
887
888 struct lower_line_smooth_state *state = data;
889 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
890
891 switch (intrin->intrinsic) {
892 case nir_intrinsic_store_deref:
893 return lower_line_smooth_gs_store(b, intrin, state);
894 case nir_intrinsic_copy_deref:
895 unreachable("should be lowered");
896 case nir_intrinsic_emit_vertex_with_counter:
897 case nir_intrinsic_emit_vertex:
898 return lower_line_smooth_gs_emit_vertex(b, intrin, state);
899 case nir_intrinsic_end_primitive:
900 case nir_intrinsic_end_primitive_with_counter:
901 return lower_line_smooth_gs_end_primitive(b, intrin, state);
902 default:
903 return false;
904 }
905 }
906
907 static bool
908 lower_line_smooth_gs(nir_shader *shader)
909 {
910 nir_builder b;
911 struct lower_line_smooth_state state;
912
913 memset(state.varyings, 0, sizeof(state.varyings));
914 memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
915 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
916 gl_varying_slot location = var->data.location;
917 unsigned location_frac = var->data.location_frac;
918 if (location == VARYING_SLOT_POS)
919 continue;
920
921 char name[100];
922 snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
923 state.varyings[location][location_frac] =
924 nir_variable_create(shader, nir_var_shader_temp,
925 var->type, name);
926
927 snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
928 state.prev_varyings[location][location_frac] =
929 nir_variable_create(shader, nir_var_shader_temp,
930 var->type, name);
931 }
932
933 state.pos_out =
934 nir_find_variable_with_location(shader, nir_var_shader_out,
935 VARYING_SLOT_POS);
936
937 // if position isn't written, we have nothing to do
938 if (!state.pos_out)
939 return false;
940
941 unsigned location = 0;
942 nir_foreach_shader_in_variable(var, shader) {
943 if (var->data.driver_location >= location)
944 location = var->data.driver_location + 1;
945 }
946
947 state.line_coord_out =
948 nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
949 "__line_coord");
950 state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
951 state.line_coord_out->data.driver_location = location;
952 state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
953 shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
954 shader->num_outputs++;
955
956 // create temp variables
957 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
958 glsl_vec4_type(),
959 "__prev_pos");
960 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
961 glsl_uint_type(),
962 "__pos_counter");
963
964 // initialize pos_counter
965 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
966 b = nir_builder_at(nir_before_impl(entry));
967 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
968
969 shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
970 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
971
972 return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
973 nir_metadata_dominance, &state);
974 }
975
976 static bool
977 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
978 {
979 int dummy;
980 nir_builder b;
981
982 nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
983 if (lower_stipple) {
984 stipple_counter = nir_variable_create(shader, nir_var_shader_in,
985 glsl_float_type(),
986 "__stipple");
987 stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
988 stipple_counter->data.driver_location = shader->num_inputs++;
989 stipple_counter->data.location =
990 MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
991 shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
992
993 stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
994 glsl_uint_type(),
995 "stipple_pattern");
996
997 // initialize stipple_pattern
998 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
999 b = nir_builder_at(nir_before_impl(entry));
1000 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1001 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1002 nir_store_var(&b, stipple_pattern, pattern, 1);
1003 }
1004
1005 nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1006 return true;
1007 }
1008
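/* Dual-source blending: the second color source must live at location 0 with
 * index 1, so remap FRAG_RESULT_DATA1 onto DATA0 with index 1.
 */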
1009 static bool
1010 lower_dual_blend(nir_shader *shader)
1011 {
1012 bool progress = false;
1013 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1014 if (var) {
1015 var->data.location = FRAG_RESULT_DATA0;
1016 var->data.index = 1;
1017 progress = true;
1018 }
1019 nir_shader_preserve_all_metadata(shader);
1020 return progress;
1021 }
1022
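/* Rewrite vector pack_64_2x32/unpack_64_2x32 into their *_split forms, which
 * operate on individual 32-bit channels.
 */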
1023 static bool
1024 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1025 {
1026 if (instr->type != nir_instr_type_alu)
1027 return false;
1028 nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1029 if (alu_instr->op != nir_op_pack_64_2x32 &&
1030 alu_instr->op != nir_op_unpack_64_2x32)
1031 return false;
1032 b->cursor = nir_before_instr(&alu_instr->instr);
1033 nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1034 nir_def *dest;
1035 switch (alu_instr->op) {
1036 case nir_op_pack_64_2x32:
1037 dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1038 break;
1039 case nir_op_unpack_64_2x32:
1040 dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1041 break;
1042 default:
1043 unreachable("Impossible opcode");
1044 }
1045 nir_def_rewrite_uses(&alu_instr->def, dest);
1046 nir_instr_remove(&alu_instr->instr);
1047 return true;
1048 }
1049
1050 static bool
1051 lower_64bit_pack(nir_shader *shader)
1052 {
1053 return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1054 nir_metadata_block_index | nir_metadata_dominance, NULL);
1055 }
1056
1057 nir_shader *
1058 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1059 const nir_shader *prev_stage)
1060 {
1061 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1062 options,
1063 "filled quad gs");
1064
1065 nir_shader *nir = b.shader;
1066 nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1067 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1068 nir->info.gs.vertices_in = 4;
1069 nir->info.gs.vertices_out = 6;
1070 nir->info.gs.invocations = 1;
1071 nir->info.gs.active_stream_mask = 1;
1072
1073 nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1074 memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1075 if (prev_stage->xfb_info) {
1076 size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1077 nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1078 }
1079
1080 nir_variable *in_vars[VARYING_SLOT_MAX];
1081 nir_variable *out_vars[VARYING_SLOT_MAX];
1082 unsigned num_vars = 0;
1083
1084 /* Create input/output variables. */
1085 nir_foreach_shader_out_variable(var, prev_stage) {
1086 assert(!var->data.patch);
1087
1088 /* input vars can't be created for those */
1089 if (var->data.location == VARYING_SLOT_LAYER ||
1090 var->data.location == VARYING_SLOT_VIEW_INDEX ||
1091 /* psiz not needed for quads */
1092 var->data.location == VARYING_SLOT_PSIZ)
1093 continue;
1094
1095 char name[100];
1096 if (var->name)
1097 snprintf(name, sizeof(name), "in_%s", var->name);
1098 else
1099 snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1100
1101 nir_variable *in = nir_variable_clone(var, nir);
1102 ralloc_free(in->name);
1103 in->name = ralloc_strdup(in, name);
1104 in->type = glsl_array_type(var->type, 4, false);
1105 in->data.mode = nir_var_shader_in;
1106 nir_shader_add_variable(nir, in);
1107
1108 if (var->name)
1109 snprintf(name, sizeof(name), "out_%s", var->name);
1110 else
1111 snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1112
1113 nir_variable *out = nir_variable_clone(var, nir);
1114 ralloc_free(out->name);
1115 out->name = ralloc_strdup(out, name);
1116 out->data.mode = nir_var_shader_out;
1117 nir_shader_add_variable(nir, out);
1118
1119 in_vars[num_vars] = in;
1120 out_vars[num_vars++] = out;
1121 }
1122
1123 int mapping_first[] = {0, 1, 2, 0, 2, 3};
1124 int mapping_last[] = {0, 1, 3, 1, 2, 3};
1125 nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1126 last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1127 for (unsigned i = 0; i < 6; ++i) {
1128 /* swap indices 2 and 3 */
1129 nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1130 nir_imm_int(&b, mapping_last[i]),
1131 nir_imm_int(&b, mapping_first[i]));
1132 /* Copy inputs to outputs. */
1133 for (unsigned j = 0; j < num_vars; ++j) {
1134 if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1135 continue;
1136 }
1137 nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1138 copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1139 }
1140 nir_emit_vertex(&b, 0);
1141 if (i == 2)
1142 nir_end_primitive(&b, 0);
1143 }
1144
1145 nir_end_primitive(&b, 0);
1146 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1147 nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1148 return nir;
1149 }
1150
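/* Turn load_flat_mask / load_provoking_last into UBO loads at fixed
 * inlined-uniform offsets so nir_inline_uniforms can later fold them to
 * constants; 64-bit values are loaded as two dwords and repacked.
 */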
1151 static bool
1152 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1153 nir_intrinsic_instr *intrin,
1154 void *data)
1155 {
1156 int inlined_uniform_offset;
1157 switch (intrin->intrinsic) {
1158 case nir_intrinsic_load_flat_mask:
1159 inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1160 break;
1161 case nir_intrinsic_load_provoking_last:
1162 inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1163 break;
1164 default:
1165 return false;
1166 }
1167
1168 b->cursor = nir_before_instr(&intrin->instr);
1169 assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1170 /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1171 * anything with a different bit_size) so we need to split the load. */
1172 int num_dwords = intrin->def.bit_size / 32;
1173 nir_def *dwords[2] = {NULL};
1174 for (unsigned i = 0; i < num_dwords; i++)
1175 dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1176 nir_imm_int(b, inlined_uniform_offset + i),
1177 .align_mul = intrin->def.bit_size / 8,
1178 .align_offset = 0,
1179 .range_base = 0, .range = ~0);
1180 nir_def *new_dest_def;
1181 if (intrin->def.bit_size == 32)
1182 new_dest_def = dwords[0];
1183 else
1184 new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1185 nir_def_rewrite_uses(&intrin->def, new_dest_def);
1186 nir_instr_remove(&intrin->instr);
1187 return true;
1188 }
1189
1190 bool
1191 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1192 {
1193 return nir_shader_intrinsics_pass(nir,
1194 lower_system_values_to_inlined_uniforms_instr,
1195 nir_metadata_dominance, NULL);
1196 }
1197
1198 void
1199 zink_screen_init_compiler(struct zink_screen *screen)
1200 {
1201 static const struct nir_shader_compiler_options
1202 default_options = {
1203 .lower_ffma16 = true,
1204 .lower_ffma32 = true,
1205 .lower_ffma64 = true,
1206 .lower_scmp = true,
1207 .lower_fdph = true,
1208 .lower_flrp32 = true,
1209 .lower_fpow = true,
1210 .lower_fsat = true,
1211 .lower_hadd = true,
1212 .lower_iadd_sat = true,
1213 .lower_fisnormal = true,
1214 .lower_extract_byte = true,
1215 .lower_extract_word = true,
1216 .lower_insert_byte = true,
1217 .lower_insert_word = true,
1218
1219 /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1220 * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1221 * ldexp, so we don't just always lower it in NIR). Given that ldexp is
1222 * effectively unused (no instances in shader-db), it's not worth the
1223 * effort to do so.
1224 * */
1225 .lower_ldexp = true,
1226
1227 .lower_mul_high = true,
1228 .lower_uadd_carry = true,
1229 .lower_usub_borrow = true,
1230 .lower_uadd_sat = true,
1231 .lower_usub_sat = true,
1232 .lower_vector_cmp = true,
1233 .lower_int64_options = 0,
1234 .lower_doubles_options = nir_lower_dround_even,
1235 .lower_uniforms_to_ubo = true,
1236 .has_fsub = true,
1237 .has_isub = true,
1238 .lower_mul_2x32_64 = true,
1239 .support_16bit_alu = true, /* not quite what it sounds like */
1240 .max_unroll_iterations = 0,
1241 .use_interpolated_input_intrinsics = true,
1242 };
1243
1244 screen->nir_options = default_options;
1245
1246 if (!screen->info.feats.features.shaderInt64)
1247 screen->nir_options.lower_int64_options = ~0;
1248
1249 if (!screen->info.feats.features.shaderFloat64) {
1250 screen->nir_options.lower_doubles_options = ~0;
1251 screen->nir_options.lower_flrp64 = true;
1252 screen->nir_options.lower_ffma64 = true;
1253 /* soft fp64 function inlining will blow up loop bodies and effectively
1254 * stop Vulkan drivers from unrolling the loops.
1255 */
1256 screen->nir_options.max_unroll_iterations_fp64 = 32;
1257 }
1258
1259 /*
1260 The OpFRem and OpFMod instructions use cheap approximations of remainder,
1261 and the error can be large due to the discontinuity in trunc() and floor().
1262 This can produce mathematically unexpected results in some cases, such as
1263 FMod(x,x) computing x rather than 0, and can also cause the result to have
1264 a different sign than the infinitely precise result.
1265
1266 -Table 84. Precision of core SPIR-V Instructions
1267 * for drivers that are known to have imprecise fmod for doubles, lower dmod
1268 */
1269 if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
1270 screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1271 screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
1272 screen->nir_options.lower_doubles_options = nir_lower_dmod;
1273 }
1274
1275 const void *
1276 zink_get_compiler_options(struct pipe_screen *pscreen,
1277 enum pipe_shader_ir ir,
1278 gl_shader_stage shader)
1279 {
1280 assert(ir == PIPE_SHADER_IR_NIR);
1281 return &zink_screen(pscreen)->nir_options;
1282 }
1283
1284 struct nir_shader *
1285 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1286 {
1287 if (zink_debug & ZINK_DEBUG_TGSI) {
1288 fprintf(stderr, "TGSI shader:\n---8<---\n");
1289 tgsi_dump_to_file(tokens, 0, stderr);
1290 fprintf(stderr, "---8<---\n\n");
1291 }
1292
1293 return tgsi_to_nir(tokens, screen, false);
1294 }
1295
1296
1297 static bool
1298 def_is_64bit(nir_def *def, void *state)
1299 {
1300 bool *lower = (bool *)state;
1301 if (def && (def->bit_size == 64)) {
1302 *lower = true;
1303 return false;
1304 }
1305 return true;
1306 }
1307
1308 static bool
1309 src_is_64bit(nir_src *src, void *state)
1310 {
1311 bool *lower = (bool *)state;
1312 if (src && (nir_src_bit_size(*src) == 64)) {
1313 *lower = true;
1314 return false;
1315 }
1316 return true;
1317 }
1318
1319 static bool
1320 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1321 {
1322 bool lower = false;
1323 /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1324 * doesn't have const variants, so do the ugly const_cast here. */
1325 nir_instr *instr = (nir_instr *)const_instr;
1326
1327 nir_foreach_def(instr, def_is_64bit, &lower);
1328 if (lower)
1329 return true;
1330 nir_foreach_src(instr, src_is_64bit, &lower);
1331 return lower;
1332 }
1333
1334 static bool
1335 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1336 {
1337 nir_instr *instr = (nir_instr *)const_instr;
1338 nir_alu_instr *alu = nir_instr_as_alu(instr);
1339 switch (alu->op) {
1340 case nir_op_pack_64_2x32_split:
1341 case nir_op_pack_32_2x16_split:
1342 case nir_op_unpack_32_2x16_split_x:
1343 case nir_op_unpack_32_2x16_split_y:
1344 case nir_op_unpack_64_2x32_split_x:
1345 case nir_op_unpack_64_2x32_split_y:
1346 return true;
1347 default:
1348 break;
1349 }
1350 return false;
1351 }
1352
1353
1354 struct bo_vars {
1355 nir_variable *uniforms[5];
1356 nir_variable *ubo[5];
1357 nir_variable *ssbo[5];
1358 uint32_t first_ubo;
1359 uint32_t first_ssbo;
1360 };
1361
1362 static struct bo_vars
1363 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1364 {
1365 struct bo_vars bo;
1366 memset(&bo, 0, sizeof(bo));
1367 if (zs->ubos_used)
1368 bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1369 assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS);
1370 if (zs->ssbos_used)
1371 bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1372 assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1373 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1374 unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1375 if (var->data.mode == nir_var_mem_ssbo) {
1376 assert(!bo.ssbo[idx]);
1377 bo.ssbo[idx] = var;
1378 } else {
1379 if (var->data.driver_location) {
1380 assert(!bo.ubo[idx]);
1381 bo.ubo[idx] = var;
1382 } else {
1383 assert(!bo.uniforms[idx]);
1384 bo.uniforms[idx] = var;
1385 }
1386 }
1387 }
1388 return bo;
1389 }
1390
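/* For UBO/SSBO accesses with a constant offset that lies entirely past the
 * bound variable's sized array, drop the access: loads are replaced with zero
 * and stores are simply removed.
 */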
1391 static bool
1392 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1393 {
1394 struct bo_vars *bo = data;
1395 if (instr->type != nir_instr_type_intrinsic)
1396 return false;
1397 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1398 nir_variable *var = NULL;
1399 nir_def *offset = NULL;
1400 bool is_load = true;
1401 b->cursor = nir_before_instr(instr);
1402
1403 switch (intr->intrinsic) {
1404 case nir_intrinsic_store_ssbo:
1405 var = bo->ssbo[intr->def.bit_size >> 4];
1406 offset = intr->src[2].ssa;
1407 is_load = false;
1408 break;
1409 case nir_intrinsic_load_ssbo:
1410 var = bo->ssbo[intr->def.bit_size >> 4];
1411 offset = intr->src[1].ssa;
1412 break;
1413 case nir_intrinsic_load_ubo:
1414 if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1415 var = bo->uniforms[intr->def.bit_size >> 4];
1416 else
1417 var = bo->ubo[intr->def.bit_size >> 4];
1418 offset = intr->src[1].ssa;
1419 break;
1420 default:
1421 return false;
1422 }
1423 nir_src offset_src = nir_src_for_ssa(offset);
1424 if (!nir_src_is_const(offset_src))
1425 return false;
1426
1427 unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1428 const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1429 unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1430 bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1431 if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1432 return false;
1433
1434 unsigned rewrites = 0;
1435 nir_def *result[2];
1436 for (unsigned i = 0; i < intr->num_components; i++) {
1437 if (offset_bytes + i >= size) {
1438 rewrites++;
1439 if (is_load)
1440 result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1441 }
1442 }
1443 assert(rewrites == intr->num_components);
1444 if (is_load) {
1445 nir_def *load = nir_vec(b, result, intr->num_components);
1446 nir_def_rewrite_uses(&intr->def, load);
1447 }
1448 nir_instr_remove(instr);
1449 return true;
1450 }
1451
1452 static bool
1453 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1454 {
1455 struct bo_vars bo = get_bo_vars(zs, shader);
1456 return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1457 }
1458
1459 static void
1460 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1461 {
1462 bool progress;
1463 do {
1464 progress = false;
1465 if (s->options->lower_int64_options)
1466 NIR_PASS_V(s, nir_lower_int64);
1467 if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1468 NIR_PASS_V(s, lower_64bit_pack);
1469 NIR_PASS_V(s, nir_lower_vars_to_ssa);
1470 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1471 NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1472 NIR_PASS(progress, s, nir_copy_prop);
1473 NIR_PASS(progress, s, nir_opt_remove_phis);
1474 if (s->options->lower_int64_options) {
1475 NIR_PASS(progress, s, nir_lower_64bit_phis);
1476 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1477 }
1478 NIR_PASS(progress, s, nir_opt_dce);
1479 NIR_PASS(progress, s, nir_opt_dead_cf);
1480 NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1481 NIR_PASS(progress, s, nir_opt_cse);
1482 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1483 NIR_PASS(progress, s, nir_opt_algebraic);
1484 NIR_PASS(progress, s, nir_opt_constant_folding);
1485 NIR_PASS(progress, s, nir_opt_undef);
1486 NIR_PASS(progress, s, zink_nir_lower_b2b);
1487 if (zs)
1488 NIR_PASS(progress, s, bound_bo_access, zs);
1489 if (can_shrink)
1490 NIR_PASS(progress, s, nir_opt_shrink_vectors);
1491 } while (progress);
1492
1493 do {
1494 progress = false;
1495 NIR_PASS(progress, s, nir_opt_algebraic_late);
1496 if (progress) {
1497 NIR_PASS_V(s, nir_copy_prop);
1498 NIR_PASS_V(s, nir_opt_dce);
1499 NIR_PASS_V(s, nir_opt_cse);
1500 }
1501 } while (progress);
1502 }
1503
1504 /* - copy the lowered fbfetch variable
1505 * - set the new one up as an input attachment for descriptor 0.6
1506 * - load it as an image
1507 * - overwrite the previous load
1508 */
1509 static bool
1510 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1511 {
1512 bool ms = data != NULL;
1513 if (instr->type != nir_instr_type_intrinsic)
1514 return false;
1515 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1516 if (intr->intrinsic != nir_intrinsic_load_deref)
1517 return false;
1518 nir_variable *var = nir_intrinsic_get_var(intr, 0);
1519 if (!var->data.fb_fetch_output)
1520 return false;
1521 b->cursor = nir_after_instr(instr);
1522 nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1523 /* If Dim is SubpassData, ... Image Format must be Unknown
1524 * - SPIR-V OpTypeImage specification
1525 */
1526 fbfetch->data.image.format = 0;
1527 fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1528 fbfetch->data.mode = nir_var_uniform;
1529 fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1531 fbfetch->data.sample = ms;
1532 enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1533 fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1534 nir_shader_add_variable(b->shader, fbfetch);
1535 nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1536 nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1537 nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1538 nir_def_rewrite_uses(&intr->def, load);
1539 return true;
1540 }
1541
1542 static bool
1543 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1544 {
1545 nir_foreach_shader_out_variable(var, shader) {
1546 if (var->data.fb_fetch_output) {
1547 *fbfetch = var;
1548 break;
1549 }
1550 }
1551 assert(*fbfetch);
1552 if (!*fbfetch)
1553 return false;
1554 return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1555 }
1556
1557 /*
1558 * Add a check for out of bounds LOD for every texel fetch op
1559 * It boils down to:
1560 * - if (lod < query_levels(tex))
1561 * - res = txf(tex)
1562 * - else
1563 * - res = (0, 0, 0, 1)
1564 */
1565 static bool
1566 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1567 {
1568 if (in->type != nir_instr_type_tex)
1569 return false;
1570 nir_tex_instr *txf = nir_instr_as_tex(in);
1571 if (txf->op != nir_texop_txf)
1572 return false;
1573
1574 b->cursor = nir_before_instr(in);
1575 int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1576 assert(lod_idx >= 0);
1577 nir_src lod_src = txf->src[lod_idx].src;
1578 if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1579 return false;
1580
1581 nir_def *lod = lod_src.ssa;
1582
1583 int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1584 int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1585 nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1586 !!(offset_idx >= 0) + !!(handle_idx >= 0));
1587 levels->op = nir_texop_query_levels;
1588 levels->texture_index = txf->texture_index;
1589 levels->dest_type = nir_type_int | lod->bit_size;
1590 if (offset_idx >= 0) {
1591 levels->src[0].src_type = nir_tex_src_texture_offset;
1592 levels->src[0].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1593 }
1594 if (handle_idx >= 0) {
1595 levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
1596 levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1597 }
1598 nir_def_init(&levels->instr, &levels->def,
1599 nir_tex_instr_dest_size(levels), 32);
1600 nir_builder_instr_insert(b, &levels->instr);
1601
1602 nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1603 nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1604 nir_builder_instr_insert(b, &new_txf->instr);
1605
1606 nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1607 nir_const_value oob_values[4] = {0};
1608 unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1609 oob_values[3] = (txf->dest_type & nir_type_float) ?
1610 nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1611 nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1612
1613 nir_pop_if(b, lod_oob_else);
1614 nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1615
1616 nir_def_rewrite_uses(&txf->def, robust_txf);
1617 nir_instr_remove_v(in);
1618 return true;
1619 }
1620
1621 /* This pass is used to workaround the lack of out of bounds LOD robustness
1622 * for texel fetch ops in VK_EXT_image_robustness.
1623 */
1624 static bool
1625 lower_txf_lod_robustness(nir_shader *shader)
1626 {
1627 return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1628 }
1629
1630 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1631 static bool
1632 check_psiz(struct nir_shader *s)
1633 {
1634 bool have_psiz = false;
1635 nir_foreach_shader_out_variable(var, s) {
1636 if (var->data.location == VARYING_SLOT_PSIZ) {
1637 /* genuine PSIZ outputs will have this set */
1638 have_psiz |= !!var->data.explicit_location;
1639 }
1640 }
1641 return have_psiz;
1642 }
1643
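/* find the variable of the given mode that covers this location/component; when a
 * genuine gl_PointSize exists, only an explicitly-located PSIZ variable matches
 */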
1644 static nir_variable *
1645 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1646 {
1647 assert((int)location >= 0);
1648
1649 nir_foreach_variable_with_modes(var, nir, mode) {
1650 if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1651 unsigned num_components = glsl_get_vector_elements(var->type);
1652 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1653 num_components *= 2;
1654 if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0)
1655 num_components = glsl_get_aoa_size(var->type);
1656 if (var->data.location_frac <= location_frac &&
1657 var->data.location_frac + num_components > location_frac)
1658 return var;
1659 }
1660 }
1661 return NULL;
1662 }
1663
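/* true if every component captured by this xfb output has already been inlined */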
1664 static bool
1665 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1666 {
1667 unsigned num_components = util_bitcount(output->component_mask);
1668 for (unsigned i = 0; i < num_components; i++)
1669 if (!inlined[output->component_offset + i])
1670 return false;
1671 return true;
1672 }
1673
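/* relocate the injected psiz output to one slot past the last written output
 * (at least VAR0) so it can't alias a real varying; slot remapping assigns the
 * final location later
 */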
1674 static void
1675 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1676 {
1677 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1678 if (last_output < VARYING_SLOT_VAR0)
1679 last_output = VARYING_SLOT_VAR0;
1680 else
1681 last_output++;
1682 /* this should get fixed up by slot remapping */
1683 psiz->data.location = last_output;
1684 }
1685
1686 static const struct glsl_type *
1687 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1688 {
1689 /* could be dvec/dmat/mat: each member is the same */
1690 const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1691 /* determine size of each member type */
1692 unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1693 /* normalize slot idx to current type's size */
1694 slot %= slot_count;
1695 unsigned slot_components = glsl_get_components(plain);
1696 if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1697 slot_components *= 2;
1698 /* create a vec4 mask of the selected slot's components out of all the components */
1699 uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1700 /* return a vecN of the selected components */
1701 slot_components = util_bitcount(mask);
1702 return glsl_vec_type(slot_components);
1703 }
1704
1705 static const struct glsl_type *
1706 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1707 {
1708 const struct glsl_type *type = slot_type;
1709 unsigned slot_count = 0;
1710 unsigned cur_slot = 0;
1711 /* iterate over all the members in the struct, stopping once the slot idx is reached */
1712 for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1713 /* use array type for slot counting but return array member type for unroll */
1714 const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1715 type = glsl_without_array(arraytype);
1716 slot_count = glsl_count_vec4_slots(arraytype, false, false);
1717 }
1718 *slot_idx -= (cur_slot - slot_count);
1719 if (!glsl_type_is_struct_or_ifc(type))
1720 /* this is a fully unrolled struct: find the number of vec components to output */
1721 type = clamp_slot_type(type, *slot_idx);
1722 return type;
1723 }
1724
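/* return the number of 32bit components the variable exports in the given vec4
 * slot, where so_slot is the variable's base slot
 */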
1725 static unsigned
1726 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1727 {
1728 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1729 const struct glsl_type *orig_type = var->type;
1730 const struct glsl_type *type = glsl_without_array(var->type);
1731 unsigned slot_idx = slot - so_slot;
1732 if (type != orig_type)
1733 slot_idx %= glsl_count_vec4_slots(type, false, false);
1734 /* need to find the vec4 that's being exported by this slot */
1735 while (glsl_type_is_struct_or_ifc(type))
1736 type = unroll_struct_type(type, &slot_idx);
1737
1738 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1739 unsigned num_components = glsl_get_components(glsl_without_array(type));
1740 /* special handling: clip/cull distance are arrays with vector semantics */
1741 if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0) {
1742 num_components = glsl_array_size(type);
1743 if (slot_idx)
1744 /* this is the second vec4 */
1745 num_components %= 4;
1746 else
1747 /* this is the first vec4 */
1748 num_components = MIN2(num_components, 4);
1749 }
1750 assert(num_components);
1751 /* gallium handles xfb in terms of 32bit units */
1752 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1753 num_components *= 2;
1754 return num_components;
1755 }
1756
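/* number of vec4 slots an i/o variable occupies, unwrapping the outer array for
 * arrayed (per-vertex) i/o
 */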
1757 static unsigned
1758 get_var_slot_count(nir_shader *nir, nir_variable *var)
1759 {
1760 assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1761 const struct glsl_type *type = var->type;
1762 if (nir_is_arrayed_io(var, nir->info.stage))
1763 type = glsl_get_array_element(type);
1764 unsigned slot_count = 0;
1765 if (var->data.location >= VARYING_SLOT_VAR0)
1766 slot_count = glsl_count_vec4_slots(type, false, false);
1767 else if (glsl_type_is_array(type))
1768 slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1769 else
1770 slot_count = 1;
1771 return slot_count;
1772 }
1773
1774
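/* find the xfb output info for the given varying slot, if any */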
1775 static const nir_xfb_output_info *
1776 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1777 {
1778 for (unsigned i = 0; i < xfb_info->output_count; i++) {
1779 const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1780 if (packed_output->location == slot)
1781 return packed_output;
1782 }
1783 return NULL;
1784 }
1785
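/* map nir xfb outputs onto the shader's output variables: outputs that line up
 * with a whole variable are inlined immediately, everything else is recorded as
 * "packed" metadata and consolidated afterwards when streams/buffers/offsets allow
 */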
1786 static void
1787 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1788 {
1789 bool inlined[VARYING_SLOT_MAX][4] = {0};
1790 uint64_t packed = 0;
1791 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1792 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1793 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1794 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1795 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1796 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1797 unsigned xfb_components = util_bitcount(output->component_mask);
1798 /* always set stride to be used during draw */
1799 zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1800 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1801 for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1802 unsigned slot = output->location;
1803 if (inlined[slot][output->component_offset + c])
1804 continue;
1805 nir_variable *var = NULL;
1806 while (!var && slot < VARYING_SLOT_TESS_MAX)
1807 var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1808 slot = output->location;
1809 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1810 if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1811 /* if no variable is found for the xfb output, no output exists */
1812 inlined[slot][c + output->component_offset] = true;
1813 continue;
1814 }
1815 if (var->data.explicit_xfb_buffer) {
1816 /* handle dvec3 where gallium splits streamout over 2 registers */
1817 for (unsigned j = 0; j < xfb_components; j++)
1818 inlined[slot][c + output->component_offset + j] = true;
1819 }
1820 if (is_inlined(inlined[slot], output))
1821 continue;
1822 assert(!glsl_type_is_array(var->type) || var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0);
1823 assert(!glsl_type_is_struct_or_ifc(var->type));
1824 unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1825 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1826 num_components *= 2;
1827 /* if this is the entire variable, try to blast it out during the initial declaration
1828 * structs must be handled later to ensure accurate analysis
1829 */
1830 if ((num_components == xfb_components ||
1831 num_components < xfb_components ||
1832 (num_components > xfb_components && xfb_components == 4))) {
1833 var->data.explicit_xfb_buffer = 1;
1834 var->data.xfb.buffer = output->buffer;
1835 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1836 var->data.offset = (output->offset + c * sizeof(uint32_t));
1837 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1838 for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
1839 inlined[slot][c + output->component_offset + j] = true;
1840 } else {
1841 /* otherwise store some metadata for later */
1842 packed |= BITFIELD64_BIT(slot);
1843 packed_components[slot] += xfb_components;
1844 packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
1845 packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
1846 for (unsigned j = 0; j < xfb_components; j++)
1847 packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
1848 }
1849 }
1850 }
1851 }
1852
1853 /* if this was flagged as a packed output before, and if all the components are
1854 * being output with the same stream on the same buffer with increasing offsets, this entire variable
1855 * can be consolidated into a single output to conserve locations
1856 */
1857 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1858 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1859 unsigned slot = output->location;
1860 if (is_inlined(inlined[slot], output))
1861 continue;
1862 if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
1863 nir_variable *var = NULL;
1864 while (!var)
1865 var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
1866 slot = output->location;
1867 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1868 if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
1869 continue;
1870 /* this is a lowered 64bit variable that can't be exported due to packing */
1871 if (var->data.is_xfb)
1872 goto out;
1873
1874 unsigned num_slots = var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1 ?
1875 glsl_array_size(var->type) / 4 :
1876 glsl_count_vec4_slots(var->type, false, false);
1877 /* for each variable, iterate over all the variable's slots and inline the outputs */
1878 for (unsigned j = 0; j < num_slots; j++) {
1879 slot = var->data.location + j;
1880 const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
1881 if (!packed_output)
1882 goto out;
1883
1884 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
1885 if (!(packed & BITFIELD64_BIT(slot)) ||
1886 util_bitcount(packed_streams[slot]) != 1 ||
1887 util_bitcount(packed_buffers[slot]) != 1)
1888 goto out;
1889
1890 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
1891 unsigned num_components = get_slot_components(var, slot, var->data.location);
1892 if (num_components != packed_components[slot])
1893 goto out;
1894
1895 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
1896 uint32_t prev_offset = packed_offsets[packed_output->location][0];
1897 for (unsigned k = 1; k < num_components; k++) {
1898 /* if the offsets are not incrementing as expected, skip consolidation */
1899 if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
1900 goto out;
1901 prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
1902 }
1903 }
1904 /* this output can be consolidated: blast out all the data inlined */
1905 var->data.explicit_xfb_buffer = 1;
1906 var->data.xfb.buffer = output->buffer;
1907 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
1908 var->data.offset = output->offset;
1909 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
1910 /* mark all slot components inlined to skip subsequent loop iterations */
1911 for (unsigned j = 0; j < num_slots; j++) {
1912 slot = var->data.location + j;
1913 for (unsigned k = 0; k < packed_components[slot]; k++)
1914 inlined[slot][k] = true;
1915 packed &= ~BITFIELD64_BIT(slot);
1916 }
1917 continue;
1918 }
1919 out:
1920 unreachable("xfb should be inlined by now!");
1921 }
1922 }
1923
1924 struct decompose_state {
1925 nir_variable **split;
1926 bool needs_w;
1927 };
1928
1929 static bool
1930 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
1931 {
1932 struct decompose_state *state = data;
1933 nir_variable **split = state->split;
1934 if (instr->type != nir_instr_type_intrinsic)
1935 return false;
1936 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1937 if (intr->intrinsic != nir_intrinsic_load_deref)
1938 return false;
1939 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1940 nir_variable *var = nir_deref_instr_get_variable(deref);
1941 if (var != split[0])
1942 return false;
1943 unsigned num_components = glsl_get_vector_elements(split[0]->type);
1944 b->cursor = nir_after_instr(instr);
1945 nir_def *loads[4];
1946 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
1947 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
1948 if (state->needs_w) {
1949 /* oob load w component to get correct value for int/float */
1950 loads[3] = nir_channel(b, loads[0], 3);
1951 loads[0] = nir_channel(b, loads[0], 0);
1952 }
1953 nir_def *new_load = nir_vec(b, loads, num_components);
1954 nir_def_rewrite_uses(&intr->def, new_load);
1955 nir_instr_remove_v(instr);
1956 return true;
1957 }
1958
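/* split each decomposed vertex attribute into one variable per component at free
 * driver locations, then rewrite its loads (lower_attrib) to reassemble the vector;
 * for the needs_w case the first split keeps the full vec4 type so .w can be
 * recovered from its load
 */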
1959 static bool
1960 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
1961 {
1962 uint32_t bits = 0;
1963 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
1964 bits |= BITFIELD_BIT(var->data.driver_location);
1965 bits = ~bits;
1966 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
1967 nir_variable *split[5];
1968 struct decompose_state state;
1969 state.split = split;
1970 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
1971 assert(var);
1972 split[0] = var;
1973 bits |= BITFIELD_BIT(var->data.driver_location);
1974 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
1975 unsigned num_components = glsl_get_vector_elements(var->type);
1976 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
1977 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
1978 split[i+1] = nir_variable_clone(var, nir);
1979 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
1980 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
1981 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
1982 else
1983 split[i+1]->type = new_type;
1984 split[i+1]->data.driver_location = ffs(bits) - 1;
1985 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
1986 nir_shader_add_variable(nir, split[i+1]);
1987 }
1988 var->data.mode = nir_var_shader_temp;
1989 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
1990 }
1991 nir_fixup_deref_modes(nir);
1992 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1993 optimize_nir(nir, NULL, true);
1994 return true;
1995 }
1996
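/* rewrite buffer/shared/scratch access so offsets are in units of the accessed
 * type instead of bytes; 64bit access without shaderInt64 is split into 2x32 ops
 */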
1997 static bool
1998 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1999 {
2000 struct zink_screen *screen = data;
2001 const bool has_int64 = screen->info.feats.features.shaderInt64;
2002 if (instr->type != nir_instr_type_intrinsic)
2003 return false;
2004 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2005 b->cursor = nir_before_instr(instr);
2006 switch (intr->intrinsic) {
2007 case nir_intrinsic_ssbo_atomic:
2008 case nir_intrinsic_ssbo_atomic_swap: {
2009 /* convert offset to uintN_t[idx] */
2010 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2011 nir_src_rewrite(&intr->src[1], offset);
2012 return true;
2013 }
2014 case nir_intrinsic_load_ssbo:
2015 case nir_intrinsic_load_ubo: {
2016 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2017 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2018 nir_src_is_const(intr->src[0]) &&
2019 nir_src_as_uint(intr->src[0]) == 0 &&
2020 intr->def.bit_size == 64 &&
2021 nir_intrinsic_align_offset(intr) % 8 != 0;
2022 force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2023 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2024 nir_src_rewrite(&intr->src[1], offset);
2025 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2026 if (force_2x32) {
2027 /* this is always scalarized */
2028 assert(intr->def.num_components == 1);
2029 /* rewrite as 2x32 */
2030 nir_def *load[2];
2031 for (unsigned i = 0; i < 2; i++) {
2032 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2033 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2034 else
2035 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2036 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2037 }
2038 /* cast back to 64bit */
2039 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2040 nir_def_rewrite_uses(&intr->def, casted);
2041 nir_instr_remove(instr);
2042 }
2043 return true;
2044 }
2045 case nir_intrinsic_load_scratch:
2046 case nir_intrinsic_load_shared: {
2047 b->cursor = nir_before_instr(instr);
2048 bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2049 nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2050 nir_src_rewrite(&intr->src[0], offset);
2051 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2052 if (force_2x32) {
2053 /* this is always scalarized */
2054 assert(intr->def.num_components == 1);
2055 /* rewrite as 2x32 */
2056 nir_def *load[2];
2057 for (unsigned i = 0; i < 2; i++)
2058 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2059 /* cast back to 64bit */
2060 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2061 nir_def_rewrite_uses(&intr->def, casted);
2062 nir_instr_remove(instr);
2063 return true;
2064 }
2065 break;
2066 }
2067 case nir_intrinsic_store_ssbo: {
2068 b->cursor = nir_before_instr(instr);
2069 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2070 nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2071 nir_src_rewrite(&intr->src[2], offset);
2072 /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so split into 2x32 and pray */
2073 if (force_2x32) {
2074 /* this is always scalarized */
2075 assert(intr->src[0].ssa->num_components == 1);
2076 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2077 for (unsigned i = 0; i < 2; i++)
2078 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2079 nir_instr_remove(instr);
2080 }
2081 return true;
2082 }
2083 case nir_intrinsic_store_scratch:
2084 case nir_intrinsic_store_shared: {
2085 b->cursor = nir_before_instr(instr);
2086 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2087 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2088 nir_src_rewrite(&intr->src[1], offset);
2089 /* if 64bit isn't supported, 64bit stores definitely aren't supported either, so split into 2x32 and pray */
2090 if (force_2x32) {
2091 /* this is always scalarized */
2092 assert(intr->src[0].ssa->num_components == 1);
2093 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2094 for (unsigned i = 0; i < 2; i++)
2095 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2096 nir_instr_remove(instr);
2097 }
2098 return true;
2099 }
2100 default:
2101 break;
2102 }
2103 return false;
2104 }
2105
2106 static bool
2107 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2108 {
2109 return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2110 }
2111
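/* return the bo variable matching this bit size, lazily cloning it from the 32bit
 * variant and retyping it as arrays of uintN_t ("base" + "unsized" runtime array)
 */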
2112 static nir_variable *
2113 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2114 {
2115 nir_variable *var, **ptr;
2116 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2117
2118 if (ssbo)
2119 ptr = &bo->ssbo[bit_size >> 4];
2120 else {
2121 if (!idx) {
2122 ptr = &bo->uniforms[bit_size >> 4];
2123 } else
2124 ptr = &bo->ubo[bit_size >> 4];
2125 }
2126 var = *ptr;
2127 if (!var) {
2128 if (ssbo)
2129 var = bo->ssbo[32 >> 4];
2130 else {
2131 if (!idx)
2132 var = bo->uniforms[32 >> 4];
2133 else
2134 var = bo->ubo[32 >> 4];
2135 }
2136 var = nir_variable_clone(var, shader);
2137 if (ssbo)
2138 var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2139 else
2140 var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2141 *ptr = var;
2142 nir_shader_add_variable(shader, var);
2143
2144 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2145 fields[0].name = ralloc_strdup(shader, "base");
2146 fields[1].name = ralloc_strdup(shader, "unsized");
2147 unsigned array_size = glsl_get_length(var->type);
2148 const struct glsl_type *bare_type = glsl_without_array(var->type);
2149 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2150 unsigned length = glsl_get_length(array_type);
2151 const struct glsl_type *type;
2152 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2153 if (bit_size > 32) {
2154 assert(bit_size == 64);
2155 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2156 } else {
2157 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2158 }
2159 fields[0].type = type;
2160 fields[1].type = unsized;
2161 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2162 var->data.driver_location = idx;
2163 }
2164 return var;
2165 }
2166
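/* turn an ssbo atomic into per-component deref atomics on the rewritten bo variable */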
2167 static void
2168 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2169 {
2170 nir_intrinsic_op op;
2171 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2172 if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2173 op = nir_intrinsic_deref_atomic;
2174 else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2175 op = nir_intrinsic_deref_atomic_swap;
2176 else
2177 unreachable("unknown intrinsic");
2178 nir_def *offset = intr->src[1].ssa;
2179 nir_src *src = &intr->src[0];
2180 nir_variable *var = get_bo_var(b->shader, bo, true, src,
2181 intr->def.bit_size);
2182 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2183 nir_def *idx = src->ssa;
2184 if (bo->first_ssbo)
2185 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2186 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2187 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2188
2189 /* generate new atomic deref ops for every component */
2190 nir_def *result[4];
2191 unsigned num_components = intr->def.num_components;
2192 for (unsigned i = 0; i < num_components; i++) {
2193 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2194 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2195 nir_def_init(&new_instr->instr, &new_instr->def, 1,
2196 intr->def.bit_size);
2197 nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2198 new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2199 /* deref ops have no offset src, so copy the srcs after it */
2200 for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2201 new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2202 nir_builder_instr_insert(b, &new_instr->instr);
2203
2204 result[i] = &new_instr->def;
2205 offset = nir_iadd_imm(b, offset, 1);
2206 }
2207
2208 nir_def *load = nir_vec(b, result, num_components);
2209 nir_def_rewrite_uses(&intr->def, load);
2210 nir_instr_remove(instr);
2211 }
2212
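/* rewrite explicit ubo/ssbo intrinsics as deref chains into the bo variables
 * (uniform_0/ubos/ssbos) created by get_bo_var
 */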
2213 static bool
2214 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2215 {
2216 struct bo_vars *bo = data;
2217 if (instr->type != nir_instr_type_intrinsic)
2218 return false;
2219 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2220 nir_variable *var = NULL;
2221 nir_def *offset = NULL;
2222 bool is_load = true;
2223 b->cursor = nir_before_instr(instr);
2224 nir_src *src;
2225 bool ssbo = true;
2226 switch (intr->intrinsic) {
2227 case nir_intrinsic_ssbo_atomic:
2228 case nir_intrinsic_ssbo_atomic_swap:
2229 rewrite_atomic_ssbo_instr(b, instr, bo);
2230 return true;
2231 case nir_intrinsic_store_ssbo:
2232 src = &intr->src[1];
2233 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2234 offset = intr->src[2].ssa;
2235 is_load = false;
2236 break;
2237 case nir_intrinsic_load_ssbo:
2238 src = &intr->src[0];
2239 var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2240 offset = intr->src[1].ssa;
2241 break;
2242 case nir_intrinsic_load_ubo:
2243 src = &intr->src[0];
2244 var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2245 offset = intr->src[1].ssa;
2246 ssbo = false;
2247 break;
2248 default:
2249 return false;
2250 }
2251 assert(var);
2252 assert(offset);
2253 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2254 nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2255 if (!ssbo && bo->first_ubo && var->data.driver_location)
2256 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2257 else if (ssbo && bo->first_ssbo)
2258 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2259 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2260 nir_i2iN(b, idx, deref_var->def.bit_size));
2261 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2262 assert(intr->num_components <= 2);
2263 if (is_load) {
2264 nir_def *result[2];
2265 for (unsigned i = 0; i < intr->num_components; i++) {
2266 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2267 nir_i2iN(b, offset, deref_struct->def.bit_size));
2268 result[i] = nir_load_deref(b, deref_arr);
2269 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2270 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2271 offset = nir_iadd_imm(b, offset, 1);
2272 }
2273 nir_def *load = nir_vec(b, result, intr->num_components);
2274 nir_def_rewrite_uses(&intr->def, load);
2275 } else {
2276 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2277 nir_i2iN(b, offset, deref_struct->def.bit_size));
2278 nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2279 }
2280 nir_instr_remove(instr);
2281 return true;
2282 }
2283
2284 static bool
2285 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2286 {
2287 struct bo_vars bo = get_bo_vars(zs, shader);
2288 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2289 }
2290
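/* classify a lowered i/o intrinsic; returns false for anything that isn't i/o */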
2291 static bool
2292 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2293 {
2294 switch (intr->intrinsic) {
2295 case nir_intrinsic_load_interpolated_input:
2296 *is_interp = true;
2297 FALLTHROUGH;
2298 case nir_intrinsic_load_input:
2299 case nir_intrinsic_load_per_vertex_input:
2300 *is_input = true;
2301 FALLTHROUGH;
2302 case nir_intrinsic_load_output:
2303 case nir_intrinsic_load_per_vertex_output:
2304 case nir_intrinsic_load_per_primitive_output:
2305 *is_load = true;
2306 FALLTHROUGH;
2307 case nir_intrinsic_store_output:
2308 case nir_intrinsic_store_per_primitive_output:
2309 case nir_intrinsic_store_per_vertex_output:
2310 break;
2311 default:
2312 return false;
2313 }
2314 return true;
2315 }
2316
2317 static bool
2318 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2319 {
2320 switch (intr->intrinsic) {
2321 case nir_intrinsic_load_per_vertex_input:
2322 case nir_intrinsic_load_per_vertex_output:
2323 case nir_intrinsic_load_per_primitive_output:
2324 case nir_intrinsic_store_per_primitive_output:
2325 case nir_intrinsic_store_per_vertex_output:
2326 return true;
2327 default:
2328 break;
2329 }
2330 return false;
2331 }
2332
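/* check whether any deref instruction references this variable */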
2333 static bool
2334 find_var_deref(nir_shader *nir, nir_variable *var)
2335 {
2336 nir_foreach_function_impl(impl, nir) {
2337 nir_foreach_block(block, impl) {
2338 nir_foreach_instr(instr, block) {
2339 if (instr->type != nir_instr_type_deref)
2340 continue;
2341 nir_deref_instr *deref = nir_instr_as_deref(instr);
2342 if (deref->deref_type == nir_deref_type_var && deref->var == var)
2343 return true;
2344 }
2345 }
2346 }
2347 return false;
2348 }
2349
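/* check whether any lowered i/o intrinsic reads or writes this variable's location range */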
2350 static bool
2351 find_var_io(nir_shader *nir, nir_variable *var)
2352 {
2353 nir_foreach_function(function, nir) {
2354 if (!function->impl)
2355 continue;
2356
2357 nir_foreach_block(block, function->impl) {
2358 nir_foreach_instr(instr, block) {
2359 if (instr->type != nir_instr_type_intrinsic)
2360 continue;
2361 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2362 bool is_load = false;
2363 bool is_input = false;
2364 bool is_interp = false;
2365 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2366 continue;
2367 if (var->data.mode == nir_var_shader_in && !is_input)
2368 continue;
2369 if (var->data.mode == nir_var_shader_out && is_input)
2370 continue;
2371 unsigned slot_offset = 0;
2372 if (var->data.fb_fetch_output && !is_load)
2373 continue;
2374 if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2375 continue;
2376 nir_src *src_offset = nir_get_io_offset_src(intr);
2377 if (src_offset && nir_src_is_const(*src_offset))
2378 slot_offset = nir_src_as_uint(*src_offset);
2379 unsigned slot_count = get_var_slot_count(nir, var);
2380 if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2381 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2382 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2383 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2384 return true;
2385 }
2386 }
2387 }
2388 return false;
2389 }
2390
2391 struct clamp_layer_output_state {
2392 nir_variable *original;
2393 nir_variable *clamped;
2394 };
2395
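/* store 0 to the clamped layer output unless the push constant indicates a layered
 * framebuffer, in which case the original layer value is passed through
 */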
2396 static void
2397 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2398 {
2399 nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2400 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2401 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2402 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2403 nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2404 nir_load_deref(b, original_deref),
2405 nir_imm_int(b, 0));
2406 nir_store_deref(b, clamped_deref, layer, 0);
2407 }
2408
2409 static bool
2410 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2411 {
2412 struct clamp_layer_output_state *state = data;
2413 switch (instr->type) {
2414 case nir_instr_type_intrinsic: {
2415 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2416 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2417 intr->intrinsic != nir_intrinsic_emit_vertex)
2418 return false;
2419 b->cursor = nir_before_instr(instr);
2420 clamp_layer_output_emit(b, state);
2421 return true;
2422 }
2423 default: return false;
2424 }
2425 }
2426
2427 static bool
2428 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2429 {
2430 switch (vs->info.stage) {
2431 case MESA_SHADER_VERTEX:
2432 case MESA_SHADER_GEOMETRY:
2433 case MESA_SHADER_TESS_EVAL:
2434 break;
2435 default:
2436 unreachable("invalid last vertex stage!");
2437 }
2438 struct clamp_layer_output_state state = {0};
2439 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2440 if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2441 return false;
2442 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2443 state.clamped->data.location = VARYING_SLOT_LAYER;
2444 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2445 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2446 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2447 state.original->data.driver_location = (*next_location)++;
2448 if (fs_var) {
2449 fs_var->data.location = state.original->data.location;
2450 fs_var->data.driver_location = state.original->data.driver_location;
2451 }
2452 } else {
2453 if (state.original->data.explicit_xfb_buffer) {
2454 /* Will xfb the clamped output but still better than nothing */
2455 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2456 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2457 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2458 state.clamped->data.offset = state.original->data.offset;
2459 state.clamped->data.stream = state.original->data.stream;
2460 }
2461 state.original->data.mode = nir_var_shader_temp;
2462 nir_fixup_deref_modes(vs);
2463 }
2464 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2465 nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2466 } else {
2467 nir_builder b;
2468 nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2469 b = nir_builder_at(nir_after_impl(impl));
2470 assert(impl->end_block->predecessors->entries == 1);
2471 clamp_layer_output_emit(&b, &state);
2472 nir_metadata_preserve(impl, nir_metadata_dominance);
2473 }
2474 optimize_nir(vs, NULL, true);
2475 NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2476 return true;
2477 }
2478
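/* assign packed driver_locations for producer outputs: builtin slots get a sentinel
 * value and generic slots are packed consecutively via slot_map
 */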
2479 static void
2480 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2481 {
2482 unsigned slot = var->data.location;
2483 switch (slot) {
2484 case -1:
2485 case VARYING_SLOT_POS:
2486 case VARYING_SLOT_PSIZ:
2487 case VARYING_SLOT_LAYER:
2488 case VARYING_SLOT_PRIMITIVE_ID:
2489 case VARYING_SLOT_CLIP_DIST0:
2490 case VARYING_SLOT_CULL_DIST0:
2491 case VARYING_SLOT_VIEWPORT:
2492 case VARYING_SLOT_FACE:
2493 case VARYING_SLOT_TESS_LEVEL_OUTER:
2494 case VARYING_SLOT_TESS_LEVEL_INNER:
2495 /* use a sentinel value to avoid counting later */
2496 var->data.driver_location = UINT_MAX;
2497 break;
2498
2499 default:
2500 if (var->data.patch) {
2501 assert(slot >= VARYING_SLOT_PATCH0);
2502 slot -= VARYING_SLOT_PATCH0;
2503 }
2504 if (slot_map[slot] == 0xff) {
2505 assert(*reserved < MAX_VARYING);
2506 unsigned num_slots;
2507 if (nir_is_arrayed_io(var, stage))
2508 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2509 else
2510 num_slots = glsl_count_vec4_slots(var->type, false, false);
2511 assert(*reserved + num_slots <= MAX_VARYING);
2512 for (unsigned i = 0; i < num_slots; i++)
2513 slot_map[slot + i] = (*reserved)++;
2514 }
2515 slot = slot_map[slot];
2516 assert(slot < MAX_VARYING);
2517 var->data.driver_location = slot;
2518 }
2519 }
2520
2521 ALWAYS_INLINE static bool
2522 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2523 {
2524 if (stage != MESA_SHADER_FRAGMENT)
2525 return false;
2526 return var->data.location >= VARYING_SLOT_TEX0 &&
2527 var->data.location <= VARYING_SLOT_TEX7;
2528 }
2529
2530 static bool
2531 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
2532 {
2533 unsigned slot = var->data.location;
2534 switch (slot) {
2535 case VARYING_SLOT_POS:
2536 case VARYING_SLOT_PSIZ:
2537 case VARYING_SLOT_LAYER:
2538 case VARYING_SLOT_PRIMITIVE_ID:
2539 case VARYING_SLOT_CLIP_DIST0:
2540 case VARYING_SLOT_CULL_DIST0:
2541 case VARYING_SLOT_VIEWPORT:
2542 case VARYING_SLOT_FACE:
2543 case VARYING_SLOT_TESS_LEVEL_OUTER:
2544 case VARYING_SLOT_TESS_LEVEL_INNER:
2545 /* use a sentinel value to avoid counting later */
2546 var->data.driver_location = UINT_MAX;
2547 break;
2548 default:
2549 if (var->data.patch) {
2550 assert(slot >= VARYING_SLOT_PATCH0);
2551 slot -= VARYING_SLOT_PATCH0;
2552 }
2553 if (slot_map[slot] == (unsigned char)-1) {
2554 /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2555 * so keep for now and eliminate later
2556 */
2557 if (is_texcoord(stage, var)) {
2558 var->data.driver_location = -1;
2559 return true;
2560 }
2561 /* patch variables may be read in the workgroup */
2562 if (stage != MESA_SHADER_TESS_CTRL)
2563 /* dead io */
2564 return false;
2565 unsigned num_slots;
2566 if (nir_is_arrayed_io(var, stage))
2567 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2568 else
2569 num_slots = glsl_count_vec4_slots(var->type, false, false);
2570 assert(*reserved + num_slots <= MAX_VARYING);
2571 for (unsigned i = 0; i < num_slots; i++)
2572 slot_map[slot + i] = (*reserved)++;
2573 }
2574 var->data.driver_location = slot_map[slot];
2575 }
2576 return true;
2577 }
2578
2579
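/* replace loads of an input that never got a slot assigned with zero
 * (default color inputs get .w = 1)
 */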
2580 static bool
2581 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2582 {
2583 nir_variable *var = data;
2584 if (instr->type != nir_instr_type_intrinsic)
2585 return false;
2586
2587 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2588 bool is_load = false;
2589 bool is_input = false;
2590 bool is_interp = false;
2591 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2592 return false;
2593 if (!is_load)
2594 return false;
2595 unsigned location = nir_intrinsic_io_semantics(intr).location;
2596 if (location != var->data.location)
2597 return false;
2598 b->cursor = nir_before_instr(instr);
2599 nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2600 intr->def.bit_size);
2601 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2602 switch (location) {
2603 case VARYING_SLOT_COL0:
2604 case VARYING_SLOT_COL1:
2605 case VARYING_SLOT_BFC0:
2606 case VARYING_SLOT_BFC1:
2607 /* default color is 0,0,0,1 */
2608 if (intr->def.num_components == 4)
2609 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2610 break;
2611 default:
2612 break;
2613 }
2614 }
2615 nir_def_rewrite_uses(&intr->def, zero);
2616 nir_instr_remove(instr);
2617 return true;
2618 }
2619
2620
2621
2622 static bool
2623 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2624 {
2625 switch (intr->intrinsic) {
2626 case nir_intrinsic_store_output:
2627 case nir_intrinsic_store_per_primitive_output:
2628 case nir_intrinsic_store_per_vertex_output:
2629 break;
2630 default:
2631 return false;
2632 }
2633 if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2634 return false;
2635 if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2636 nir_instr_remove(&intr->instr);
2637 return true;
2638 }
2639 return false;
2640 }
2641
2642 static bool
2643 delete_psiz_store(nir_shader *nir, bool one)
2644 {
2645 bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2646 nir_metadata_dominance, one ? nir : NULL);
2647 if (progress)
2648 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2649 return progress;
2650 }
2651
2652 struct write_components {
2653 unsigned slot;
2654 uint32_t component_mask;
2655 };
2656
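/* zero-fill the consumer's reads of components the producer never writes for this
 * slot (again with .w = 1 for color inputs)
 */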
2657 static bool
2658 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2659 {
2660 struct write_components *wc = data;
2661 bool is_load = false;
2662 bool is_input = false;
2663 bool is_interp = false;
2664 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2665 return false;
2666 if (!is_input)
2667 return false;
2668 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2669 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2670 return false;
2671 unsigned num_components = intr->num_components;
2672 unsigned c = nir_intrinsic_component(intr);
2673 if (intr->def.bit_size == 64)
2674 num_components *= 2;
2675 nir_src *src_offset = nir_get_io_offset_src(intr);
2676 if (nir_src_is_const(*src_offset)) {
2677 unsigned slot_offset = nir_src_as_uint(*src_offset);
2678 if (s.location + slot_offset != wc->slot)
2679 return false;
2680 } else if (s.location > wc->slot || s.location + s.num_slots <= wc->slot) {
2681 return false;
2682 }
2683 uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2684 if (intr->def.bit_size == 64)
2685 readmask |= readmask << (intr->num_components + c);
2686 /* handle dvec3/dvec4 */
2687 if (num_components + c > 4)
2688 readmask >>= 4;
2689 if ((wc->component_mask & readmask) == readmask)
2690 return false;
2691 uint32_t rewrite_mask = readmask & ~wc->component_mask;
2692 if (!rewrite_mask)
2693 return false;
2694 b->cursor = nir_after_instr(&intr->instr);
2695 nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2696 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2697 switch (wc->slot) {
2698 case VARYING_SLOT_COL0:
2699 case VARYING_SLOT_COL1:
2700 case VARYING_SLOT_BFC0:
2701 case VARYING_SLOT_BFC1:
2702 /* default color is 0,0,0,1 */
2703 if (intr->def.num_components == 4)
2704 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2705 break;
2706 default:
2707 break;
2708 }
2709 }
2710 rewrite_mask >>= c;
2711 nir_def *dest = &intr->def;
2712 u_foreach_bit(component, rewrite_mask)
2713 dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2714 nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2715 return true;
2716 }
2717
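/* accumulate the mask of components the producer writes for wc->slot */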
2718 static bool
2719 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2720 {
2721 struct write_components *wc = data;
2722 bool is_load = false;
2723 bool is_input = false;
2724 bool is_interp = false;
2725 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2726 return false;
2727 if (is_input || is_load)
2728 return false;
2729 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2730 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2731 return false;
2732 unsigned location = s.location;
2733 unsigned c = nir_intrinsic_component(intr);
2734 uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2735 if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2736 unsigned num_components = intr->num_components * 2;
2737 nir_src *src_offset = nir_get_io_offset_src(intr);
2738 if (nir_src_is_const(*src_offset)) {
2739 if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2740 return false;
2741 }
2742 wrmask |= wrmask << intr->num_components;
2743 /* handle dvec3/dvec4 */
2744 if (num_components + c > 4)
2745 wrmask >>= 4;
2746 }
2747 wc->component_mask |= wrmask;
2748 return false;
2749 }
2750
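/* match producer outputs to consumer inputs: drop an unused injected pointsize,
 * clear stale xfb state, assign packed driver_locations on both sides, zero-fill
 * partially written varyings, and clean up any variables demoted along the way
 */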
2751 void
2752 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2753 {
2754 unsigned reserved = 0;
2755 unsigned char slot_map[VARYING_SLOT_MAX];
2756 memset(slot_map, -1, sizeof(slot_map));
2757 bool do_fixup = false;
2758 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2759 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2760 if (var) {
2761 bool can_remove = false;
2762 if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2763 /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2764 if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2765 can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2766 else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2767 can_remove = !var->data.explicit_location;
2768 }
2769 /* remove injected pointsize from all but the last vertex stage */
2770 if (can_remove) {
2771 var->data.mode = nir_var_shader_temp;
2772 nir_fixup_deref_modes(producer);
2773 delete_psiz_store(producer, false);
2774 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2775 optimize_nir(producer, NULL, true);
2776 }
2777 }
2778 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2779 producer->info.has_transform_feedback_varyings = false;
2780 nir_foreach_shader_out_variable(var_out, producer)
2781 var_out->data.explicit_xfb_buffer = false;
2782 }
2783 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2784 /* never assign from tcs -> tes, always invert */
2785 nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2786 assign_producer_var_io(consumer->info.stage, var_in, &reserved, slot_map);
2787 nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2788 if (!assign_consumer_var_io(producer->info.stage, var_out, &reserved, slot_map))
2789 /* this is an output, nothing more needs to be done for it to be dropped */
2790 do_fixup = true;
2791 }
2792 } else {
2793 nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2794 assign_producer_var_io(producer->info.stage, var_out, &reserved, slot_map);
2795 nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2796 if (!assign_consumer_var_io(consumer->info.stage, var_in, &reserved, slot_map)) {
2797 do_fixup = true;
2798 /* input needs to be rewritten */
2799 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
2800 }
2801 }
2802 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
2803 do_fixup |= clamp_layer_output(producer, consumer, &reserved);
2804 }
2805 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
2806 if (producer->info.io_lowered && consumer->info.io_lowered) {
2807 u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
2808 struct write_components wc = {slot, 0};
2809 nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
2810 assert(wc.component_mask);
2811 if (wc.component_mask != BITFIELD_MASK(4))
2812 do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
2813 }
2814 }
2815 if (!do_fixup)
2816 return;
2817 nir_fixup_deref_modes(nir);
2818 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2819 optimize_nir(nir, NULL, true);
2820 }
2821
2822 /* all types that hit this function contain something that is 64bit */
2823 static const struct glsl_type *
2824 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
2825 {
2826 if (glsl_type_is_array(type)) {
2827 const struct glsl_type *child = glsl_get_array_element(type);
2828 unsigned elements = glsl_array_size(type);
2829 unsigned stride = glsl_get_explicit_stride(type);
2830 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
2831 }
2832 /* rewrite structs recursively */
2833 if (glsl_type_is_struct_or_ifc(type)) {
2834 unsigned nmembers = glsl_get_length(type);
2835 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
2836 unsigned xfb_offset = 0;
2837 for (unsigned i = 0; i < nmembers; i++) {
2838 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
2839 fields[i] = *f;
2840 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
2841 if (i < nmembers - 1 && xfb_offset % 8 &&
2842 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
2843 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
2844 var->data.is_xfb = true;
2845 }
2846 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
2847 }
2848 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
2849 }
2850 if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
2851 return type;
2852 if (doubles_only && glsl_type_is_vector_or_scalar(type))
2853 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
2854 enum glsl_base_type base_type;
2855 switch (glsl_get_base_type(type)) {
2856 case GLSL_TYPE_UINT64:
2857 base_type = GLSL_TYPE_UINT;
2858 break;
2859 case GLSL_TYPE_INT64:
2860 base_type = GLSL_TYPE_INT;
2861 break;
2862 case GLSL_TYPE_DOUBLE:
2863 base_type = GLSL_TYPE_FLOAT;
2864 break;
2865 default:
2866 unreachable("unknown 64-bit vertex attribute format!");
2867 }
2868 if (glsl_type_is_scalar(type))
2869 return glsl_vector_type(base_type, 2);
2870 unsigned num_components;
2871 if (glsl_type_is_matrix(type)) {
2872 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
2873 unsigned vec_components = glsl_get_vector_elements(type);
2874 if (vec_components == 3)
2875 vec_components = 4;
2876 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
2877 } else {
2878 num_components = glsl_get_vector_elements(type) * 2;
2879 if (num_components <= 4)
2880 return glsl_vector_type(base_type, num_components);
2881 }
2882 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
2883 struct glsl_struct_field fields[8] = {0};
2884 unsigned remaining = num_components;
2885 unsigned nfields = 0;
2886 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
2887 assert(i < ARRAY_SIZE(fields));
2888 fields[i].name = "";
2889 fields[i].offset = i * 16;
2890 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
2891 }
2892 char buf[64];
2893 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
2894 return glsl_struct_type(fields, nfields, buf, true);
2895 }
2896
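/* walk up the deref chain and return the first enclosing matrix type, or NULL if there is none */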
2897 static const struct glsl_type *
2898 deref_is_matrix(nir_deref_instr *deref)
2899 {
2900 if (glsl_type_is_matrix(deref->type))
2901 return deref->type;
2902 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2903 if (parent)
2904 return deref_is_matrix(parent);
2905 return NULL;
2906 }
2907
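/* rewrite every deref/load/store of 'var' in this function: 64-bit loads and stores are split
 * into pairs of 32-bit components (matrix rows are routed through the rewritten struct members,
 * using if-ladders for indirect column indices), and replaced instructions are queued in
 * 'deletes' for removal afterwards
 */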
2908 static bool
2909 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
2910 struct hash_table *derefs, struct set *deletes, bool doubles_only)
2911 {
2912 bool func_progress = false;
2913 nir_builder b = nir_builder_create(impl);
2914 nir_foreach_block(block, impl) {
2915 nir_foreach_instr_safe(instr, block) {
2916 switch (instr->type) {
2917 case nir_instr_type_deref: {
2918 nir_deref_instr *deref = nir_instr_as_deref(instr);
2919 if (!(deref->modes & var->data.mode))
2920 continue;
2921 if (nir_deref_instr_get_variable(deref) != var)
2922 continue;
2923
2924 /* matrix types are special: store the original deref type for later use */
2925 const struct glsl_type *matrix = deref_is_matrix(deref);
2926 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2927 if (!matrix) {
2928 /* if this isn't a direct matrix deref, it may be a matrix row deref */
2929 hash_table_foreach(derefs, he) {
2930 /* propagate parent matrix type to row deref */
2931 if (he->key == parent)
2932 matrix = he->data;
2933 }
2934 }
2935 if (matrix)
2936 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
2937 if (deref->deref_type == nir_deref_type_var)
2938 deref->type = var->type;
2939 else
2940 deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
2941 }
2942 break;
2943 case nir_instr_type_intrinsic: {
2944 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2945 if (intr->intrinsic != nir_intrinsic_store_deref &&
2946 intr->intrinsic != nir_intrinsic_load_deref)
2947 break;
2948 if (nir_intrinsic_get_var(intr, 0) != var)
2949 break;
2950 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
2951 (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
2952 break;
2953 b.cursor = nir_before_instr(instr);
2954 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2955 unsigned num_components = intr->num_components * 2;
2956 nir_def *comp[NIR_MAX_VEC_COMPONENTS];
2957 /* this is the stored matrix type from the deref */
2958 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
2959 const struct glsl_type *matrix = he ? he->data : NULL;
2960 if (doubles_only && !matrix)
2961 break;
2962 func_progress = true;
2963 if (intr->intrinsic == nir_intrinsic_store_deref) {
2964 /* first, unpack the src data to 32bit vec2 components */
2965 for (unsigned i = 0; i < intr->num_components; i++) {
2966 nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
2967 comp[i * 2] = nir_channel(&b, ssa, 0);
2968 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
2969 }
2970 unsigned wrmask = nir_intrinsic_write_mask(intr);
2971 unsigned mask = 0;
2972 /* expand writemask for doubled components */
2973 for (unsigned i = 0; i < intr->num_components; i++) {
2974 if (wrmask & BITFIELD_BIT(i))
2975 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
2976 }
2977 if (matrix) {
2978 /* matrix types always come from array (row) derefs */
2979 assert(deref->deref_type == nir_deref_type_array);
2980 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
2981 /* let optimization clean up consts later */
2982 nir_def *index = deref->arr.index.ssa;
2983 /* this might be an indirect array index:
2984 * - iterate over matrix columns
2985 * - add if blocks for each column
2986 * - perform the store in the block
2987 */
2988 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
2989 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
2990 unsigned vec_components = glsl_get_vector_elements(matrix);
2991 /* always clamp dvec3 to 4 components */
2992 if (vec_components == 3)
2993 vec_components = 4;
2994 unsigned start_component = idx * vec_components * 2;
2995 /* struct member */
2996 unsigned member = start_component / 4;
2997 /* number of components remaining */
2998 unsigned remaining = num_components;
2999 for (unsigned i = 0; i < num_components; member++) {
3000 if (!(mask & BITFIELD_BIT(i)))
3001 continue;
3002 assert(member < glsl_get_length(var_deref->type));
3003 /* deref the rewritten struct to the appropriate vec4/vec2 */
3004 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3005 unsigned incr = MIN2(remaining, 4);
3006 /* assemble the write component vec */
3007 nir_def *val = nir_vec(&b, &comp[i], incr);
3008 /* use the number of components being written as the writemask */
3009 if (glsl_get_vector_elements(strct->type) > val->num_components)
3010 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3011 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3012 remaining -= incr;
3013 i += incr;
3014 }
3015 nir_pop_if(&b, NULL);
3016 }
3017 _mesa_set_add(deletes, &deref->instr);
3018 } else if (num_components <= 4) {
3019 /* simple store case: just write out the components */
3020 nir_def *dest = nir_vec(&b, comp, num_components);
3021 nir_store_deref(&b, deref, dest, mask);
3022 } else {
3023 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3024 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3025 if (!(mask & BITFIELD_MASK(4)))
3026 continue;
3027 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3028 nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3029 if (glsl_get_vector_elements(strct->type) > dest->num_components)
3030 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3031 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3032 mask >>= 4;
3033 }
3034 }
3035 } else {
3036 nir_def *dest = NULL;
3037 if (matrix) {
3038 /* matrix types always come from array (row) derefs */
3039 assert(deref->deref_type == nir_deref_type_array);
3040 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3041 /* let optimization clean up consts later */
3042 nir_def *index = deref->arr.index.ssa;
3043 /* this might be an indirect array index:
3044 * - iterate over matrix columns
3045 * - add if blocks for each column
3046 * - phi the loads using the array index
3047 */
3048 unsigned cols = glsl_get_matrix_columns(matrix);
3049 nir_def *dests[4];
3050 for (unsigned idx = 0; idx < cols; idx++) {
3051 /* don't add an if for the final row: this will be handled in the else */
3052 if (idx < cols - 1)
3053 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3054 unsigned vec_components = glsl_get_vector_elements(matrix);
3055 /* always clamp dvec3 to 4 components */
3056 if (vec_components == 3)
3057 vec_components = 4;
3058 unsigned start_component = idx * vec_components * 2;
3059 /* struct member */
3060 unsigned member = start_component / 4;
3061 /* number of components remaining */
3062 unsigned remaining = num_components;
3063 /* component index */
3064 unsigned comp_idx = 0;
3065 for (unsigned i = 0; i < num_components; member++) {
3066 assert(member < glsl_get_length(var_deref->type));
3067 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3068 nir_def *load = nir_load_deref(&b, strct);
3069 unsigned incr = MIN2(remaining, 4);
3070 /* repack the loads to 64bit */
3071 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3072 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3073 remaining -= incr;
3074 i += incr;
3075 }
3076 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3077 if (idx < cols - 1)
3078 nir_push_else(&b, NULL);
3079 }
3080 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3081 for (unsigned idx = cols - 1; idx >= 1; idx--) {
3082 nir_pop_if(&b, NULL);
3083 dest = nir_if_phi(&b, dests[idx - 1], dest);
3084 }
3085 _mesa_set_add(deletes, &deref->instr);
3086 } else if (num_components <= 4) {
3087 /* simple load case */
3088 nir_def *load = nir_load_deref(&b, deref);
3089 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3090 for (unsigned i = 0; i < intr->num_components; i++) {
3091 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3092 }
3093 dest = nir_vec(&b, comp, intr->num_components);
3094 } else {
3095 /* writing > 4 components: access the struct and load the appropriate vec4 members */
3096 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3097 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3098 nir_def *load = nir_load_deref(&b, strct);
3099 comp[i * 2] = nir_pack_64_2x32(&b,
3100 nir_trim_vector(&b, load, 2));
3101 if (num_components > 2)
3102 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3103 }
3104 dest = nir_vec(&b, comp, intr->num_components);
3105 }
3106 nir_def_rewrite_uses_after(&intr->def, dest, instr);
3107 }
3108 _mesa_set_add(deletes, instr);
3109 break;
3110 }
3111 break;
3112 default: break;
3113 }
3114 }
3115 }
3116 if (func_progress)
3117 nir_metadata_preserve(impl, nir_metadata_none);
3118 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3119 set_foreach_remove(deletes, he)
3120 nir_instr_remove((void*)he->key);
3121 return func_progress;
3122 }
3123
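/* rewrite a single 64-bit variable's type, then update all of its accesses in every function */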
3124 static bool
3125 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3126 struct set *deletes, bool doubles_only)
3127 {
3128 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3129 return false;
3130 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3131 /* once type is rewritten, rewrite all loads and stores */
3132 nir_foreach_function_impl(impl, shader)
3133 lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3134 return true;
3135 }
3136
3137 /* rewrite all input/output variables using 32bit types and load/stores */
3138 static bool
3139 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3140 {
3141 bool progress = false;
3142 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3143 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3144 nir_foreach_function_impl(impl, shader) {
3145 nir_foreach_function_temp_variable(var, impl) {
3146 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3147 continue;
3148 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3149 progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3150 }
3151 }
3152 ralloc_free(deletes);
3153 ralloc_free(derefs);
3154 if (progress) {
3155 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3156 nir_lower_phis_to_scalar(shader, false);
3157 optimize_nir(shader, NULL, true);
3158 }
3159 return progress;
3160 }
3161
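/* debug helper: dump a shader's raw SPIR-V binary to the given file */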
3162 static void
3163 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3164 {
3165 FILE *fp = fopen(file, "wb");
3166 if (fp) {
3167 fwrite(words, 1, size, fp);
3168 fclose(fp);
3169 fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3170 }
3171 }
3172
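/* VK_EXT_shader_object wants the set of stages allowed to follow this one (VkShaderCreateInfoEXT::nextStage) */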
3173 static VkShaderStageFlagBits
3174 zink_get_next_stage(gl_shader_stage stage)
3175 {
3176 switch (stage) {
3177 case MESA_SHADER_VERTEX:
3178 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3179 VK_SHADER_STAGE_GEOMETRY_BIT |
3180 VK_SHADER_STAGE_FRAGMENT_BIT;
3181 case MESA_SHADER_TESS_CTRL:
3182 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3183 case MESA_SHADER_TESS_EVAL:
3184 return VK_SHADER_STAGE_GEOMETRY_BIT |
3185 VK_SHADER_STAGE_FRAGMENT_BIT;
3186 case MESA_SHADER_GEOMETRY:
3187 return VK_SHADER_STAGE_FRAGMENT_BIT;
3188 case MESA_SHADER_FRAGMENT:
3189 case MESA_SHADER_COMPUTE:
3190 case MESA_SHADER_KERNEL:
3191 return 0;
3192 default:
3193 unreachable("invalid shader stage");
3194 }
3195 }
3196
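/* create either a VkShaderModule or (with EXT_shader_object) a VkShaderEXT from the SPIR-V;
 * in debug builds with ZINK_DEBUG_VALIDATION the SPIR-V is also round-tripped through
 * spirv_to_nir as a sanity check
 */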
3197 struct zink_shader_object
3198 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3199 {
3200 VkShaderModuleCreateInfo smci = {0};
3201 VkShaderCreateInfoEXT sci = {0};
3202
3203 if (!spirv)
3204 spirv = zs->spirv;
3205
3206 if (zink_debug & ZINK_DEBUG_SPIRV) {
3207 char buf[256];
3208 static int i;
3209 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3210 zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3211 }
3212
3213 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3214 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3215 sci.nextStage = zink_get_next_stage(zs->info.stage);
3216 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3217 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3218 sci.pCode = spirv->words;
3219 sci.pName = "main";
3220 VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3221 if (pg) {
3222 sci.setLayoutCount = pg->num_dsl;
3223 sci.pSetLayouts = pg->dsl;
3224 } else {
3225 sci.setLayoutCount = zs->info.stage + 1;
3226 dsl[zs->info.stage] = zs->precompile.dsl;
3227 sci.pSetLayouts = dsl;
3228 }
3229 VkPushConstantRange pcr;
3230 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3231 pcr.offset = 0;
3232 pcr.size = sizeof(struct zink_gfx_push_constant);
3233 sci.pushConstantRangeCount = 1;
3234 sci.pPushConstantRanges = &pcr;
3235
3236 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3237 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3238 smci.pCode = spirv->words;
3239
3240 #ifndef NDEBUG
3241 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3242 static const struct spirv_to_nir_options spirv_options = {
3243 .environment = NIR_SPIRV_VULKAN,
3244 .caps = {
3245 .float64 = true,
3246 .int16 = true,
3247 .int64 = true,
3248 .tessellation = true,
3249 .float_controls = true,
3250 .image_ms_array = true,
3251 .image_read_without_format = true,
3252 .image_write_without_format = true,
3253 .storage_image_ms = true,
3254 .geometry_streams = true,
3255 .storage_8bit = true,
3256 .storage_16bit = true,
3257 .variable_pointers = true,
3258 .stencil_export = true,
3259 .post_depth_coverage = true,
3260 .transform_feedback = true,
3261 .device_group = true,
3262 .draw_parameters = true,
3263 .shader_viewport_index_layer = true,
3264 .multiview = true,
3265 .physical_storage_buffer_address = true,
3266 .int64_atomics = true,
3267 .subgroup_arithmetic = true,
3268 .subgroup_basic = true,
3269 .subgroup_ballot = true,
3270 .subgroup_quad = true,
3271 .subgroup_shuffle = true,
3272 .subgroup_vote = true,
3273 .vk_memory_model = true,
3274 .vk_memory_model_device_scope = true,
3275 .int8 = true,
3276 .float16 = true,
3277 .demote_to_helper_invocation = true,
3278 .sparse_residency = true,
3279 .min_lod = true,
3280 .workgroup_memory_explicit_layout = true,
3281 },
3282 .ubo_addr_format = nir_address_format_32bit_index_offset,
3283 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3284 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3285 .push_const_addr_format = nir_address_format_logical,
3286 .shared_addr_format = nir_address_format_32bit_offset,
3287 };
3288 uint32_t num_spec_entries = 0;
3289 struct nir_spirv_specialization *spec_entries = NULL;
3290 VkSpecializationInfo sinfo = {0};
3291 VkSpecializationMapEntry me[3];
3292 uint32_t size[3] = {1,1,1};
3293 if (!zs->info.workgroup_size[0]) {
3294 sinfo.mapEntryCount = 3;
3295 sinfo.pMapEntries = &me[0];
3296 sinfo.dataSize = sizeof(uint32_t) * 3;
3297 sinfo.pData = size;
3298 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3299 for (int i = 0; i < 3; i++) {
3300 me[i].size = sizeof(uint32_t);
3301 me[i].constantID = ids[i];
3302 me[i].offset = i * sizeof(uint32_t);
3303 }
3304 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3305 }
3306 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3307 spec_entries, num_spec_entries,
3308 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3309 assert(nir);
3310 ralloc_free(nir);
3311 free(spec_entries);
3312 }
3313 #endif
3314
3315 VkResult ret;
3316 struct zink_shader_object obj = {0};
3317 if (!can_shobj || !screen->info.have_EXT_shader_object)
3318 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3319 else
3320 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3321 ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3322 assert(success);
3323 return obj;
3324 }
3325
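/* demote shader in/out variables that have no remaining derefs or io intrinsics to temporaries
 * so dead-variable removal can delete them
 */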
3326 static void
3327 prune_io(nir_shader *nir)
3328 {
3329 nir_foreach_shader_in_variable_safe(var, nir) {
3330 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3331 var->data.mode = nir_var_shader_temp;
3332 }
3333 nir_foreach_shader_out_variable_safe(var, nir) {
3334 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3335 var->data.mode = nir_var_shader_temp;
3336 }
3337 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3338 }
3339
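/* record this sampler in legacy_shadow_mask so a swizzled variant can be compiled at draw time */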
3340 static void
3341 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3342 {
3343 /* unconvert from zink_binding() */
3344 uint32_t sampler_id = var->data.binding - (PIPE_MAX_SAMPLERS * MESA_SHADER_FRAGMENT);
3345 assert(sampler_id < 32); //bitfield size for tracking
3346 zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
3347 }
3348
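/* convert the tex result to the sampler's declared return type bit size; legacy (vec4-result)
 * shadow sampling is rewritten to new-style scalar shadow, flagging a draw-time recompile when
 * more than the first component is actually read
 */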
3349 static nir_def *
3350 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3351 {
3352 assert(var);
3353 const struct glsl_type *type = glsl_without_array(var->type);
3354 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3355 bool is_int = glsl_base_type_is_integer(ret_type);
3356 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3357 unsigned dest_size = tex->def.bit_size;
3358 b->cursor = nir_after_instr(&tex->instr);
3359 unsigned num_components = tex->def.num_components;
3360 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3361 if (bit_size == dest_size && !rewrite_depth)
3362 return NULL;
3363 nir_def *dest = &tex->def;
3364 if (rewrite_depth && zs) {
3365 if (nir_def_components_read(dest) & ~1) {
3366 /* this needs recompiles */
3367 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3368 flag_shadow_tex(var, zs);
3369 else
3370 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3371 return NULL;
3372 }
3373 /* If only .x is used in the NIR, then it's effectively not a legacy depth
3374 * sample anyway and we don't want to ask for shader recompiles. This is
3375 * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3376 * LUMINANCE, so apps just use the first channel.
3377 */
3378 tex->def.num_components = 1;
3379 tex->is_new_style_shadow = true;
3380 }
3381 if (bit_size != dest_size) {
3382 tex->def.bit_size = bit_size;
3383 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3384
3385 if (is_int) {
3386 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3387 dest = nir_u2uN(b, &tex->def, dest_size);
3388 else
3389 dest = nir_i2iN(b, &tex->def, dest_size);
3390 } else {
3391 dest = nir_f2fN(b, &tex->def, dest_size);
3392 }
3393 if (rewrite_depth)
3394 return dest;
3395 nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3396 } else if (rewrite_depth) {
3397 return dest;
3398 }
3399 return dest;
3400 }
3401
3402 struct lower_zs_swizzle_state {
3403 bool shadow_only;
3404 unsigned base_sampler_id;
3405 const struct zink_zs_swizzle_key *swizzle;
3406 };
3407
3408 static bool
3409 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3410 {
3411 struct lower_zs_swizzle_state *state = data;
3412 const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3413 assert(state->shadow_only || swizzle_key);
3414 if (instr->type != nir_instr_type_tex)
3415 return false;
3416 nir_tex_instr *tex = nir_instr_as_tex(instr);
3417 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3418 (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3419 return false;
3420 if (tex->is_shadow && tex->op == nir_texop_tg4)
3421 /* Will not even try to emulate the shadow comparison */
3422 return false;
3423 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3424 nir_variable *var = NULL;
3425 if (handle != -1)
3426 /* gtfo bindless depth texture mode */
3427 return false;
3428 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
3429 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
3430 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
3431 if (tex->texture_index >= img->data.driver_location &&
3432 tex->texture_index < img->data.driver_location + size) {
3433 var = img;
3434 break;
3435 }
3436 }
3437 }
3438 assert(var);
3439 uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3440 const struct glsl_type *type = glsl_without_array(var->type);
3441 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3442 bool is_int = glsl_base_type_is_integer(ret_type);
3443 unsigned num_components = tex->def.num_components;
3444 if (tex->is_shadow)
3445 tex->is_new_style_shadow = true;
3446 nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3447 assert(dest || !state->shadow_only);
3448 if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3449 return false;
3450 else if (!dest)
3451 dest = &tex->def;
3452 else
3453 tex->def.num_components = 1;
3454 if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3455 /* these require manual swizzles */
3456 if (tex->op == nir_texop_tg4) {
3457 assert(!tex->is_shadow);
3458 nir_def *swizzle;
3459 switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3460 case PIPE_SWIZZLE_0:
3461 swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3462 break;
3463 case PIPE_SWIZZLE_1:
3464 if (is_int)
3465 swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3466 else
3467 swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3468 break;
3469 default:
3470 if (!tex->component)
3471 return false;
3472 tex->component = 0;
3473 return true;
3474 }
3475 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3476 return true;
3477 }
3478 nir_def *vec[4];
3479 for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3480 switch (swizzle_key->swizzle[sampler_id].s[i]) {
3481 case PIPE_SWIZZLE_0:
3482 vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3483 break;
3484 case PIPE_SWIZZLE_1:
3485 if (is_int)
3486 vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3487 else
3488 vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3489 break;
3490 default:
3491 vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3492 break;
3493 }
3494 }
3495 nir_def *swizzle = nir_vec(b, vec, num_components);
3496 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3497 } else {
3498 assert(tex->is_shadow);
3499 nir_def *vec[4] = {dest, dest, dest, dest};
3500 nir_def *splat = nir_vec(b, vec, num_components);
3501 nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3502 }
3503 return true;
3504 }
3505
3506 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3507 *
3508 * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3509 * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3510 * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3511 * shader to expand out to vec4. Since this depends on sampler state, it's a
3512 * draw-time shader recompile to do so.
3513 *
3514 * We may also need to apply shader swizzles for
3515 * driver_workarounds.needs_zs_shader_swizzle.
3516 */
3517 static bool
3518 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3519 {
3520 /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3521 unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3522 struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3523 return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
3524 }
3525
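/* flip PointCoord.y (1 - y) for keys that request it, matching GL's point sprite coord origin */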
3526 static bool
3527 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3528 void *data)
3529 {
3530 if (intr->intrinsic != nir_intrinsic_load_point_coord)
3531 return false;
3532 b->cursor = nir_after_instr(&intr->instr);
3533 nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3534 nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3535 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3536 return true;
3537 }
3538
3539 static bool
3540 invert_point_coord(nir_shader *nir)
3541 {
3542 if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3543 return false;
3544 return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3545 nir_metadata_dominance, NULL);
3546 }
3547
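/* walk back through any alu ops to determine whether this value is still a raw sparse residency
 * code produced by a texture op (true) or has already been converted by
 * is_sparse_texels_resident (false)
 */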
3548 static bool
3549 is_residency_code(nir_def *src)
3550 {
3551 nir_instr *parent = src->parent_instr;
3552 while (1) {
3553 if (parent->type == nir_instr_type_intrinsic) {
3554 ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
3555 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
3556 return false;
3557 }
3558 if (parent->type == nir_instr_type_tex)
3559 return true;
3560 assert(parent->type == nir_instr_type_alu);
3561 nir_alu_instr *alu = nir_instr_as_alu(parent);
3562 parent = alu->src[0].src.ssa->parent_instr;
3563 }
3564 }
3565
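/* sparse_residency_code_and combines two residency values; convert any raw residency codes to
 * booleans via is_sparse_texels_resident and replace the intrinsic with a plain iand
 */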
3566 static bool
3567 lower_sparse_and_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
3568 {
3569 if (instr->intrinsic != nir_intrinsic_sparse_residency_code_and)
3570 return false;
3571
3572 b->cursor = nir_before_instr(&instr->instr);
3573 nir_def *src0;
3574 if (is_residency_code(instr->src[0].ssa))
3575 src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
3576 else
3577 src0 = instr->src[0].ssa;
3578 nir_def *src1;
3579 if (is_residency_code(instr->src[1].ssa))
3580 src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
3581 else
3582 src1 = instr->src[1].ssa;
3583 nir_def *def = nir_iand(b, src0, src1);
3584 nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
3585 nir_instr_remove(&instr->instr);
3586 return true;
3587 }
3588
3589 static bool
3590 lower_sparse_and(nir_shader *shader)
3591 {
3592 return nir_shader_intrinsics_pass(shader, lower_sparse_and_instr,
3593 nir_metadata_dominance, NULL);
3594 }
3595
3596 static bool
3597 lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
3598 {
3599 if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
3600 return false;
3601
3602 /* vulkan vec can only be a vec4, but this is (maybe) vec5,
3603 * so just rewrite as the first component since ntv is going to use a different
3604 * method for storing the residency value anyway
3605 */
3606 b->cursor = nir_before_instr(&instr->instr);
3607 nir_instr *parent = instr->src[0].ssa->parent_instr;
3608 if (is_residency_code(instr->src[0].ssa)) {
3609 assert(parent->type == nir_instr_type_alu);
3610 nir_alu_instr *alu = nir_instr_as_alu(parent);
3611 nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
3612 nir_instr_remove(parent);
3613 } else {
3614 nir_def *src;
3615 if (parent->type == nir_instr_type_intrinsic) {
3616 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
3617 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
3618 src = intr->src[0].ssa;
3619 } else {
3620 assert(parent->type == nir_instr_type_alu);
3621 nir_alu_instr *alu = nir_instr_as_alu(parent);
3622 src = alu->src[0].src.ssa;
3623 }
3624 if (instr->def.bit_size != 32) {
3625 if (instr->def.bit_size == 1)
3626 src = nir_ieq_imm(b, src, 1);
3627 else
3628 src = nir_u2uN(b, src, instr->def.bit_size);
3629 }
3630 nir_def_rewrite_uses(&instr->def, src);
3631 nir_instr_remove(&instr->instr);
3632 }
3633 return true;
3634 }
3635
3636 static bool
3637 lower_sparse(nir_shader *shader)
3638 {
3639 return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
3640 nir_metadata_dominance, NULL);
3641 }
3642
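/* zink's ntv path consumes variable derefs rather than lowered io intrinsics: match each
 * load/store/interp intrinsic back to the variable covering its location/component and rewrite
 * it as a deref-based access
 */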
3643 static bool
3644 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3645 {
3646 bool is_load = false;
3647 bool is_input = false;
3648 bool is_interp = false;
3649 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3650 return false;
3651 unsigned loc = nir_intrinsic_io_semantics(intr).location;
3652 nir_src *src_offset = nir_get_io_offset_src(intr);
3653 const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3654 unsigned location = loc + slot_offset;
3655 unsigned frac = nir_intrinsic_component(intr);
3656 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3657 /* set c aligned/rounded down to dword */
3658 unsigned c = frac;
3659 if (frac && bit_size < 32)
3660 c = frac * bit_size / 32;
3661 /* loop over all the variables and rewrite corresponding access */
3662 nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3663 const struct glsl_type *type = var->type;
3664 if (nir_is_arrayed_io(var, b->shader->info.stage))
3665 type = glsl_get_array_element(type);
3666 unsigned slot_count = get_var_slot_count(b->shader, var);
3667 /* filter access that isn't specific to this variable */
3668 if (var->data.location > location || var->data.location + slot_count <= location)
3669 continue;
3670 if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3671 continue;
3672 if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3673 continue;
3674
3675 unsigned size = 0;
3676 bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3677 if (is_struct)
3678 size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3679 else if ((var->data.mode == nir_var_shader_out && var->data.location < VARYING_SLOT_VAR0) ||
3680 (var->data.mode == nir_var_shader_in && var->data.location < (b->shader->info.stage == MESA_SHADER_VERTEX ? VERT_ATTRIB_GENERIC0 : VARYING_SLOT_VAR0)))
3681 size = glsl_type_is_array(type) ? glsl_get_aoa_size(type) : glsl_get_vector_elements(type);
3682 else
3683 size = glsl_get_vector_elements(glsl_without_array(type));
3684 assert(size);
3685 if (glsl_type_is_64bit(glsl_without_array(var->type)))
3686 size *= 2;
3687 if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3688 /* adjust for dvec3-type slot overflow */
3689 assert(location > var->data.location);
3690 size -= (location - var->data.location) * 4;
3691 }
3692 assert(size);
3693 if (var->data.location_frac + size <= c || var->data.location_frac > c)
3694 continue;
3695
3696 b->cursor = nir_before_instr(&intr->instr);
3697 nir_deref_instr *deref = nir_build_deref_var(b, var);
3698 if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3699 assert(intr->intrinsic != nir_intrinsic_store_output);
3700 deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3701 }
3702 if (glsl_type_is_array(type)) {
3703 /* unroll array derefs */
3704 unsigned idx = frac - var->data.location_frac;
3705 assert(src_offset);
3706 if (var->data.location < VARYING_SLOT_VAR0) {
3707 if (src_offset) {
3708 /* clip/cull dist and tess levels use different array offset semantics */
3709 bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3710 var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1;
3711 bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3712 (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3713 bool is_builtin_array = is_clipdist || is_tess_level;
3714 /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3715 if (nir_src_is_const(*src_offset)) {
3716 unsigned offset = slot_offset;
3717 if (is_builtin_array)
3718 offset *= 4;
3719 deref = nir_build_deref_array_imm(b, deref, offset + idx);
3720 } else {
3721 nir_def *offset = src_offset->ssa;
3722 if (is_builtin_array)
3723 offset = nir_imul_imm(b, offset, 4);
3724 deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
3725 }
3726 } else {
3727 deref = nir_build_deref_array_imm(b, deref, idx);
3728 }
3729 type = glsl_get_array_element(type);
3730 } else {
3731 /* need to convert possible N*M to [N][M] */
3732 nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3733 while (glsl_type_is_array(type)) {
3734 const struct glsl_type *elem = glsl_get_array_element(type);
3735 unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3736 nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3737 if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3738 n = nir_udiv_imm(b, n, 2);
3739 deref = nir_build_deref_array(b, deref, n);
3740 nm = nir_umod_imm(b, nm, type_size);
3741 type = glsl_get_array_element(type);
3742 }
3743 }
3744 } else if (glsl_type_is_struct(type)) {
3745 deref = nir_build_deref_struct(b, deref, slot_offset);
3746 }
3747 if (is_load) {
3748 nir_def *load;
3749 if (is_interp) {
3750 nir_def *interp = intr->src[0].ssa;
3751 nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3752 assert(interp_intr);
3753 var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3754 switch (interp_intr->intrinsic) {
3755 case nir_intrinsic_load_barycentric_centroid:
3756 load = nir_interp_deref_at_centroid(b, intr->num_components, bit_size, &deref->def);
3757 break;
3758 case nir_intrinsic_load_barycentric_sample:
3759 var->data.sample = 1;
3760 load = nir_load_deref(b, deref);
3761 break;
3762 case nir_intrinsic_load_barycentric_pixel:
3763 load = nir_load_deref(b, deref);
3764 break;
3765 case nir_intrinsic_load_barycentric_at_sample:
3766 load = nir_interp_deref_at_sample(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3767 break;
3768 case nir_intrinsic_load_barycentric_at_offset:
3769 load = nir_interp_deref_at_offset(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3770 break;
3771 default:
3772 unreachable("unhandled interp!");
3773 }
3774 } else {
3775 load = nir_load_deref(b, deref);
3776 }
3777 /* filter needed components */
3778 if (intr->num_components < load->num_components)
3779 load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3780 nir_def_rewrite_uses(&intr->def, load);
3781 } else {
3782 nir_def *store = intr->src[0].ssa;
3783 assert(!glsl_type_is_array(type));
3784 unsigned num_components = glsl_get_vector_elements(type);
3785 /* pad/filter components to match deref type */
3786 if (intr->num_components < num_components) {
3787 nir_def *zero = nir_imm_zero(b, 1, bit_size);
3788 nir_def *vec[4] = {zero, zero, zero, zero};
3789 u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3790 vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3791 store = nir_vec(b, vec, num_components);
3792 } if (store->num_components > num_components) {
3793 store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3794 }
3795 if (store->bit_size != glsl_get_bit_size(type)) {
3796 /* this should be some weird bindless io conversion */
3797 assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3798 assert(num_components != store->num_components);
3799 store = nir_unpack_64_2x32(b, store);
3800 }
3801 nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3802 }
3803 nir_instr_remove(&intr->instr);
3804 return true;
3805 }
3806 unreachable("failed to find variable for explicit io!");
3807 return true;
3808 }
3809
3810 static bool
3811 add_derefs(nir_shader *nir)
3812 {
3813 return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3814 nir_metadata_dominance, NULL);
3815 }
3816
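/* final step shared by all compile paths: prune unused io, leave SSA, print debug output, and run ntv */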
3817 static struct zink_shader_object
3818 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3819 {
3820 struct zink_shader_info *sinfo = &zs->sinfo;
3821 prune_io(nir);
3822
3823 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3824
3825 if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3826 nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3827 if (zink_debug & ZINK_DEBUG_NIR) {
3828 fprintf(stderr, "NIR shader:\n---8<---\n");
3829 nir_print_shader(nir, stderr);
3830 fprintf(stderr, "---8<---\n");
3831 }
3832
3833 struct zink_shader_object obj = {0};
3834 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
3835 if (spirv)
3836 obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3837
3838 /* TODO: determine if there's any reason to cache spirv output? */
3839 if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3840 zs->spirv = spirv;
3841 else
3842 obj.spirv = spirv;
3843 return obj;
3844 }
3845
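/* apply all shader-key-dependent lowering (vertex attrib decomposition, gs line/point emulation,
 * fs sample mask / swizzle workarounds, uniform inlining, etc) before handing off to compile_module
 */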
3846 struct zink_shader_object
3847 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
3848 nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
3849 {
3850 bool need_optimize = true;
3851 bool inlined_uniforms = false;
3852
3853 NIR_PASS_V(nir, add_derefs);
3854 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
3855 if (key) {
3856 if (key->inline_uniforms) {
3857 NIR_PASS_V(nir, nir_inline_uniforms,
3858 nir->info.num_inlinable_uniforms,
3859 key->base.inlined_uniform_values,
3860 nir->info.inlinable_uniform_dw_offsets);
3861
3862 inlined_uniforms = true;
3863 }
3864
3865 /* TODO: use a separate mem ctx here for ralloc */
3866
3867 if (!screen->optimal_keys) {
3868 switch (zs->info.stage) {
3869 case MESA_SHADER_VERTEX: {
3870 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
3871 const struct zink_vs_key *vs_key = zink_vs_key(key);
3872 switch (vs_key->size) {
3873 case 4:
3874 decomposed_attrs = vs_key->u32.decomposed_attrs;
3875 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
3876 break;
3877 case 2:
3878 decomposed_attrs = vs_key->u16.decomposed_attrs;
3879 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
3880 break;
3881 case 1:
3882 decomposed_attrs = vs_key->u8.decomposed_attrs;
3883 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
3884 break;
3885 default: break;
3886 }
3887 if (decomposed_attrs || decomposed_attrs_without_w)
3888 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
3889 break;
3890 }
3891
3892 case MESA_SHADER_GEOMETRY:
3893 if (zink_gs_key(key)->lower_line_stipple) {
3894 NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
3895 NIR_PASS_V(nir, nir_lower_var_copies);
3896 need_optimize = true;
3897 }
3898
3899 if (zink_gs_key(key)->lower_line_smooth) {
3900 NIR_PASS_V(nir, lower_line_smooth_gs);
3901 NIR_PASS_V(nir, nir_lower_var_copies);
3902 need_optimize = true;
3903 }
3904
3905 if (zink_gs_key(key)->lower_gl_point) {
3906 NIR_PASS_V(nir, lower_gl_point_gs);
3907 need_optimize = true;
3908 }
3909
3910 if (zink_gs_key(key)->lower_pv_mode) {
3911 NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
3912 need_optimize = true; //TODO verify that this is required
3913 }
3914 break;
3915
3916 default:
3917 break;
3918 }
3919 }
3920
3921 switch (zs->info.stage) {
3922 case MESA_SHADER_VERTEX:
3923 case MESA_SHADER_TESS_EVAL:
3924 case MESA_SHADER_GEOMETRY:
3925 if (zink_vs_key_base(key)->last_vertex_stage) {
3926 if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
3927 NIR_PASS_V(nir, nir_lower_clip_halfz);
3928 }
3929 if (zink_vs_key_base(key)->push_drawid) {
3930 NIR_PASS_V(nir, lower_drawid);
3931 }
3932 } else {
3933 nir->xfb_info = NULL;
3934 }
3935 if (zink_vs_key_base(key)->robust_access)
3936 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3937 break;
3938 case MESA_SHADER_FRAGMENT:
3939 if (zink_fs_key(key)->lower_line_smooth) {
3940 NIR_PASS_V(nir, lower_line_smooth_fs,
3941 zink_fs_key(key)->lower_line_stipple);
3942 need_optimize = true;
3943 } else if (zink_fs_key(key)->lower_line_stipple)
3944 NIR_PASS_V(nir, lower_line_stipple_fs);
3945
3946 if (zink_fs_key(key)->lower_point_smooth) {
3947 NIR_PASS_V(nir, nir_lower_point_smooth);
3948 NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
3949 nir->info.fs.uses_discard = true;
3950 need_optimize = true;
3951 }
3952
3953 if (zink_fs_key(key)->robust_access)
3954 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
3955
3956 if (!zink_fs_key_base(key)->samples &&
3957 nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
3958 /* VK will always use gl_SampleMask[] values even if sample count is 0,
3959 * so we need to skip this write here to mimic GL's behavior of ignoring it
3960 */
3961 nir_foreach_shader_out_variable(var, nir) {
3962 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
3963 var->data.mode = nir_var_shader_temp;
3964 }
3965 nir_fixup_deref_modes(nir);
3966 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3967 need_optimize = true;
3968 }
3969 if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
3970 NIR_PASS_V(nir, lower_dual_blend);
3971 }
3972 if (zink_fs_key_base(key)->coord_replace_bits)
3973 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
3974 if (zink_fs_key_base(key)->point_coord_yinvert)
3975 NIR_PASS_V(nir, invert_point_coord);
3976 if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
3977 nir_foreach_shader_in_variable(var, nir)
3978 var->data.sample = true;
3979 nir->info.fs.uses_sample_qualifier = true;
3980 nir->info.fs.uses_sample_shading = true;
3981 }
3982 if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
3983 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
3984 if (nir->info.fs.uses_fbfetch_output) {
3985 nir_variable *fbfetch = NULL;
3986 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
3987 /* old variable must be deleted to avoid spirv errors */
3988 fbfetch->data.mode = nir_var_shader_temp;
3989 nir_fixup_deref_modes(nir);
3990 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3991 need_optimize = true;
3992 }
3993 nir_foreach_shader_in_variable_safe(var, nir) {
3994 if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
3995 continue;
3996 nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
3997 var->data.mode = nir_var_shader_temp;
3998 nir_fixup_deref_modes(nir);
3999 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4000 need_optimize = true;
4001 }
4002 break;
4003 case MESA_SHADER_COMPUTE:
4004 if (zink_cs_key(key)->robust_access)
4005 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4006 break;
4007 default: break;
4008 }
4009 if (key->base.needs_zs_shader_swizzle) {
4010 assert(extra_data);
4011 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4012 }
4013 if (key->base.nonseamless_cube_mask) {
4014 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4015 need_optimize = true;
4016 }
4017 }
4018 if (screen->driconf.inline_uniforms) {
4019 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4020 NIR_PASS_V(nir, rewrite_bo_access, screen);
4021 NIR_PASS_V(nir, remove_bo_access, zs);
4022 need_optimize = true;
4023 }
4024 if (inlined_uniforms) {
4025 optimize_nir(nir, zs, true);
4026
4027 /* This must be done again. */
4028 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4029 nir_var_shader_out);
4030
4031 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4032 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4033 zs->can_inline = false;
4034 } else if (need_optimize)
4035 optimize_nir(nir, zs, true);
4036 bool has_sparse = false;
4037 NIR_PASS(has_sparse, nir, lower_sparse);
4038 if (has_sparse)
4039 optimize_nir(nir, zs, false);
4040
4041 struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4042 ralloc_free(nir);
4043 return obj;
4044 }
4045
4046 struct zink_shader_object
4047 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4048 {
4049 nir_shader *nir = zink_shader_deserialize(screen, zs);
4050 /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4051 int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4052 if (screen->info.have_EXT_shader_object)
4053 set = zs->info.stage;
4054 unsigned offsets[4];
4055 zink_descriptor_shader_get_binding_offsets(zs, offsets);
4056 nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4057 if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4058 continue;
4059 var->data.descriptor_set = set;
4060 switch (var->data.mode) {
4061 case nir_var_mem_ubo:
4062 var->data.binding = !!var->data.driver_location;
4063 break;
4064 case nir_var_uniform:
4065 if (glsl_type_is_sampler(glsl_without_array(var->type)))
4066 var->data.binding += offsets[1];
4067 break;
4068 case nir_var_mem_ssbo:
4069 var->data.binding += offsets[2];
4070 break;
4071 case nir_var_image:
4072 var->data.binding += offsets[3];
4073 break;
4074 default: break;
4075 }
4076 }
4077 NIR_PASS_V(nir, add_derefs);
4078 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4079 if (screen->driconf.inline_uniforms) {
4080 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4081 NIR_PASS_V(nir, rewrite_bo_access, screen);
4082 NIR_PASS_V(nir, remove_bo_access, zs);
4083 }
4084 optimize_nir(nir, zs, true);
4085 zink_descriptor_shader_init(screen, zs);
4086 nir_shader *nir_clone = NULL;
4087 if (screen->info.have_EXT_shader_object)
4088 nir_clone = nir_shader_clone(nir, nir);
4089 struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4090 if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4091 /* always try to pre-generate a tcs in case it's needed */
4092 if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4093 nir_shader *nir_tcs = NULL;
4094 /* use max pcp for compat */
4095 zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
4096 nir_tcs->info.separate_shader = true;
4097 zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4098 ralloc_free(nir_tcs);
4099 }
4100 }
4101 ralloc_free(nir);
4102 spirv_shader_delete(obj.spirv);
4103 obj.spirv = NULL;
4104 return obj;
4105 }
4106
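/* GL's gl_InstanceID does not include the base instance, while Vulkan's InstanceIndex does,
 * so subtract load_base_instance to restore GL semantics
 */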
4107 static bool
4108 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4109 void *data)
4110 {
4111 if (intr->intrinsic != nir_intrinsic_load_instance_id)
4112 return false;
4113 b->cursor = nir_after_instr(&intr->instr);
4114 nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4115 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4116 return true;
4117 }
4118
4119 static bool
4120 lower_baseinstance(nir_shader *shader)
4121 {
4122 if (shader->info.stage != MESA_SHADER_VERTEX)
4123 return false;
4124 return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4125 nir_metadata_dominance, NULL);
4126 }
4127
4128 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4129 * so instead we delete all those broken variables and just make new ones
4130 */
4131 static bool
4132 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4133 {
4134 uint64_t max_ssbo_size = 0;
4135 uint64_t max_ubo_size = 0;
4136 uint64_t max_uniform_size = 0;
4137
4138 if (!shader->info.num_ssbos && !shader->info.num_ubos)
4139 return false;
4140
4141 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4142 const struct glsl_type *type = glsl_without_array(var->type);
4143 if (type_is_counter(type))
4144 continue;
4145 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4146 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4147 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4148 if (interface_type) {
4149 unsigned block_size = glsl_get_explicit_size(interface_type, true);
4150 if (glsl_get_length(interface_type) == 1) {
4151 /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4152 const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4153 if (glsl_type_is_array(f) && !glsl_array_size(f))
4154 block_size = 0;
4155 }
4156 if (block_size) {
4157 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4158 size = MAX2(size, block_size);
4159 }
4160 }
4161 if (var->data.mode == nir_var_mem_ubo) {
4162 if (var->data.driver_location)
4163 max_ubo_size = MAX2(max_ubo_size, size);
4164 else
4165 max_uniform_size = MAX2(max_uniform_size, size);
4166 } else {
4167 max_ssbo_size = MAX2(max_ssbo_size, size);
4168 if (interface_type) {
4169 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4170 needs_size = true;
4171 }
4172 }
4173 var->data.mode = nir_var_shader_temp;
4174 }
4175 nir_fixup_deref_modes(shader);
4176 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4177 optimize_nir(shader, NULL, true);
4178
4179 struct glsl_struct_field field = {0};
4180 field.name = ralloc_strdup(shader, "base");
4181 if (shader->info.num_ubos) {
4182 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4183 field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4184 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4185 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4186 "uniform_0@32");
4187 var->interface_type = var->type;
4188 var->data.mode = nir_var_mem_ubo;
4189 var->data.driver_location = 0;
4190 }
4191
4192 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4193 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4194 if (num_ubos && ubos_used) {
4195 field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4196 /* shrink array as much as possible */
4197 unsigned first_ubo = ffs(ubos_used) - 2;
4198 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4199 num_ubos -= first_ubo;
4200 assert(num_ubos);
4201 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4202 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4203 "ubos@32");
4204 var->interface_type = var->type;
4205 var->data.mode = nir_var_mem_ubo;
4206 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4207 }
4208 }
4209 if (shader->info.num_ssbos && zs->ssbos_used) {
4210 /* shrink array as much as possible */
4211 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4212 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4213 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4214 assert(num_ssbos);
4215 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4216 field.type = ssbo_type;
4217 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4218 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4219 "ssbos@32");
4220 var->interface_type = var->type;
4221 var->data.mode = nir_var_mem_ssbo;
4222 var->data.driver_location = first_ssbo;
4223 }
4224 return true;
4225 }
4226
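/* conservative usage masks: a constant block index marks just that slot, otherwise assume every
 * slot may be accessed (excluding the default uniform block at binding 0 for ubos)
 */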
4227 static uint32_t
4228 get_src_mask_ssbo(unsigned total, nir_src src)
4229 {
4230 if (nir_src_is_const(src))
4231 return BITFIELD_BIT(nir_src_as_uint(src));
4232 return BITFIELD_MASK(total);
4233 }
4234
4235 static uint32_t
4236 get_src_mask_ubo(unsigned total, nir_src src)
4237 {
4238 if (nir_src_is_const(src))
4239 return BITFIELD_BIT(nir_src_as_uint(src));
4240 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4241 }
4242
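/* scan the shader for buffer access: accumulate the ubo/ssbo slots referenced by
 * load/store/atomic intrinsics into zs, mark textures_used for non-kernel tex instrs,
 * and return true if get_ssbo_size is used (i.e. runtime-sized ssbo arrays are needed)
 */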
4243 static bool
4244 analyze_io(struct zink_shader *zs, nir_shader *shader)
4245 {
4246 bool ret = false;
4247 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4248 nir_foreach_block(block, impl) {
4249 nir_foreach_instr(instr, block) {
4250 if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4251 /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4252 nir_tex_instr *tex = nir_instr_as_tex(instr);
4253 nir_foreach_variable_with_modes(img, shader, nir_var_uniform) {
4254 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4255 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4256 if (tex->texture_index >= img->data.driver_location &&
4257 tex->texture_index < img->data.driver_location + size) {
4258 BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4259 break;
4260 }
4261 }
4262 }
4263 continue;
4264 }
4265 if (instr->type != nir_instr_type_intrinsic)
4266 continue;
4267
4268 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4269 switch (intrin->intrinsic) {
4270 case nir_intrinsic_store_ssbo:
4271 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4272 break;
4273
4274 case nir_intrinsic_get_ssbo_size: {
4275 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4276 ret = true;
4277 break;
4278 }
4279 case nir_intrinsic_ssbo_atomic:
4280 case nir_intrinsic_ssbo_atomic_swap:
4281 case nir_intrinsic_load_ssbo:
4282 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4283 break;
4284 case nir_intrinsic_load_ubo:
4285 case nir_intrinsic_load_ubo_vec4:
4286 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4287 break;
4288 default:
4289 break;
4290 }
4291 }
4292 }
4293 return ret;
4294 }
4295
4296 struct zink_bindless_info {
4297 nir_variable *bindless[4];
4298 unsigned bindless_set;
4299 };
4300
4301 /* this is a "default" bindless texture used if the shader has no texture variables */
4302 static nir_variable *
4303 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4304 {
4305 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4306 nir_variable *var;
4307
4308 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4309 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4310 var->data.descriptor_set = descriptor_set;
4311 var->data.driver_location = var->data.binding = binding;
4312 return var;
4313 }
4314
4315 /* this is a "default" bindless image used if the shader has no image variables */
4316 static nir_variable *
4317 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4318 {
4319 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4320 nir_variable *var;
4321
4322 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4323 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4324 var->data.descriptor_set = descriptor_set;
4325 var->data.driver_location = var->data.binding = binding;
4326 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4327 return var;
4328 }
4329
4330 /* rewrite bindless instructions as array deref instructions */
4331 static bool
4332 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4333 {
4334 struct zink_bindless_info *bindless = data;
4335
4336 if (in->type == nir_instr_type_tex) {
4337 nir_tex_instr *tex = nir_instr_as_tex(in);
4338 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4339 if (idx == -1)
4340 return false;
4341
4342 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4343 if (!var) {
4344 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4345 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4346 bindless->bindless[1] = var;
4347 else
4348 bindless->bindless[0] = var;
4349 }
4350 b->cursor = nir_before_instr(in);
4351 nir_deref_instr *deref = nir_build_deref_var(b, var);
4352 if (glsl_type_is_array(var->type))
4353 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4354 nir_src_rewrite(&tex->src[idx].src, &deref->def);
4355
4356 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4357 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4358 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4359 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4360 *
4361 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4362 * - Warhammer 40k: Dawn of War III
4363 */
4364 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4365 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4366 unsigned coord_components = nir_src_num_components(tex->src[c].src);
4367 if (coord_components < needed_components) {
4368 nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4369 nir_src_rewrite(&tex->src[c].src, def);
4370 tex->coord_components = needed_components;
4371 }
4372 return true;
4373 }
4374 if (in->type != nir_instr_type_intrinsic)
4375 return false;
4376 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4377
4378 nir_intrinsic_op op;
4379 #define OP_SWAP(OP) \
4380 case nir_intrinsic_bindless_image_##OP: \
4381 op = nir_intrinsic_image_deref_##OP; \
4382 break;
4383
4384
4385 /* convert bindless intrinsics to deref intrinsics */
4386 switch (instr->intrinsic) {
4387 OP_SWAP(atomic)
4388 OP_SWAP(atomic_swap)
4389 OP_SWAP(format)
4390 OP_SWAP(load)
4391 OP_SWAP(order)
4392 OP_SWAP(samples)
4393 OP_SWAP(size)
4394 OP_SWAP(store)
4395 default:
4396 return false;
4397 }
4398
4399 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4400 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4401 if (!var)
4402 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4403 instr->intrinsic = op;
4404 b->cursor = nir_before_instr(in);
4405 nir_deref_instr *deref = nir_build_deref_var(b, var);
4406 if (glsl_type_is_array(var->type))
4407 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4408 nir_src_rewrite(&instr->src[0], &deref->def);
4409 return true;
4410 }
4411
4412 static bool
4413 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4414 {
4415 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4416 return false;
4417 nir_fixup_deref_modes(shader);
4418 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4419 optimize_nir(shader, NULL, true);
4420 return true;
4421 }
4422
4423 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4424 static bool
4425 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4426 void *data)
4427 {
4428 bool is_load = false;
4429 bool is_input = false;
4430 bool is_interp = false;
4431 if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4432 return false;
4433
4434 nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4435 if (var->data.bindless)
4436 return false;
4437 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4438 return false;
4439 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4440 return false;
4441
4442 var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4443 var->data.bindless = 1;
4444 return true;
4445 }
4446
4447 static bool
4448 lower_bindless_io(nir_shader *shader)
4449 {
4450 return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4451 nir_metadata_dominance, NULL);
4452 }
4453
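/* map a vulkan descriptor type and per-stage resource index to a binding number within
 * its zink descriptor set; compute-like stages are collapsed to base 0, and
 * compact_descriptors offsets ssbo/image bindings past the ubo/sampler ranges so the
 * merged sets don't collide
 */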
4454 static uint32_t
4455 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4456 {
4457 if (stage == MESA_SHADER_NONE) {
4458 unreachable("not supported");
4459 } else {
4460 unsigned base = stage;
4461 /* clamp compute bindings for better driver efficiency */
4462 if (gl_shader_stage_is_compute(stage))
4463 base = 0;
4464 switch (type) {
4465 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4466 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4467 return base * 2 + !!index;
4468
4469 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4470 assert(stage == MESA_SHADER_KERNEL);
4471 FALLTHROUGH;
4472 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4473 if (stage == MESA_SHADER_KERNEL) {
4474 assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4475 return index + PIPE_MAX_SAMPLERS;
4476 }
4477 FALLTHROUGH;
4478 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4479 assert(index < PIPE_MAX_SAMPLERS);
4480 assert(stage != MESA_SHADER_KERNEL);
4481 return (base * PIPE_MAX_SAMPLERS) + index;
4482
4483 case VK_DESCRIPTOR_TYPE_SAMPLER:
4484 assert(index < PIPE_MAX_SAMPLERS);
4485 assert(stage == MESA_SHADER_KERNEL);
4486 return index;
4487
4488 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4489 return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4490
4491 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4492 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4493 assert(index < ZINK_MAX_SHADER_IMAGES);
4494 if (stage == MESA_SHADER_KERNEL)
4495 return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4496 return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4497
4498 default:
4499 unreachable("unexpected type");
4500 }
4501 }
4502 }
4503
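/* for each bindless sampler/image var, clone it into the bindless descriptor set as a
 * ZINK_MAX_BINDLESS_HANDLES array (one var per descriptor type), then discard the
 * original by moving it to shader_temp
 */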
4504 static void
4505 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4506 {
4507 if (glsl_type_is_struct(type)) {
4508 for (unsigned i = 0; i < glsl_get_length(type); i++)
4509 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4510 return;
4511 }
4512
4513 /* just a random scalar in a struct */
4514 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4515 return;
4516
4517 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4518 unsigned binding;
4519 switch (vktype) {
4520 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4521 binding = 0;
4522 break;
4523 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4524 binding = 1;
4525 break;
4526 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4527 binding = 2;
4528 break;
4529 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4530 binding = 3;
4531 break;
4532 default:
4533 unreachable("unknown");
4534 }
4535 if (!bindless->bindless[binding]) {
4536 bindless->bindless[binding] = nir_variable_clone(var, nir);
4537 bindless->bindless[binding]->data.bindless = 0;
4538 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4539 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4540 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4541 if (!bindless->bindless[binding]->data.image.format)
4542 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4543 nir_shader_add_variable(nir, bindless->bindless[binding]);
4544 } else {
4545 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4546 }
4547 var->data.mode = nir_var_shader_temp;
4548 }
4549
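/* instr callback for lower_1d_shadow: after promoting a 1D shadow tex to 2D, pad the
 * coord/offset/derivative srcs with a zero y component and rewrite dest uses so they
 * still see only the original components
 */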
4550 static bool
4551 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4552 {
4553 struct zink_screen *screen = data;
4554 if (instr->type != nir_instr_type_tex)
4555 return false;
4556 nir_tex_instr *tex = nir_instr_as_tex(instr);
4557 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4558 return false;
4559 if (tex->is_sparse && screen->need_2D_sparse) {
4560 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4561 mesa_loge("unhandled/unsupported 1D sparse texture!");
4562 abort();
4563 }
4564 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4565 b->cursor = nir_before_instr(instr);
4566 tex->coord_components++;
4567 unsigned srcs[] = {
4568 nir_tex_src_coord,
4569 nir_tex_src_offset,
4570 nir_tex_src_ddx,
4571 nir_tex_src_ddy,
4572 };
4573 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4574 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4575 if (c == -1)
4576 continue;
4577 if (tex->src[c].src.ssa->num_components == tex->coord_components)
4578 continue;
4579 nir_def *def;
4580 nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4581 if (tex->src[c].src.ssa->num_components == 1)
4582 def = nir_vec2(b, tex->src[c].src.ssa, zero);
4583 else
4584 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4585 nir_src_rewrite(&tex->src[c].src, def);
4586 }
4587 b->cursor = nir_after_instr(instr);
4588 unsigned needed_components = nir_tex_instr_dest_size(tex);
4589 unsigned num_components = tex->def.num_components;
4590 if (needed_components > num_components) {
4591 tex->def.num_components = needed_components;
4592 assert(num_components < 3);
4593 /* take either xz or just x since this is promoted to 2D from 1D */
4594 uint32_t mask = num_components == 2 ? (1|4) : 1;
4595 nir_def *dst = nir_channels(b, &tex->def, mask);
4596 nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4597 }
4598 return true;
4599 }
4600
4601 static bool
4602 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4603 {
4604 bool found = false;
4605 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4606 const struct glsl_type *type = glsl_without_array(var->type);
4607 unsigned length = glsl_get_length(var->type);
4608 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4609 continue;
4610 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4611 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4612
4613 found = true;
4614 }
4615 if (found)
4616 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4617 return found;
4618 }
4619
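/* gather info needed later at draw/bind time: sparse residency usage, the images_used
 * range for each image deref, and a one-time warning if float image atomics are used
 * without VK_EXT_shader_atomic_float
 */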
4620 static void
4621 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4622 {
4623 nir_foreach_function_impl(impl, shader) {
4624 nir_foreach_block_safe(block, impl) {
4625 nir_foreach_instr_safe(instr, block) {
4626 if (instr->type == nir_instr_type_tex) {
4627 nir_tex_instr *tex = nir_instr_as_tex(instr);
4628 zs->sinfo.have_sparse |= tex->is_sparse;
4629 }
4630 if (instr->type != nir_instr_type_intrinsic)
4631 continue;
4632 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4633 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4634 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4635 intr->intrinsic == nir_intrinsic_image_deref_store ||
4636 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4637 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4638 intr->intrinsic == nir_intrinsic_image_deref_size ||
4639 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4640 intr->intrinsic == nir_intrinsic_image_deref_format ||
4641 intr->intrinsic == nir_intrinsic_image_deref_order) {
4642
4643 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4644
4645 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4646 const unsigned size =
4647 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4648 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4649 var->data.binding + (MAX2(size, 1) - 1));
4650 }
4651 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4652 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4653 zs->sinfo.have_sparse = true;
4654
4655 static bool warned = false;
4656 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4657 switch (intr->intrinsic) {
4658 case nir_intrinsic_image_deref_atomic: {
4659 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4660 if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4661 util_format_is_float(var->data.image.format))
4662 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4663 break;
4664 }
4665 default:
4666 break;
4667 }
4668 }
4669 }
4670 }
4671 }
4672 }
4673
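/* locate the sampler variable backing each non-query tex instr (by handle deref or by
 * texture_index range) so rewrite_tex_dest can match the instr's dest to the variable's
 * sampler return type
 */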
4674 static bool
4675 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
4676 {
4677 if (in->type != nir_instr_type_tex)
4678 return false;
4679 nir_tex_instr *tex = nir_instr_as_tex(in);
4680 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4681 return false;
4682 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4683 nir_variable *var = NULL;
4684 if (handle != -1) {
4685 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4686 } else {
4687 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
4688 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
4689 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4690 if (tex->texture_index >= img->data.driver_location &&
4691 tex->texture_index < img->data.driver_location + size) {
4692 var = img;
4693 break;
4694 }
4695 }
4696 }
4697 }
4698 return !!rewrite_tex_dest(b, tex, var, data);
4699 }
4700
4701 static bool
4702 match_tex_dests(nir_shader *shader, struct zink_shader *zs)
4703 {
4704 return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, zs);
4705 }
4706
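/* scalarize vector bitfield_extract/bitfield_insert ops into one instruction per
 * component
 */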
4707 static bool
4708 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
4709 {
4710 if (in->type != nir_instr_type_alu)
4711 return false;
4712 nir_alu_instr *alu = nir_instr_as_alu(in);
4713 switch (alu->op) {
4714 case nir_op_ubitfield_extract:
4715 case nir_op_ibitfield_extract:
4716 case nir_op_bitfield_insert:
4717 break;
4718 default:
4719 return false;
4720 }
4721 unsigned num_components = alu->def.num_components;
4722 if (num_components == 1)
4723 return false;
4724 b->cursor = nir_before_instr(in);
4725 nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4726 for (unsigned i = 0; i < num_components; i++) {
4727 if (alu->op == nir_op_bitfield_insert)
4728 dests[i] = nir_bitfield_insert(b,
4729 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4730 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4731 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4732 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4733 else if (alu->op == nir_op_ubitfield_extract)
4734 dests[i] = nir_ubitfield_extract(b,
4735 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4736 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4737 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4738 else
4739 dests[i] = nir_ibitfield_extract(b,
4740 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4741 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4742 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4743 }
4744 nir_def *dest = nir_vec(b, dests, num_components);
4745 nir_def_rewrite_uses_after(&alu->def, dest, in);
4746 nir_instr_remove(in);
4747 return true;
4748 }
4749
4750
4751 static bool
4752 split_bitfields(nir_shader *shader)
4753 {
4754 return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
4755 }
4756
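/* after retyping an image variable, update all derefs of it so their types match the
 * variable's new type
 */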
4757 static void
4758 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
4759 {
4760 nir_foreach_function_impl(impl, nir) {
4761 nir_foreach_block(block, impl) {
4762 nir_foreach_instr_safe(instr, block) {
4763 if (instr->type != nir_instr_type_deref)
4764 continue;
4765 nir_deref_instr *deref = nir_instr_as_deref(instr);
4766 nir_variable *img = nir_deref_instr_get_variable(deref);
4767 if (img != var)
4768 continue;
4769 if (glsl_type_is_array(var->type)) {
4770 if (deref->deref_type == nir_deref_type_array)
4771 deref->type = glsl_without_array(var->type);
4772 else
4773 deref->type = var->type;
4774 } else {
4775 deref->type = var->type;
4776 }
4777 }
4778 }
4779 }
4780 }
4781
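/* cl images may be untyped (GLSL_TYPE_VOID sampler result): derive a concrete image type
 * from the first typed image intrinsic using the variable, fall back to uint for
 * image_size-only usage, and demote the variable to shader_temp if it's never accessed
 */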
4782 static void
4783 type_image(nir_shader *nir, nir_variable *var)
4784 {
4785 nir_foreach_function_impl(impl, nir) {
4786 nir_foreach_block(block, impl) {
4787 nir_foreach_instr_safe(instr, block) {
4788 if (instr->type != nir_instr_type_intrinsic)
4789 continue;
4790 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4791 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4792 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4793 intr->intrinsic == nir_intrinsic_image_deref_store ||
4794 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4795 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4796 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4797 intr->intrinsic == nir_intrinsic_image_deref_format ||
4798 intr->intrinsic == nir_intrinsic_image_deref_order) {
4799 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4800 nir_variable *img = nir_deref_instr_get_variable(deref);
4801 if (img != var)
4802 continue;
4803 nir_alu_type alu_type = nir_intrinsic_src_type(intr);
4804 const struct glsl_type *type = glsl_without_array(var->type);
4805 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4806 assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
4807 continue;
4808 }
4809 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4810 if (glsl_type_is_array(var->type))
4811 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4812 var->type = img_type;
4813 rewrite_cl_derefs(nir, var);
4814 return;
4815 }
4816 }
4817 }
4818 }
4819 nir_foreach_function_impl(impl, nir) {
4820 nir_foreach_block(block, impl) {
4821 nir_foreach_instr_safe(instr, block) {
4822 if (instr->type != nir_instr_type_intrinsic)
4823 continue;
4824 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4825 if (intr->intrinsic != nir_intrinsic_image_deref_size)
4826 continue;
4827 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4828 nir_variable *img = nir_deref_instr_get_variable(deref);
4829 if (img != var)
4830 continue;
4831 nir_alu_type alu_type = nir_type_uint32;
4832 const struct glsl_type *type = glsl_without_array(var->type);
4833 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
4834 continue;
4835 }
4836 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
4837 if (glsl_type_is_array(var->type))
4838 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
4839 var->type = img_type;
4840 rewrite_cl_derefs(nir, var);
4841 return;
4842 }
4843 }
4844 }
4845 var->data.mode = nir_var_shader_temp;
4846 }
4847
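/* assign concrete sampler types to sampler variables based on the tex instrs that use
 * them, and record in *sampler_mask which sampler slots actually require a sampler
 */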
4848 static bool
4849 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
4850 {
4851 bool progress = false;
4852 nir_foreach_function_impl(impl, nir) {
4853 nir_foreach_block(block, impl) {
4854 nir_foreach_instr(instr, block) {
4855 if (instr->type != nir_instr_type_tex)
4856 continue;
4857 nir_tex_instr *tex = nir_instr_as_tex(instr);
4858 if (nir_tex_instr_need_sampler(tex))
4859 *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
4860 nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
4861 assert(var);
4862 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
4863 nir_tex_instr_is_query(tex))
4864 continue;
4865 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
4866 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
4867 if (size > 1)
4868 img_type = glsl_array_type(img_type, size, 0);
4869 var->type = img_type;
4870 progress = true;
4871 }
4872 }
4873 }
4874 return progress;
4875 }
4876
4877 static bool
4878 delete_samplers(nir_shader *nir)
4879 {
4880 bool progress = false;
4881 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
4882 if (glsl_type_is_sampler(glsl_without_array(var->type))) {
4883 var->data.mode = nir_var_shader_temp;
4884 progress = true;
4885 }
4886 }
4887 return progress;
4888 }
4889
4890 static bool
4891 type_images(nir_shader *nir, unsigned *sampler_mask)
4892 {
4893 bool progress = false;
4894 progress |= delete_samplers(nir);
4895 progress |= type_sampler_vars(nir, sampler_mask);
4896 nir_foreach_variable_with_modes(var, nir, nir_var_image) {
4897 type_image(nir, var);
4898 progress = true;
4899 }
4900 return progress;
4901 }
4902
4903 /* attempt to assign io for separate shaders */
4904 static bool
4905 fixup_io_locations(nir_shader *nir)
4906 {
4907 nir_variable_mode modes;
4908 if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
4909 modes = nir_var_shader_in | nir_var_shader_out;
4910 else
4911 modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
4912 u_foreach_bit(mode, modes) {
4913 nir_variable_mode m = BITFIELD_BIT(mode);
4914 if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
4915 (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
4916 /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
4917 * - i/o interface blocks don't need to match
4918 * - any location can be present or not
4919 * - it just has to work
4920 *
4921 * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
4922 * since it's a builtin and yolo it with all the other legacy crap
4923 */
4924 nir_foreach_variable_with_modes(var, nir, m) {
4925 if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
4926 continue;
4927 if (var->data.location == VARYING_SLOT_VAR0)
4928 var->data.driver_location = 0;
4929 else if (var->data.patch)
4930 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
4931 else
4932 var->data.driver_location = var->data.location;
4933 }
4934 return true;
4935 }
4936 /* i/o interface blocks are required to be EXACT matches between stages:
4937 * iterate over all locations and set locations incrementally
4938 */
4939 unsigned slot = 0;
4940 for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
4941 if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
4942 continue;
4943 bool found = false;
4944 unsigned size = 0;
4945 nir_foreach_variable_with_modes(var, nir, m) {
4946 if (var->data.location != i)
4947 continue;
4948 /* only add slots for non-component vars or first-time component vars */
4949 if (!var->data.location_frac || !size) {
4950 /* ensure variable is given enough slots */
4951 if (nir_is_arrayed_io(var, nir->info.stage))
4952 size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
4953 else
4954 size += glsl_count_vec4_slots(var->type, false, false);
4955 }
4956 if (var->data.patch)
4957 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
4958 else
4959 var->data.driver_location = slot;
4960 found = true;
4961 }
4962 slot += size;
4963 if (found) {
4964 /* ensure the consumed slots aren't double iterated */
4965 i += size - 1;
4966 } else {
4967 /* locations used between stages are not required to be contiguous */
4968 if (i >= VARYING_SLOT_VAR0)
4969 slot++;
4970 }
4971 }
4972 }
4973 return true;
4974 }
4975
4976 static uint64_t
4977 zink_flat_flags(struct nir_shader *shader)
4978 {
4979 uint64_t flat_flags = 0;
4980 nir_foreach_shader_in_variable(var, shader) {
4981 if (var->data.interpolation == INTERP_MODE_FLAT)
4982 flat_flags |= BITFIELD64_BIT(var->data.location);
4983 }
4984
4985 return flat_flags;
4986 }
4987
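/* given the io semantics of a lowered io intrinsic, find the variable (possibly stashed
 * in another mode) covering that location/component, accounting for arrayed io, compact
 * arrays, 64bit types, and dual-source blend indices
 */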
4988 static nir_variable *
4989 find_io_var_with_semantics(nir_shader *nir, nir_variable_mode mode, nir_variable_mode realmode, nir_io_semantics s, unsigned location, unsigned c, bool is_load)
4990 {
4991 nir_foreach_variable_with_modes(var, nir, mode) {
4992 const struct glsl_type *type = var->type;
4993 nir_variable_mode m = var->data.mode;
4994 var->data.mode = realmode;
4995 if (nir_is_arrayed_io(var, nir->info.stage))
4996 type = glsl_get_array_element(type);
4997 var->data.mode = m;
4998 if (var->data.fb_fetch_output != s.fb_fetch_output)
4999 continue;
5000 if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && s.dual_source_blend_index != var->data.index)
5001 continue;
5002 unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5003 if (var->data.location > location || var->data.location + num_slots <= location)
5004 continue;
5005 unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5006 if (glsl_type_contains_64bit(type)) {
5007 num_components *= 2;
5008 if (location > var->data.location) {
5009 unsigned sub_components = (location - var->data.location) * 4;
5010 if (sub_components > num_components)
5011 continue;
5012 num_components -= sub_components;
5013 }
5014 }
5015 if (var->data.location_frac > c || var->data.location_frac + num_components <= c)
5016 continue;
5017 return var;
5018 }
5019 return NULL;
5020 }
5021
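/* rebuild the shader's in/out variables from the lowered io intrinsics: stash the
 * existing vars, detect indirectly-accessed locations, then create one variable per
 * location/component group with a type matching how the intrinsics actually access it
 */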
5022 static void
5023 rework_io_vars(nir_shader *nir, nir_variable_mode mode)
5024 {
5025 assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5026 assert(util_bitcount(mode) == 1);
5027 bool found = false;
5028 /* store old vars */
5029 nir_foreach_variable_with_modes(var, nir, mode) {
5030 if (nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out)
5031 var->data.compact |= var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
5032 /* stash vars in this mode for now */
5033 var->data.mode = nir_var_mem_shared;
5034 found = true;
5035 }
5036 if (!found) {
5037 if (mode == nir_var_shader_out)
5038 found = nir->info.outputs_written || nir->info.outputs_read;
5039 else
5040 found = nir->info.inputs_read;
5041 if (!found)
5042 return;
5043 }
5044 /* scan for vars using indirect array access */
5045 BITSET_DECLARE(indirect_access, 128);
5046 BITSET_ZERO(indirect_access);
5047 nir_foreach_function_impl(impl, nir) {
5048 nir_foreach_block(block, impl) {
5049 nir_foreach_instr(instr, block) {
5050 if (instr->type != nir_instr_type_intrinsic)
5051 continue;
5052 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5053 bool is_load = false;
5054 bool is_input = false;
5055 bool is_interp = false;
5056 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5057 continue;
5058 nir_src *src_offset = nir_get_io_offset_src(intr);
5059 if (!is_input && !src_offset)
5060 continue;
5061 if (mode == nir_var_shader_in && !is_input)
5062 continue;
5063 if (mode == nir_var_shader_out && is_input)
5064 continue;
5065 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5066 if (!nir_src_is_const(*src_offset))
5067 BITSET_SET(indirect_access, s.location);
5068 }
5069 }
5070 }
5071 /* loop and create vars */
5072 nir_foreach_function_impl(impl, nir) {
5073 nir_foreach_block(block, impl) {
5074 nir_foreach_instr(instr, block) {
5075 if (instr->type != nir_instr_type_intrinsic)
5076 continue;
5077 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5078 bool is_load = false;
5079 bool is_input = false;
5080 bool is_interp = false;
5081 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5082 continue;
5083 if (mode == nir_var_shader_in && !is_input)
5084 continue;
5085 if (mode == nir_var_shader_out && is_input)
5086 continue;
5087 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5088 unsigned slot_offset = 0;
5089 bool is_indirect = BITSET_TEST(indirect_access, s.location);
5090 nir_src *src_offset = nir_get_io_offset_src(intr);
5091 if (src_offset && !is_indirect) {
5092 assert(nir_src_is_const(*src_offset));
5093 slot_offset = nir_src_as_uint(*src_offset);
5094 }
5095 unsigned location = s.location + slot_offset;
5096 unsigned frac = nir_intrinsic_component(intr);
5097 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5098 /* set c aligned/rounded down to dword */
5099 unsigned c = nir_slot_is_sysval_output(location, MESA_SHADER_NONE) ? 0 : frac;
5100 if (frac && bit_size < 32)
5101 c = frac * bit_size / 32;
5102 nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5103 /* ensure dword is filled with like-sized components */
5104 unsigned max_components = intr->num_components;
5105 if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5106 switch (s.location) {
5107 case FRAG_RESULT_DEPTH:
5108 case FRAG_RESULT_STENCIL:
5109 case FRAG_RESULT_SAMPLE_MASK:
5110 max_components = 1;
5111 break;
5112 default:
5113 break;
5114 }
5115 } else if ((nir->info.stage != MESA_SHADER_VERTEX || mode != nir_var_shader_in) && s.location < VARYING_SLOT_VAR0) {
5116 switch (s.location) {
5117 case VARYING_SLOT_FOGC:
5118 /* use intr components */
5119 break;
5120 case VARYING_SLOT_POS:
5121 case VARYING_SLOT_COL0:
5122 case VARYING_SLOT_COL1:
5123 case VARYING_SLOT_TEX0:
5124 case VARYING_SLOT_TEX1:
5125 case VARYING_SLOT_TEX2:
5126 case VARYING_SLOT_TEX3:
5127 case VARYING_SLOT_TEX4:
5128 case VARYING_SLOT_TEX5:
5129 case VARYING_SLOT_TEX6:
5130 case VARYING_SLOT_TEX7:
5131 case VARYING_SLOT_BFC0:
5132 case VARYING_SLOT_BFC1:
5133 case VARYING_SLOT_EDGE:
5134 case VARYING_SLOT_CLIP_VERTEX:
5135 case VARYING_SLOT_PNTC:
5136 case VARYING_SLOT_BOUNDING_BOX0:
5137 case VARYING_SLOT_BOUNDING_BOX1:
5138 max_components = 4;
5139 break;
5140 case VARYING_SLOT_CLIP_DIST0:
5141 case VARYING_SLOT_CLIP_DIST1:
5142 max_components = s.num_slots;
5143 break;
5144 case VARYING_SLOT_CULL_DIST0:
5145 case VARYING_SLOT_CULL_DIST1:
5146 max_components = s.num_slots;
5147 break;
5148 case VARYING_SLOT_TESS_LEVEL_OUTER:
5149 max_components = 4;
5150 break;
5151 case VARYING_SLOT_TESS_LEVEL_INNER:
5152 max_components = 2;
5153 break;
5154 case VARYING_SLOT_PRIMITIVE_ID:
5155 case VARYING_SLOT_LAYER:
5156 case VARYING_SLOT_VIEWPORT:
5157 case VARYING_SLOT_FACE:
5158 case VARYING_SLOT_PSIZ:
5159 case VARYING_SLOT_VIEW_INDEX:
5160 case VARYING_SLOT_VIEWPORT_MASK:
5161 max_components = 1;
5162 break;
5163 default:
5164 unreachable("???");
5165 }
5166 } else if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5167 if (s.location == VERT_ATTRIB_POINT_SIZE)
5168 max_components = 1;
5169 else if (s.location < VERT_ATTRIB_GENERIC0)
5170 max_components = 4;
5171 else
5172 max_components = frac + max_components;
5173 } else if (bit_size == 16)
5174 max_components = align(max_components, 2);
5175 else if (bit_size == 8)
5176 max_components = align(max_components, 4);
5177 if (c + (bit_size == 64 ? max_components * 2 : max_components) > 4)
5178 c = 0;
5179 const struct glsl_type *vec_type;
5180 bool is_compact = false;
5181 if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5182 vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
5183 } else {
5184 switch (s.location) {
5185 case VARYING_SLOT_CLIP_DIST0:
5186 case VARYING_SLOT_CLIP_DIST1:
5187 case VARYING_SLOT_CULL_DIST0:
5188 case VARYING_SLOT_CULL_DIST1:
5189 case VARYING_SLOT_TESS_LEVEL_OUTER:
5190 case VARYING_SLOT_TESS_LEVEL_INNER:
5191 vec_type = glsl_array_type(glsl_float_type(), max_components, sizeof(uint32_t));
5192 is_compact = true;
5193 break;
5194 default:
5195 vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
5196 break;
5197 }
5198 }
5199 /* reset the mode for nir_is_arrayed_io to work */
5200 bool is_arrayed = io_instr_is_arrayed(intr);
5201 if (is_indirect && s.location != VARYING_SLOT_TESS_LEVEL_INNER && s.location != VARYING_SLOT_TESS_LEVEL_OUTER) {
5202 /* indirect array access requires the full array in a single variable */
5203 unsigned slot_count = s.num_slots;
5204 if (bit_size == 64 && slot_count > 1)
5205 slot_count /= 2;
5206 if (slot_count > 1)
5207 vec_type = glsl_array_type(vec_type, slot_count, glsl_get_explicit_stride(vec_type));
5208 }
5209 if (is_arrayed)
5210 vec_type = glsl_array_type(vec_type, 32 /* MAX_PATCH_VERTICES */, glsl_get_explicit_stride(vec_type));
5211 nir_variable *found_var = find_io_var_with_semantics(nir, mode, mode, s, location, c, is_load);
5212 if (found_var) {
5213 if (glsl_get_vector_elements(glsl_without_array(found_var->type)) < glsl_get_vector_elements(glsl_without_array(vec_type))) {
5214 /* enlarge existing vars if necessary */
5215 found_var->type = vec_type;
5216 }
5217 continue;
5218 }
5219
5220 char name[1024];
5221 if (c)
5222 snprintf(name, sizeof(name), "slot_%u_c%u", location, c);
5223 else
5224 snprintf(name, sizeof(name), "slot_%u", location);
5225 nir_variable *old_var = find_io_var_with_semantics(nir, nir_var_mem_shared, mode, s, location, c, is_load);
5226 nir_variable *var = nir_variable_create(nir, mode, vec_type, old_var ? old_var->name : name);
5227 var->data.mode = mode;
5228 var->type = vec_type;
5229 var->data.driver_location = nir_intrinsic_base(intr) + slot_offset;
5230 var->data.location_frac = c;
5231 var->data.location = location;
5232 var->data.patch = location >= VARYING_SLOT_PATCH0 ||
5233 ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5234 (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER));
5235 /* set flat by default */
5236 if (nir->info.stage == MESA_SHADER_FRAGMENT && mode == nir_var_shader_in)
5237 var->data.interpolation = INTERP_MODE_FLAT;
5238 var->data.fb_fetch_output = s.fb_fetch_output;
5239 var->data.index = s.dual_source_blend_index;
5240 var->data.precision = s.medium_precision;
5241 var->data.compact = is_compact;
5242 }
5243 }
5244 }
5245 nir_foreach_variable_with_modes(var, nir, nir_var_mem_shared)
5246 var->data.mode = nir_var_shader_temp;
5247 nir_fixup_deref_modes(nir);
5248 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
5249 }
5250
5251
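/* filter callback for nir_lower_io_to_scalar: scalarize output stores whose write mask
 * doesn't cover all src components or whose xfb outputs consume fewer components than
 * the store provides
 */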
5252 static bool
5253 eliminate_io_wrmasks_instr(const nir_instr *instr, const void *data)
5254 {
5255 const nir_shader *nir = data;
5256 if (instr->type != nir_instr_type_intrinsic)
5257 return false;
5258
5259 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5260 switch (intr->intrinsic) {
5261 case nir_intrinsic_store_output:
5262 case nir_intrinsic_store_per_primitive_output:
5263 case nir_intrinsic_store_per_vertex_output:
5264 break;
5265 default:
5266 return false;
5267 }
5268 unsigned src_components = nir_intrinsic_src_components(intr, 0);
5269 unsigned wrmask = nir_intrinsic_write_mask(intr);
5270 unsigned num_components = util_bitcount(wrmask);
5271 if (num_components != src_components)
5272 return true;
5273 if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64)
5274 num_components *= 2;
5275 if (nir->xfb_info) {
5276 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
5277 nir_src *src_offset = nir_get_io_offset_src(intr);
5278 if (nir_src_is_const(*src_offset)) {
5279 unsigned slot_offset = nir_src_as_uint(*src_offset);
5280 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
5281 if (nir->xfb_info->outputs[i].location == s.location + slot_offset) {
5282 unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
5283 if (xfb_components != MIN2(4, num_components))
5284 return true;
5285 num_components -= xfb_components;
5286 if (!num_components)
5287 break;
5288 }
5289 }
5290 } else {
5291 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
5292 if (nir->xfb_info->outputs[i].location >= s.location &&
5293 nir->xfb_info->outputs[i].location < s.location + s.num_slots) {
5294 unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
5295 if (xfb_components < MIN2(num_components, 4))
5296 return true;
5297 num_components -= xfb_components;
5298 if (!num_components)
5299 break;
5300 }
5301 }
5302 }
5303 }
5304 return false;
5305 }
5306
5307 static int
5308 zink_type_size(const struct glsl_type *type, bool bindless)
5309 {
5310 return glsl_count_attribute_slots(type, false);
5311 }
5312
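/* callback for nir_lower_mem_access_bit_sizes on kernel shaders: unaligned accesses drop
 * to the largest bit size the alignment allows, capped at 4 components
 */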
5313 static nir_mem_access_size_align
5314 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5315 uint8_t bit_size, uint32_t align,
5316 uint32_t align_offset, bool offset_is_const,
5317 const void *cb_data)
5318 {
5319 align = nir_combined_align(align, align_offset);
5320
5321 assert(util_is_power_of_two_nonzero(align));
5322
5323 /* simply drop the bit_size for unaligned load/stores */
5324 if (align < (bit_size / 8)) {
5325 return (nir_mem_access_size_align){
5326 .num_components = MIN2(bytes / align, 4),
5327 .bit_size = align * 8,
5328 .align = align,
5329 };
5330 } else {
5331 return (nir_mem_access_size_align){
5332 .num_components = MIN2(bytes / (bit_size / 8), 4),
5333 .bit_size = bit_size,
5334 .align = bit_size / 8,
5335 };
5336 }
5337 }
5338
5339 static nir_mem_access_size_align
5340 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5341 uint8_t bit_size, uint32_t align,
5342 uint32_t align_offset, bool offset_is_const,
5343 const void *cb_data)
5344 {
5345 bit_size = *(const uint8_t *)cb_data;
5346 align = nir_combined_align(align, align_offset);
5347
5348 assert(util_is_power_of_two_nonzero(align));
5349
5350 return (nir_mem_access_size_align){
5351 .num_components = MIN2(bytes / (bit_size / 8), 4),
5352 .bit_size = bit_size,
5353 .align = bit_size / 8,
5354 };
5355 }
5356
5357 static bool
5358 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5359 {
5360 uint8_t *bit_size = data;
5361 switch (instr->intrinsic) {
5362 case nir_intrinsic_load_scratch:
5363 *bit_size = MIN2(*bit_size, instr->def.bit_size);
5364 return false;
5365 case nir_intrinsic_store_scratch:
5366 *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5367 return false;
5368 default:
5369 return false;
5370 }
5371 }
5372
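/* lower all scratch access to the smallest bit size any scratch load/store in the shader
 * uses (see mem_access_scratch_size_align_cb) so every access addresses scratch with one
 * consistent granularity
 */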
5373 static bool
5374 alias_scratch_memory(nir_shader *nir)
5375 {
5376 uint8_t bit_size = 64;
5377
5378 nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5379 nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5380 .modes = nir_var_function_temp,
5381 .may_lower_unaligned_stores_to_atomics = true,
5382 .callback = mem_access_scratch_size_align_cb,
5383 .cb_data = &bit_size,
5384 };
5385 return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5386 }
5387
5388 static uint8_t
5389 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5390 {
5391 return 4;
5392 }
5393
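/* create a zink_shader from freshly translated nir: run the zink-specific lowering
 * passes, assign descriptor set/binding numbers for every resource, record xfb/psiz
 * info, and serialize the nir for later per-pipeline compilation
 */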
5394 struct zink_shader *
5395 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
5396 {
5397 struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
5398 bool have_psiz = false;
5399
5400 ret->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
5401 nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);
5402
5403 ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
5404 ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
5405 if (screen->info.have_KHR_shader_float_controls) {
5406 if (screen->info.props12.shaderDenormFlushToZeroFloat16)
5407 ret->sinfo.float_controls.flush_denorms |= 0x1;
5408 if (screen->info.props12.shaderDenormFlushToZeroFloat32)
5409 ret->sinfo.float_controls.flush_denorms |= 0x2;
5410 if (screen->info.props12.shaderDenormFlushToZeroFloat64)
5411 ret->sinfo.float_controls.flush_denorms |= 0x4;
5412
5413 if (screen->info.props12.shaderDenormPreserveFloat16)
5414 ret->sinfo.float_controls.preserve_denorms |= 0x1;
5415 if (screen->info.props12.shaderDenormPreserveFloat32)
5416 ret->sinfo.float_controls.preserve_denorms |= 0x2;
5417 if (screen->info.props12.shaderDenormPreserveFloat64)
5418 ret->sinfo.float_controls.preserve_denorms |= 0x4;
5419
5420 ret->sinfo.float_controls.denorms_all_independence =
5421 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
5422
5423 ret->sinfo.float_controls.denorms_32_bit_independence =
5424 ret->sinfo.float_controls.denorms_all_independence ||
5425 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
5426 }
5427 ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
5428
5429 util_queue_fence_init(&ret->precompile.fence);
5430 util_dynarray_init(&ret->pipeline_libs, ret);
5431 ret->hash = _mesa_hash_pointer(ret);
5432
5433 ret->programs = _mesa_pointer_set_create(NULL);
5434 simple_mtx_init(&ret->lock, mtx_plain);
5435
5436 nir_lower_io_options lower_io_flags = 0;
5437 if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
5438 lower_io_flags = nir_lower_io_lower_64bit_to_32;
5439 else if (!screen->info.feats.features.shaderFloat64)
5440 lower_io_flags = nir_lower_io_lower_64bit_float_to_32;
5441 bool temp_inputs = nir->info.stage != MESA_SHADER_VERTEX && nir->info.inputs_read & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
5442 bool temp_outputs = nir->info.stage != MESA_SHADER_FRAGMENT && (nir->info.outputs_read | nir->info.outputs_written) & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
5443 if (temp_inputs || temp_outputs) {
5444 NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), temp_outputs, temp_inputs);
5445 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
5446 NIR_PASS_V(nir, nir_split_var_copies);
5447 NIR_PASS_V(nir, nir_lower_var_copies);
5448 }
5449 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
5450 if (nir->info.stage == MESA_SHADER_VERTEX)
5451 lower_io_flags |= nir_lower_io_lower_64bit_to_32;
5452 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);
5453 nir->info.io_lowered = true;
5454
5455 if (nir->info.stage == MESA_SHADER_KERNEL) {
5456 nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
5457 .modes = nir_var_all ^ nir_var_function_temp,
5458 .may_lower_unaligned_stores_to_atomics = true,
5459 .callback = mem_access_size_align_cb,
5460 .cb_data = screen,
5461 };
5462 NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
5463 NIR_PASS_V(nir, alias_scratch_memory);
5464 NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
5465 NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
5466 }
5467
5468 optimize_nir(nir, NULL, true);
5469 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
5470 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
5471 NIR_PASS_V(nir, lower_bindless_io);
5472 break;
5473 }
5474 }
5475 nir_gather_xfb_info_from_intrinsics(nir);
5476 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, eliminate_io_wrmasks_instr, nir);
5477 /* clean up io to improve direct access */
5478 optimize_nir(nir, NULL, true);
5479 rework_io_vars(nir, nir_var_shader_in);
5480 rework_io_vars(nir, nir_var_shader_out);
5481
5482 if (nir->info.stage < MESA_SHADER_COMPUTE)
5483 create_gfx_pushconst(nir);
5484
5485 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
5486 nir->info.stage == MESA_SHADER_TESS_EVAL)
5487 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
5488
5489 if (nir->info.stage < MESA_SHADER_FRAGMENT)
5490 have_psiz = check_psiz(nir);
5491 if (nir->info.stage == MESA_SHADER_FRAGMENT)
5492 ret->flat_flags = zink_flat_flags(nir);
5493
5494 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
5495 NIR_PASS_V(nir, fixup_io_locations);
5496
5497 NIR_PASS_V(nir, lower_basevertex);
5498 NIR_PASS_V(nir, lower_baseinstance);
5499 NIR_PASS_V(nir, lower_sparse_and);
5500 NIR_PASS_V(nir, split_bitfields);
5501 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
5502
5503 if (screen->info.have_EXT_shader_demote_to_helper_invocation) {
5504 NIR_PASS_V(nir, nir_lower_discard_or_demote, true);
5505 }
5506
5507 if (screen->need_2D_zs)
5508 NIR_PASS_V(nir, lower_1d_shadow, screen);
5509
5510 {
5511 nir_lower_subgroups_options subgroup_options = {0};
5512 subgroup_options.lower_to_scalar = true;
5513 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
5514 subgroup_options.ballot_bit_size = 32;
5515 subgroup_options.ballot_components = 4;
5516 subgroup_options.lower_subgroup_masks = true;
5517 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
5518 subgroup_options.subgroup_size = 1;
5519 subgroup_options.lower_vote_trivial = true;
5520 }
5521 subgroup_options.lower_inverse_ballot = true;
5522 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
5523 }
5524
5525 optimize_nir(nir, NULL, true);
5526 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
5527 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
5528 nir_lower_demote_if_to_cf |
5529 nir_lower_terminate_if_to_cf));
5530
5531 bool needs_size = analyze_io(ret, nir);
5532 NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
5533 /* these passes run later, at compile time, if there could be inlined uniforms */
5534 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
5535 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
5536 NIR_PASS_V(nir, rewrite_bo_access, screen);
5537 NIR_PASS_V(nir, remove_bo_access, ret);
5538 }
5539
5540 struct zink_bindless_info bindless = {0};
5541 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
5542 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
5543 var->data.is_xfb = false;
5544
5545 optimize_nir(nir, NULL, true);
5546 prune_io(nir);
5547
5548 scan_nir(screen, nir, ret);
5549 unsigned sampler_mask = 0;
5550 if (nir->info.stage == MESA_SHADER_KERNEL) {
5551 NIR_PASS_V(nir, type_images, &sampler_mask);
5552 enum zink_descriptor_type ztype = ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW;
5553 VkDescriptorType vktype = VK_DESCRIPTOR_TYPE_SAMPLER;
5554 u_foreach_bit(s, sampler_mask) {
5555 ret->bindings[ztype][ret->num_bindings[ztype]].index = s;
5556 ret->bindings[ztype][ret->num_bindings[ztype]].binding = zink_binding(MESA_SHADER_KERNEL, vktype, s, screen->compact_descriptors);
5557 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5558 ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
5559 ret->num_bindings[ztype]++;
5560 }
5561 ret->sinfo.sampler_mask = sampler_mask;
5562 }
5563
5564 unsigned ubo_binding_mask = 0;
5565 unsigned ssbo_binding_mask = 0;
5566 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
5567 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
5568 nir_var_image |
5569 nir_var_mem_ubo |
5570 nir_var_mem_ssbo)) {
5571 enum zink_descriptor_type ztype;
5572 const struct glsl_type *type = glsl_without_array(var->type);
5573 if (var->data.mode == nir_var_mem_ubo) {
5574 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
5575 /* buffer 0 is a push descriptor */
5576 var->data.descriptor_set = !!var->data.driver_location;
5577 var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
5578 zink_binding(nir->info.stage,
5579 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
5580 var->data.driver_location,
5581 screen->compact_descriptors);
5582 assert(var->data.driver_location || var->data.binding < 10);
5583 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
5584 int binding = var->data.binding;
5585
5586 if (!var->data.driver_location) {
5587 ret->has_uniforms = true;
5588 } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
5589 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
5590 ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
5591 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
5592 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
5593 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
5594 ret->num_bindings[ztype]++;
5595 ubo_binding_mask |= BITFIELD_BIT(binding);
5596 }
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = screen->desc_set_id[ztype];
            var->data.binding = zink_binding(clamp_stage(&nir->info),
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
               ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
               assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
               ret->num_bindings[ztype]++;
               ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
            }
         } else {
            assert(var->data.mode == nir_var_uniform ||
                   var->data.mode == nir_var_image);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, &bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
                  vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = screen->desc_set_id[ztype];
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            } else if (var->data.mode == nir_var_uniform) {
               /* this is a dead uniform */
               var->data.mode = 0;
               exec_node_remove(&var->node);
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
   ret->bindless |= bindless_lowered;

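   /* lower 64-bit variables to 32-bit representations when the device lacks native
    * int64/float64 support
    */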
   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
      NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
   if (nir->info.stage != MESA_SHADER_KERNEL)
      NIR_PASS_V(nir, match_tex_dests, ret);

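   /* streamout/psiz cleanup: either generate stream-output state from xfb_info or
    * drop a pointsize export that was injected but isn't needed
    */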
   if (!nir->info.internal)
      nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
   if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
      update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
   else if (have_psiz) {
      bool have_fake_psiz = false;
      nir_variable *psiz = NULL;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == VARYING_SLOT_PSIZ) {
            if (!var->data.explicit_location)
               have_fake_psiz = true;
            else
               psiz = var;
         }
      }
      /* maintenance5 allows injected psiz deletion */
      if (have_fake_psiz && (psiz || screen->info.have_KHR_maintenance5)) {
         /* psiz may be NULL when only the injected var exists: in that case just
          * delete the injected store
          */
         if (psiz) {
            psiz->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
         }
         delete_psiz_store(nir, true);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));

   ret->can_inline = true;

   return ret;
}

char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   nir_lower_tex_options tex_opts = {
      .lower_invalid_implicit_lod = true,
   };
   /*
      Sampled Image must be an object whose type is OpTypeSampledImage.
      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
      or Rect, and the Arrayed and MS operands must be 0.
      - SPIRV, OpImageSampleProj* opcodes
    */
   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
   tex_opts.lower_txp_array = true;
   if (!screen->info.feats.features.shaderImageGatherExtended)
      tex_opts.lower_tg4_offsets = true;
   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   optimize_nir(nir, NULL, false);
   if (nir->info.stage == MESA_SHADER_VERTEX)
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);

   return NULL;
}

void
zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   _mesa_set_destroy(shader->programs, NULL);
   util_queue_fence_wait(&shader->precompile.fence);
   util_queue_fence_destroy(&shader->precompile.fence);
   zink_descriptor_shader_deinit(screen, shader);
   if (screen->info.have_EXT_shader_object) {
      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
   } else {
      if (shader->precompile.obj.mod)
         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
      if (shader->precompile.gpl)
         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
   }
   blob_finish(&shader->blob);
   ralloc_free(shader->spirv);
   free(shader->precompile.bindings);
   ralloc_free(shader);
}

void
zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   util_queue_fence_wait(&shader->precompile.fence);
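   /* detach this shader from every gfx program still referencing it: evict the
    * program from the context's program cache, wait for outstanding compiles, then
    * drop the program reference
    */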
   set_foreach(shader->programs, entry) {
      struct zink_gfx_program *prog = (void*)entry->key;
      gl_shader_stage stage = shader->info.stage;
      assert(stage < ZINK_GFX_SHADER_COUNT);
      unsigned stages_present = prog->stages_present;
      if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
         stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
      unsigned idx = zink_program_cache_stages(stages_present);
      if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
         struct hash_table *ht = &prog->ctx->program_cache[idx];
         simple_mtx_lock(&prog->ctx->program_lock[idx]);
         struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
         assert(he && he->data == prog);
         _mesa_hash_table_remove(ht, he);
         prog->base.removed = true;
         simple_mtx_unlock(&prog->ctx->program_lock[idx]);
         util_queue_fence_wait(&prog->base.cache_fence);

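         /* wait for any async pipeline compiles that still reference this program */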
         for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
            for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
               hash_table_foreach(&prog->pipelines[r][i], table_entry) {
                  struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;

                  util_queue_fence_wait(&pc_entry->fence);
               }
            }
         }
      }
      if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
         prog->shaders[stage] = NULL;
         prog->stages_remaining &= ~BITFIELD_BIT(stage);
      }
      /* only remove generated tcs during parent tes destruction */
      if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
         prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
      if (stage != MESA_SHADER_FRAGMENT &&
          prog->shaders[MESA_SHADER_GEOMETRY] &&
          prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent == shader) {
         prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
      }
      zink_gfx_program_reference(screen, &prog, NULL);
   }
   while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
      struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
      if (!libs->removed) {
         libs->removed = true;
         unsigned idx = zink_program_cache_stages(libs->stages_present);
         simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
         _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
         simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
      }
      zink_gfx_lib_cache_unref(screen, libs);
   }
   if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
       shader->non_fs.generated_tcs) {
      /* automatically destroy generated tcs shaders when tes is destroyed */
      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
      shader->non_fs.generated_tcs = NULL;
   }
   if (shader->info.stage != MESA_SHADER_FRAGMENT) {
      for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
         for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
            if (shader->non_fs.generated_gs[i][j]) {
               /* automatically destroy generated gs shaders when owner is destroyed */
               zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
               shader->non_fs.generated_gs[i][j] = NULL;
            }
         }
      }
   }
   zink_shader_free(screen, shader);
}


struct zink_shader_object
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
{
   assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
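   /* tcs_vertices_out_word is the offset of the OutputVertices execution mode
    * literal recorded when the SPIR-V was emitted, so the patch size can be
    * patched directly in the binary
    */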
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
}

/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
   some_var_out = some_var[gl_InvocationID];
}

*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
{
   struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
   util_queue_fence_init(&ret->precompile.fence);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b = nir_builder_at(nir_before_impl(impl));

   nir_def *invocation_id = nir_load_invocation_id(&b);

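   /* mirror each TES input as an in/out pair on the TCS and copy this invocation's
    * per-vertex value straight through
    */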
   nir_foreach_shader_in_variable(var, tes) {
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
         continue;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
         const struct glsl_type *type = var->type;
         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
         out_type = glsl_array_type(type, vertices_per_patch, 0);
      }

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7). Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch. The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
      nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
      nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
      copy_vars(&b, out_value, in_value);
   }
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   create_gfx_pushconst(nir);

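   /* default inner/outer tess levels come from the gfx push constants, which the
    * driver populates when no application TCS is bound
    */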
   nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
   nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   optimize_nir(nir, NULL, true);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   *nir_ret = nir;
   zink_shader_serialize_blob(nir, &ret->blob);
   memcpy(&ret->info, &nir->info, sizeof(nir->info));
   ret->non_fs.is_generated = true;
   return ret;
}

bool
zink_shader_has_cubes(nir_shader *nir)
{
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
         return true;
   }
   return false;
}

nir_shader *
zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
{
   struct blob_reader blob_reader;
   blob_reader_init(&blob_reader, blob->data, blob->size);
   return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
}

nir_shader *
zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
{
   return zink_shader_blob_deserialize(screen, &zs->blob);
}

void
zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
{
   blob_init(blob);
#ifndef NDEBUG
   bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
#else
   bool strip = false;
#endif
   nir_serialize(blob, nir, strip);
}

void
zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
{
   nir_shader *nir = zink_shader_deserialize(screen, zs);
   nir_print_shader(nir, fp);
   ralloc_free(nir);
}