/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "nir_opcodes.h"
#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

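/* rewrite load_work_dim as a push constant load so ntv can source it from the cs push constant block */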
static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   if (instr->intrinsic == nir_intrinsic_load_work_dim) {
      b->cursor = nir_after_instr(&instr->instr);
      nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
      load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
      nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
      load->num_components = 1;
      nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
      nir_builder_instr_insert(b, &load->instr);

      nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
   }

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}

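/* gl_BaseVertex must read as 0 for non-indexed draws, so wrap the loaded value in a bcsel
 * keyed on the draw_mode_is_indexed push constant
 */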
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}


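/* replace gl_DrawID with a load of the draw_id push constant */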
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

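/* remap the secondary dual-source blend output from FRAG_RESULT_DATA1 to FRAG_RESULT_DATA0
 * with index 1, matching Vulkan's dual-source blending semantics
 */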
static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .has_txs = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }

   /*
       The OpFRem and OpFMod instructions use cheap approximations of remainder,
       and the error can be large due to the discontinuity in trunc() and floor().
       This can produce mathematically unexpected results in some cases, such as
       FMod(x,x) computing x rather than 0, and can also cause the result to have
       a different sign than the infinitely precise result.

       -Table 84. Precision of core SPIR-V Instructions
    * for drivers that are known to have imprecise fmod for doubles, lower dmod
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}


static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}

static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return true;
   default:
      break;
   }
   return false;
}


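/* per-bit-size variables used when rewriting ubo/ssbo access into deref-based access;
 * arrays are indexed by bit_size >> 4 (16/32/64 -> 1/2/4)
 */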
struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};

static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}

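/* zero out (loads) or drop (stores) constant-offset ubo/ssbo access that is provably
 * outside the variable's sized range
 */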
static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}

static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}

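/* standard NIR optimization loop, run until no pass makes further progress */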
static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}

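/* find the output variable feeding a given location/component, preferring the largest
 * variable when multiple variables share the location
 */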
static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}

static bool
is_inlined(const bool *inlined, const struct pipe_stream_output *output)
{
   for (unsigned i = 0; i < output->num_components; i++)
      if (!inlined[output->start_component + i])
         return false;
   return true;
}

static void
update_psiz_location(nir_shader *nir, nir_variable *psiz)
{
   uint32_t last_output = util_last_bit64(nir->info.outputs_written);
   if (last_output < VARYING_SLOT_VAR0)
      last_output = VARYING_SLOT_VAR0;
   else
      last_output++;
   /* this should get fixed up by slot remapping */
   psiz->data.location = last_output;
}

static const struct glsl_type *
clamp_slot_type(const struct glsl_type *type, unsigned slot)
{
   /* could be dvec/dmat/mat: each member is the same */
   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
   /* determine size of each member type */
   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
   /* normalize slot idx to current type's size */
   slot %= slot_count;
   unsigned slot_components = glsl_get_components(plain);
   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
      slot_components *= 2;
   /* create a vec4 mask of the selected slot's components out of all the components */
   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
   /* return a vecN of the selected components */
   slot_components = util_bitcount(mask);
   return glsl_vec_type(slot_components);
}

static const struct glsl_type *
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
{
   const struct glsl_type *type = slot_type;
   unsigned slot_count = 0;
   unsigned cur_slot = 0;
   /* iterate over all the members in the struct, stopping once the slot idx is reached */
   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
      /* use array type for slot counting but return array member type for unroll */
      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
      type = glsl_without_array(arraytype);
      slot_count = glsl_count_vec4_slots(arraytype, false, false);
   }
   *slot_idx -= (cur_slot - slot_count);
   if (!glsl_type_is_struct_or_ifc(type))
      /* this is a fully unrolled struct: find the number of vec components to output */
      type = clamp_slot_type(type, *slot_idx);
   return type;
}

static unsigned
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
{
   assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
   const struct glsl_type *orig_type = var->type;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned slot_idx = slot - so_slot;
   if (type != orig_type)
      slot_idx %= glsl_count_vec4_slots(type, false, false);
   /* need to find the vec4 that's being exported by this slot */
   while (glsl_type_is_struct_or_ifc(type))
      type = unroll_struct_type(type, &slot_idx);

   /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
   unsigned num_components = glsl_get_components(glsl_without_array(type));
   const struct glsl_type *arraytype = orig_type;
   while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) {
      num_components *= glsl_array_size(arraytype);
      arraytype = glsl_get_array_element(arraytype);
   }
   assert(num_components);
   /* gallium handles xfb in terms of 32bit units */
   if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
      num_components *= 2;
   return num_components;
}

static const struct pipe_stream_output *
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *packed_output = &so_info->output[i];
      if (reverse_map[packed_output->register_index] == slot)
         return packed_output;
   }
   return NULL;
}

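/* map gallium stream output info onto nir xfb variable data, inlining outputs onto
 * variables where possible and recording the rest in zs->sinfo for later handling
 */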
static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   bool have_fake_psiz = false;
   nir_foreach_shader_out_variable(var, zs->nir) {
      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
         have_fake_psiz = true;
   }

   bool inlined[VARYING_SLOT_MAX][4] = {0};
   uint64_t packed = 0;
   uint8_t packed_components[VARYING_SLOT_MAX] = {0};
   uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
   uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
   uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
   nir_variable *psiz = NULL;
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         unsigned so_slot;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         if (var->data.location == VARYING_SLOT_PSIZ)
            psiz = var;
         so_slot = slot + 1;
         slot = reverse_map[output->register_index];
         if (var->data.explicit_xfb_buffer) {
            /* handle dvec3 where gallium splits streamout over 2 registers */
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         }
         if (is_inlined(inlined[slot], output))
            continue;
         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
         unsigned num_components = get_slot_components(var, slot, so_slot);
         /* if this is the entire variable, try to blast it out during the initial declaration
          * structs must be handled later to ensure accurate analysis
          */
         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         } else {
            /* otherwise store some metadata for later */
            packed |= BITFIELD64_BIT(slot);
            packed_components[slot] += output->num_components;
            packed_streams[slot] |= BITFIELD_BIT(output->stream);
            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
            for (unsigned j = 0; j < output->num_components; j++)
               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
         }
      }
   }

   /* if this was flagged as a packed output before, and if all the components are
    * being output with the same stream on the same buffer with increasing offsets, this entire variable
    * can be consolidated into a single output to conserve locations
    */
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;

         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
            if (!packed_output)
               goto out;

            /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
            if (!(packed & BITFIELD64_BIT(slot)) ||
                util_bitcount(packed_streams[slot]) != 1 ||
                util_bitcount(packed_buffers[slot]) != 1)
               goto out;

            /* if all the components the variable exports to this slot aren't captured, skip consolidation */
            unsigned num_components = get_slot_components(var, slot, var->data.location);
            if (glsl_type_is_array(var->type) && !glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
               num_components /= glsl_array_size(var->type);
            if (num_components != packed_components[slot])
               goto out;

            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
                  goto out;
               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
         var->data.xfb.buffer = output->output_buffer;
         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
         var->data.offset = output->dst_offset * 4;
         var->data.stream = output->stream;
         /* GLSL specifies that interface blocks are split per-buffer in XFB */
         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            for (unsigned k = 0; k < packed_components[slot]; k++)
               inlined[slot][k] = true;
            packed &= ~BITFIELD64_BIT(slot);
         }
         continue;
      }
out:
      /* these are packed/explicit varyings which can't be exported with normal output */
      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
   /* ensure this doesn't get output in the shader by unsetting location */
   if (have_fake_psiz && psiz)
      update_psiz_location(zs->nir, psiz);
}

struct decompose_state {
   nir_variable **split;
   bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

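/* split the given vertex attributes into per-component input variables and rewrite their
 * loads to reassemble the original vector
 */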
static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
   return true;
}

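/* convert ubo/ssbo/shared byte offsets into array indices sized to the access bit size,
 * splitting 64bit loads/stores into 2x32 when shaderInt64 is unavailable
 */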
static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   const bool has_int64 = screen->info.feats.features.shaderInt64;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap: {
      /* convert offset to uintN_t[idx] */
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      return true;
   }
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ubo: {
      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
                        nir_dest_bit_size(intr->dest) == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
            else
               load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_load_shared:
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
         return true;
      }
      break;
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   default:
      break;
   }
   return false;
}

static bool
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}

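/* fetch (or lazily clone and retype) the bo variable matching the given descriptor type
 * and bit size so loads/stores can be rewritten as derefs of uintN_t arrays
 */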
static nir_variable *
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
{
   nir_variable *var, **ptr;
   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;

   if (ssbo)
      ptr = &bo->ssbo[bit_size >> 4];
   else {
      if (!idx) {
         ptr = &bo->uniforms[bit_size >> 4];
      } else
         ptr = &bo->ubo[bit_size >> 4];
   }
   var = *ptr;
   if (!var) {
      if (ssbo)
         var = bo->ssbo[32 >> 4];
      else {
         if (!idx)
            var = bo->uniforms[32 >> 4];
         else
            var = bo->ubo[32 >> 4];
      }
      var = nir_variable_clone(var, shader);
      *ptr = var;
      nir_shader_add_variable(shader, var);

      struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
      fields[0].name = ralloc_strdup(shader, "base");
      fields[1].name = ralloc_strdup(shader, "unsized");
      unsigned array_size = glsl_get_length(var->type);
      const struct glsl_type *bare_type = glsl_without_array(var->type);
      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
      unsigned length = glsl_get_length(array_type);
      const struct glsl_type *type;
      const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
      if (bit_size > 32) {
         assert(bit_size == 64);
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
      } else {
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
      }
      fields[0].type = type;
      fields[1].type = unsized;
      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
      var->data.driver_location = idx;
   }
   return var;
}

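/* rewrite buffer-offset ssbo atomics as deref atomics on the corresponding bo variable */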
static void
rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
{
   nir_intrinsic_op op;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
      op = nir_intrinsic_deref_atomic_add;
      break;
   case nir_intrinsic_ssbo_atomic_umin:
      op = nir_intrinsic_deref_atomic_umin;
      break;
   case nir_intrinsic_ssbo_atomic_imin:
      op = nir_intrinsic_deref_atomic_imin;
      break;
   case nir_intrinsic_ssbo_atomic_umax:
      op = nir_intrinsic_deref_atomic_umax;
      break;
   case nir_intrinsic_ssbo_atomic_imax:
      op = nir_intrinsic_deref_atomic_imax;
      break;
   case nir_intrinsic_ssbo_atomic_and:
      op = nir_intrinsic_deref_atomic_and;
      break;
   case nir_intrinsic_ssbo_atomic_or:
      op = nir_intrinsic_deref_atomic_or;
      break;
   case nir_intrinsic_ssbo_atomic_xor:
      op = nir_intrinsic_deref_atomic_xor;
      break;
   case nir_intrinsic_ssbo_atomic_exchange:
      op = nir_intrinsic_deref_atomic_exchange;
      break;
   case nir_intrinsic_ssbo_atomic_comp_swap:
      op = nir_intrinsic_deref_atomic_comp_swap;
      break;
   default:
      unreachable("unknown intrinsic");
   }
   nir_ssa_def *offset = intr->src[1].ssa;
   nir_src *src = &intr->src[0];
   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = src->ssa;
   if (bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);

   /* generate new atomic deref ops for every component */
   nir_ssa_def *result[4];
   unsigned num_components = nir_dest_num_components(intr->dest);
   for (unsigned i = 0; i < num_components; i++) {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
      /* deref ops have no offset src, so copy the srcs after it */
      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
         nir_src_copy(&new_instr->src[i - 1], &intr->src[i]);
      nir_builder_instr_insert(b, &new_instr->instr);

      result[i] = &new_instr->dest.ssa;
      offset = nir_iadd_imm(b, offset, 1);
   }

   nir_ssa_def *load = nir_vec(b, result, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   nir_instr_remove(instr);
}

static bool
remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);
   nir_src *src;
   bool ssbo = true;
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      rewrite_atomic_ssbo_instr(b, instr, bo);
      return true;
   case nir_intrinsic_store_ssbo:
      src = &intr->src[1];
      var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      ssbo = false;
      break;
   default:
      return false;
   }
   assert(var);
   assert(offset);
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
   if (!ssbo && bo->first_ubo && var->data.driver_location)
      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
   else if (ssbo && bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
   assert(intr->num_components <= 2);
   if (is_load) {
      nir_ssa_def *result[2];
      for (unsigned i = 0; i < intr->num_components; i++) {
         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
         offset = nir_iadd_imm(b, offset, 1);
      }
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   } else {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
}

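/* assign a packed driver_location to each user varying emitted by the producer stage;
 * builtin slots get a sentinel so they aren't counted against the varying limit
 */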
1370 static void
assign_producer_var_io(gl_shader_stage stage,nir_variable * var,unsigned * reserved,unsigned char * slot_map)1371 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1372 {
1373 unsigned slot = var->data.location;
1374 switch (slot) {
1375 case -1:
1376 case VARYING_SLOT_POS:
1377 case VARYING_SLOT_PNTC:
1378 case VARYING_SLOT_PSIZ:
1379 case VARYING_SLOT_LAYER:
1380 case VARYING_SLOT_PRIMITIVE_ID:
1381 case VARYING_SLOT_CLIP_DIST0:
1382 case VARYING_SLOT_CULL_DIST0:
1383 case VARYING_SLOT_VIEWPORT:
1384 case VARYING_SLOT_FACE:
1385 case VARYING_SLOT_TESS_LEVEL_OUTER:
1386 case VARYING_SLOT_TESS_LEVEL_INNER:
1387 /* use a sentinel value to avoid counting later */
1388 var->data.driver_location = UINT_MAX;
1389 break;
1390
1391 default:
1392 if (var->data.patch) {
1393 assert(slot >= VARYING_SLOT_PATCH0);
1394 slot -= VARYING_SLOT_PATCH0;
1395 }
1396 if (slot_map[slot] == 0xff) {
1397 assert(*reserved < MAX_VARYING);
1398 unsigned num_slots;
1399 if (nir_is_arrayed_io(var, stage))
1400 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
1401 else
1402 num_slots = glsl_count_vec4_slots(var->type, false, false);
1403 assert(*reserved + num_slots <= MAX_VARYING);
1404 for (unsigned i = 0; i < num_slots; i++)
1405 slot_map[slot + i] = (*reserved)++;
1406 }
1407 slot = slot_map[slot];
1408 assert(slot < MAX_VARYING);
1409 var->data.driver_location = slot;
1410 }
1411 }
1412
1413 ALWAYS_INLINE static bool
1414 is_texcoord(gl_shader_stage stage, const nir_variable *var)
1415 {
1416 if (stage != MESA_SHADER_FRAGMENT)
1417 return false;
1418 return var->data.location >= VARYING_SLOT_TEX0 &&
1419 var->data.location <= VARYING_SLOT_TEX7;
1420 }
1421
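/* assign a driver_location to a consumer input from slot_map; returns false if the slot was never reserved by the producer (dead io) */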
1422 static bool
1423 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
1424 {
1425 unsigned slot = var->data.location;
1426 switch (slot) {
1427 case VARYING_SLOT_POS:
1428 case VARYING_SLOT_PNTC:
1429 case VARYING_SLOT_PSIZ:
1430 case VARYING_SLOT_LAYER:
1431 case VARYING_SLOT_PRIMITIVE_ID:
1432 case VARYING_SLOT_CLIP_DIST0:
1433 case VARYING_SLOT_CULL_DIST0:
1434 case VARYING_SLOT_VIEWPORT:
1435 case VARYING_SLOT_FACE:
1436 case VARYING_SLOT_TESS_LEVEL_OUTER:
1437 case VARYING_SLOT_TESS_LEVEL_INNER:
1438 /* use a sentinel value to avoid counting later */
1439 var->data.driver_location = UINT_MAX;
1440 break;
1441 default:
1442 if (var->data.patch) {
1443 assert(slot >= VARYING_SLOT_PATCH0);
1444 slot -= VARYING_SLOT_PATCH0;
1445 }
1446 if (slot_map[slot] == (unsigned char)-1) {
1447 if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var))
1448 /* dead io */
1449 return false;
1450 /* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE
1451 * - patch variables may be read in the workgroup
1452 */
1453 slot_map[slot] = (*reserved)++;
1454 }
1455 var->data.driver_location = slot_map[slot];
1456 }
1457 return true;
1458 }
1459
1460
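/* rewrite loads of a dead input variable as undefs so the entire deref chain can be deleted */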
1461 static bool
1462 rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
1463 {
1464 nir_variable *var = data;
1465 if (instr->type != nir_instr_type_intrinsic)
1466 return false;
1467
1468 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1469 if (intr->intrinsic != nir_intrinsic_load_deref)
1470 return false;
1471 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
1472 if (deref_var != var)
1473 return false;
1474 nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
1475 nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
1476 return true;
1477 }
1478
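/* match producer outputs to consumer inputs, assigning compatible driver locations and pruning dead io */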
1479 void
1480 zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
1481 {
1482 unsigned reserved = 0;
1483 unsigned char slot_map[VARYING_SLOT_MAX];
1484 memset(slot_map, -1, sizeof(slot_map));
1485 bool do_fixup = false;
1486 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
1487 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1488 /* remove injected pointsize from all but the last vertex stage */
1489 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
1490 if (var && !var->data.explicit_location) {
1491 var->data.mode = nir_var_shader_temp;
1492 nir_fixup_deref_modes(producer);
1493 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1494 optimize_nir(producer, NULL);
1495 }
1496 }
1497 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
1498 /* never assign from tcs -> tes, always invert */
1499 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
1500 assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
1501 nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
1502 if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
1503 /* this is an output, nothing more needs to be done for it to be dropped */
1504 do_fixup = true;
1505 }
1506 } else {
1507 nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
1508 assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
1509 nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
1510 if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
1511 do_fixup = true;
1512 /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
1513 nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
1514 }
1515 }
1516 }
1517 if (!do_fixup)
1518 return;
1519 nir_fixup_deref_modes(nir);
1520 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1521 optimize_nir(nir, NULL);
1522 }
1523
1524 /* all types that hit this function contain something that is 64bit */
1525 static const struct glsl_type *
1526 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var)
1527 {
1528 if (glsl_type_is_array(type)) {
1529 const struct glsl_type *child = glsl_get_array_element(type);
1530 unsigned elements = glsl_array_size(type);
1531 unsigned stride = glsl_get_explicit_stride(type);
1532 return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride);
1533 }
1534 /* rewrite structs recursively */
1535 if (glsl_type_is_struct_or_ifc(type)) {
1536 unsigned nmembers = glsl_get_length(type);
1537 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
1538 unsigned xfb_offset = 0;
1539 for (unsigned i = 0; i < nmembers; i++) {
1540 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
1541 fields[i] = *f;
1542 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
1543 if (i < nmembers - 1 && xfb_offset % 8 &&
1544 glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) {
1545 var->data.is_xfb = true;
1546 }
1547 fields[i].type = rewrite_64bit_type(nir, f->type, var);
1548 }
1549 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
1550 }
1551 if (!glsl_type_is_64bit(type))
1552 return type;
1553 enum glsl_base_type base_type;
1554 switch (glsl_get_base_type(type)) {
1555 case GLSL_TYPE_UINT64:
1556 base_type = GLSL_TYPE_UINT;
1557 break;
1558 case GLSL_TYPE_INT64:
1559 base_type = GLSL_TYPE_INT;
1560 break;
1561 case GLSL_TYPE_DOUBLE:
1562 base_type = GLSL_TYPE_FLOAT;
1563 break;
1564 default:
1565 unreachable("unknown 64-bit vertex attribute format!");
1566 }
1567 if (glsl_type_is_scalar(type))
1568 return glsl_vector_type(base_type, 2);
1569 unsigned num_components;
1570 if (glsl_type_is_matrix(type)) {
1571 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
1572 unsigned vec_components = glsl_get_vector_elements(type);
1573 if (vec_components == 3)
1574 vec_components = 4;
1575 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
1576 } else {
1577 num_components = glsl_get_vector_elements(type) * 2;
1578 if (num_components <= 4)
1579 return glsl_vector_type(base_type, num_components);
1580 }
1581 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
1582 struct glsl_struct_field fields[8] = {0};
1583 unsigned remaining = num_components;
1584 unsigned nfields = 0;
1585 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
1586 assert(i < ARRAY_SIZE(fields));
1587 fields[i].name = "";
1588 fields[i].offset = i * 16;
1589 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
1590 }
1591 char buf[64];
1592 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
1593 return glsl_struct_type(fields, nfields, buf, true);
1594 }
1595
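/* walk up the deref chain and return the original matrix type, if any */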
1596 static const struct glsl_type *
1597 deref_is_matrix(nir_deref_instr *deref)
1598 {
1599 if (glsl_type_is_matrix(deref->type))
1600 return deref->type;
1601 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1602 if (parent)
1603 return deref_is_matrix(parent);
1604 return NULL;
1605 }
1606
1607 /* rewrite all input/output variables using 32bit types and load/stores */
1608 static bool
1609 lower_64bit_vars(nir_shader *shader)
1610 {
1611 bool progress = false;
1612 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1613 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
1614 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) {
1615 if (!glsl_type_contains_64bit(var->type))
1616 continue;
1617 var->type = rewrite_64bit_type(shader, var->type, var);
1618 /* once type is rewritten, rewrite all loads and stores */
1619 nir_foreach_function(function, shader) {
1620 bool func_progress = false;
1621 if (!function->impl)
1622 continue;
1623 nir_builder b;
1624 nir_builder_init(&b, function->impl);
1625 nir_foreach_block(block, function->impl) {
1626 nir_foreach_instr_safe(instr, block) {
1627 switch (instr->type) {
1628 case nir_instr_type_deref: {
1629 nir_deref_instr *deref = nir_instr_as_deref(instr);
1630 if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out)))
1631 continue;
1632 if (nir_deref_instr_get_variable(deref) != var)
1633 continue;
1634
1635 /* matrix types are special: store the original deref type for later use */
1636 const struct glsl_type *matrix = deref_is_matrix(deref);
1637 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1638 if (!matrix) {
1639 /* if this isn't a direct matrix deref, it may be a matrix row deref */
1640 hash_table_foreach(derefs, he) {
1641 /* propagate parent matrix type to row deref */
1642 if (he->key == parent)
1643 matrix = he->data;
1644 }
1645 }
1646 if (matrix)
1647 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
1648 if (deref->deref_type == nir_deref_type_var)
1649 deref->type = var->type;
1650 else
1651 deref->type = rewrite_64bit_type(shader, deref->type, var);
1652 }
1653 break;
1654 case nir_instr_type_intrinsic: {
1655 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1656 if (intr->intrinsic != nir_intrinsic_store_deref &&
1657 intr->intrinsic != nir_intrinsic_load_deref)
1658 break;
1659 if (nir_intrinsic_get_var(intr, 0) != var)
1660 break;
1661 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
1662 (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
1663 break;
1664 b.cursor = nir_before_instr(instr);
1665 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1666 unsigned num_components = intr->num_components * 2;
1667 nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
1668 /* this is the stored matrix type from the deref */
1669 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
1670 const struct glsl_type *matrix = he ? he->data : NULL;
1671 func_progress = true;
1672 if (intr->intrinsic == nir_intrinsic_store_deref) {
1673 /* first, unpack the src data to 32bit vec2 components */
1674 for (unsigned i = 0; i < intr->num_components; i++) {
1675 nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
1676 comp[i * 2] = nir_channel(&b, ssa, 0);
1677 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
1678 }
1679 unsigned wrmask = nir_intrinsic_write_mask(intr);
1680 unsigned mask = 0;
1681 /* expand writemask for doubled components */
1682 for (unsigned i = 0; i < intr->num_components; i++) {
1683 if (wrmask & BITFIELD_BIT(i))
1684 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
1685 }
1686 if (matrix) {
1687 /* matrix types always come from array (row) derefs */
1688 assert(deref->deref_type == nir_deref_type_array);
1689 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1690 /* let optimization clean up consts later */
1691 nir_ssa_def *index = deref->arr.index.ssa;
1692 /* this might be an indirect array index:
1693 * - iterate over matrix columns
1694 * - add if blocks for each column
1695 * - perform the store in the block
1696 */
1697 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
1698 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1699 unsigned vec_components = glsl_get_vector_elements(matrix);
1700 /* always clamp dvec3 to 4 components */
1701 if (vec_components == 3)
1702 vec_components = 4;
1703 unsigned start_component = idx * vec_components * 2;
1704 /* struct member */
1705 unsigned member = start_component / 4;
1706 /* number of components remaining */
1707 unsigned remaining = num_components;
1708 for (unsigned i = 0; i < num_components; member++) {
1709 if (!(mask & BITFIELD_BIT(i)))
1710 continue;
1711 assert(member < glsl_get_length(var_deref->type));
1712 /* deref the rewritten struct to the appropriate vec4/vec2 */
1713 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1714 unsigned incr = MIN2(remaining, 4);
1715 /* assemble the write component vec */
1716 nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
1717 /* use the number of components being written as the writemask */
1718 if (glsl_get_vector_elements(strct->type) > val->num_components)
1719 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
1720 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
1721 remaining -= incr;
1722 i += incr;
1723 }
1724 nir_pop_if(&b, NULL);
1725 }
1726 _mesa_set_add(deletes, &deref->instr);
1727 } else if (num_components <= 4) {
1728 /* simple store case: just write out the components */
1729 nir_ssa_def *dest = nir_vec(&b, comp, num_components);
1730 nir_store_deref(&b, deref, dest, mask);
1731 } else {
1732 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
1733 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
1734 if (!(mask & BITFIELD_MASK(4)))
1735 continue;
1736 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1737 nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
1738 if (glsl_get_vector_elements(strct->type) > dest->num_components)
1739 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
1740 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
1741 mask >>= 4;
1742 }
1743 }
1744 } else {
1745 nir_ssa_def *dest = NULL;
1746 if (matrix) {
1747 /* matrix types always come from array (row) derefs */
1748 assert(deref->deref_type == nir_deref_type_array);
1749 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
1750 /* let optimization clean up consts later */
1751 nir_ssa_def *index = deref->arr.index.ssa;
1752 /* this might be an indirect array index:
1753 * - iterate over matrix columns
1754 * - add if blocks for each column
1755 * - phi the loads using the array index
1756 */
1757 unsigned cols = glsl_get_matrix_columns(matrix);
1758 nir_ssa_def *dests[4];
1759 for (unsigned idx = 0; idx < cols; idx++) {
1760 /* don't add an if for the final row: this will be handled in the else */
1761 if (idx < cols - 1)
1762 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
1763 unsigned vec_components = glsl_get_vector_elements(matrix);
1764 /* always clamp dvec3 to 4 components */
1765 if (vec_components == 3)
1766 vec_components = 4;
1767 unsigned start_component = idx * vec_components * 2;
1768 /* struct member */
1769 unsigned member = start_component / 4;
1770 /* number of components remaining */
1771 unsigned remaining = num_components;
1772 /* component index */
1773 unsigned comp_idx = 0;
1774 for (unsigned i = 0; i < num_components; member++) {
1775 assert(member < glsl_get_length(var_deref->type));
1776 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
1777 nir_ssa_def *load = nir_load_deref(&b, strct);
1778 unsigned incr = MIN2(remaining, 4);
1779 /* repack the loads to 64bit */
1780 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
1781 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
1782 remaining -= incr;
1783 i += incr;
1784 }
1785 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
1786 if (idx < cols - 1)
1787 nir_push_else(&b, NULL);
1788 }
1789 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
1790 for (unsigned idx = cols - 1; idx >= 1; idx--) {
1791 nir_pop_if(&b, NULL);
1792 dest = nir_if_phi(&b, dests[idx - 1], dest);
1793 }
1794 _mesa_set_add(deletes, &deref->instr);
1795 } else if (num_components <= 4) {
1796 /* simple load case */
1797 nir_ssa_def *load = nir_load_deref(&b, deref);
1798 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
1799 for (unsigned i = 0; i < intr->num_components; i++) {
1800 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
1801 }
1802 dest = nir_vec(&b, comp, intr->num_components);
1803 } else {
1804 /* reading > 4 components: access the struct and load the appropriate vec4 members */
1805 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
1806 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
1807 nir_ssa_def *load = nir_load_deref(&b, strct);
1808 comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
1809 if (num_components > 2)
1810 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
1811 }
1812 dest = nir_vec(&b, comp, intr->num_components);
1813 }
1814 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
1815 }
1816 _mesa_set_add(deletes, instr);
1817 break;
1818 }
1819 break;
1820 default: break;
1821 }
1822 }
1823 }
1824 if (func_progress)
1825 nir_metadata_preserve(function->impl, nir_metadata_none);
1826 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
1827 set_foreach_remove(deletes, he)
1828 nir_instr_remove((void*)he->key);
1829 }
1830 progress = true;
1831 }
1832 ralloc_free(deletes);
1833 ralloc_free(derefs);
1834 if (progress) {
1835 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
1836 nir_lower_phis_to_scalar(shader, false);
1837 optimize_nir(shader, NULL);
1838 }
1839 return progress;
1840 }
1841
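/* split struct-type output blocks into one variable per member, rewriting struct derefs to use the new variables */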
1842 static bool
1843 split_blocks(nir_shader *nir)
1844 {
1845 bool progress = false;
1846 bool changed = true;
1847 do {
1848 progress = false;
1849 nir_foreach_shader_out_variable(var, nir) {
1850 const struct glsl_type *base_type = glsl_without_array(var->type);
1851 nir_variable *members[32]; //can't have more than this without breaking NIR
1852 if (!glsl_type_is_struct(base_type))
1853 continue;
1854 /* TODO: arrays? */
1855 if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
1856 continue;
1857 if (glsl_count_attribute_slots(var->type, false) == 1)
1858 continue;
1859 unsigned offset = 0;
1860 for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
1861 members[i] = nir_variable_clone(var, nir);
1862 members[i]->type = glsl_get_struct_field(var->type, i);
1863 members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
1864 members[i]->data.location += offset;
1865 offset += glsl_count_attribute_slots(members[i]->type, false);
1866 nir_shader_add_variable(nir, members[i]);
1867 }
1868 nir_foreach_function(function, nir) {
1869 bool func_progress = false;
1870 if (!function->impl)
1871 continue;
1872 nir_builder b;
1873 nir_builder_init(&b, function->impl);
1874 nir_foreach_block(block, function->impl) {
1875 nir_foreach_instr_safe(instr, block) {
1876 switch (instr->type) {
1877 case nir_instr_type_deref: {
1878 nir_deref_instr *deref = nir_instr_as_deref(instr);
1879 if (!(deref->modes & nir_var_shader_out))
1880 continue;
1881 if (nir_deref_instr_get_variable(deref) != var)
1882 continue;
1883 if (deref->deref_type != nir_deref_type_struct)
1884 continue;
1885 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1886 if (parent->deref_type != nir_deref_type_var)
1887 continue;
1888 deref->modes = nir_var_shader_temp;
1889 parent->modes = nir_var_shader_temp;
1890 b.cursor = nir_before_instr(instr);
1891 nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
1892 nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
1893 nir_instr_remove(&deref->instr);
1894 func_progress = true;
1895 break;
1896 }
1897 default: break;
1898 }
1899 }
1900 }
1901 if (func_progress)
1902 nir_metadata_preserve(function->impl, nir_metadata_none);
1903 }
1904 var->data.mode = nir_var_shader_temp;
1905 changed = true;
1906 progress = true;
1907 }
1908 } while (progress);
1909 return changed;
1910 }
1911
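/* dump the raw spirv words to a file for debugging */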
1912 static void
1913 zink_shader_dump(void *words, size_t size, const char *file)
1914 {
1915 FILE *fp = fopen(file, "wb");
1916 if (fp) {
1917 fwrite(words, 1, size, fp);
1918 fclose(fp);
1919 fprintf(stderr, "wrote '%s'...\n", file);
1920 }
1921 }
1922
1923 VkShaderModule
1924 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
1925 {
1926 VkShaderModule mod;
1927 VkShaderModuleCreateInfo smci = {0};
1928
1929 if (!spirv)
1930 spirv = zs->spirv;
1931
1932 if (zink_debug & ZINK_DEBUG_SPIRV) {
1933 char buf[256];
1934 static int i;
1935 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
1936 zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
1937 }
1938
1939 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1940 smci.codeSize = spirv->num_words * sizeof(uint32_t);
1941 smci.pCode = spirv->words;
1942
1943 #ifndef NDEBUG
1944 if (zink_debug & ZINK_DEBUG_VALIDATION) {
1945 static const struct spirv_to_nir_options spirv_options = {
1946 .environment = NIR_SPIRV_VULKAN,
1947 .caps = {
1948 .float64 = true,
1949 .int16 = true,
1950 .int64 = true,
1951 .tessellation = true,
1952 .float_controls = true,
1953 .image_ms_array = true,
1954 .image_read_without_format = true,
1955 .image_write_without_format = true,
1956 .storage_image_ms = true,
1957 .geometry_streams = true,
1958 .storage_8bit = true,
1959 .storage_16bit = true,
1960 .variable_pointers = true,
1961 .stencil_export = true,
1962 .post_depth_coverage = true,
1963 .transform_feedback = true,
1964 .device_group = true,
1965 .draw_parameters = true,
1966 .shader_viewport_index_layer = true,
1967 .multiview = true,
1968 .physical_storage_buffer_address = true,
1969 .int64_atomics = true,
1970 .subgroup_arithmetic = true,
1971 .subgroup_basic = true,
1972 .subgroup_ballot = true,
1973 .subgroup_quad = true,
1974 .subgroup_shuffle = true,
1975 .subgroup_vote = true,
1976 .vk_memory_model = true,
1977 .vk_memory_model_device_scope = true,
1978 .int8 = true,
1979 .float16 = true,
1980 .demote_to_helper_invocation = true,
1981 .sparse_residency = true,
1982 .min_lod = true,
1983 },
1984 .ubo_addr_format = nir_address_format_32bit_index_offset,
1985 .ssbo_addr_format = nir_address_format_32bit_index_offset,
1986 .phys_ssbo_addr_format = nir_address_format_64bit_global,
1987 .push_const_addr_format = nir_address_format_logical,
1988 .shared_addr_format = nir_address_format_32bit_offset,
1989 };
1990 uint32_t num_spec_entries = 0;
1991 struct nir_spirv_specialization *spec_entries = NULL;
1992 VkSpecializationInfo sinfo = {0};
1993 VkSpecializationMapEntry me[3];
1994 uint32_t size[3] = {1,1,1};
1995 if (!zs->nir->info.workgroup_size[0]) {
1996 sinfo.mapEntryCount = 3;
1997 sinfo.pMapEntries = &me[0];
1998 sinfo.dataSize = sizeof(uint32_t) * 3;
1999 sinfo.pData = size;
2000 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
2001 for (int i = 0; i < 3; i++) {
2002 me[i].size = sizeof(uint32_t);
2003 me[i].constantID = ids[i];
2004 me[i].offset = i * sizeof(uint32_t);
2005 }
2006 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
2007 }
2008 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
2009 spec_entries, num_spec_entries,
2010 zs->nir->info.stage, "main", &spirv_options, &screen->nir_options);
2011 assert(nir);
2012 ralloc_free(nir);
2013 free(spec_entries);
2014 }
2015 #endif
2016
2017 VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
2018 bool success = zink_screen_handle_vkresult(screen, ret);
2019 assert(success);
2020 return success ? mod : VK_NULL_HANDLE;
2021 }
2022
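/* return true if any deref in the shader references this variable */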
2023 static bool
2024 find_var_deref(nir_shader *nir, nir_variable *var)
2025 {
2026 nir_foreach_function(function, nir) {
2027 if (!function->impl)
2028 continue;
2029
2030 nir_foreach_block(block, function->impl) {
2031 nir_foreach_instr(instr, block) {
2032 if (instr->type != nir_instr_type_deref)
2033 continue;
2034 nir_deref_instr *deref = nir_instr_as_deref(instr);
2035 if (deref->deref_type == nir_deref_type_var && deref->var == var)
2036 return true;
2037 }
2038 }
2039 }
2040 return false;
2041 }
2042
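/* demote unreferenced i/o variables to temporaries so they get eliminated */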
2043 static void
2044 prune_io(nir_shader *nir)
2045 {
2046 nir_foreach_shader_in_variable_safe(var, nir) {
2047 if (!find_var_deref(nir, var))
2048 var->data.mode = nir_var_shader_temp;
2049 }
2050 nir_foreach_shader_out_variable_safe(var, nir) {
2051 if (!find_var_deref(nir, var))
2052 var->data.mode = nir_var_shader_temp;
2053 }
2054 }
2055
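/* apply shader-key-based lowering passes, then translate to spirv and create the VkShaderModule */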
2056 VkShaderModule
2057 zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
2058 {
2059 VkShaderModule mod = VK_NULL_HANDLE;
2060 struct zink_shader_info *sinfo = &zs->sinfo;
2061 nir_shader *nir = nir_shader_clone(NULL, base_nir);
2062 bool need_optimize = false;
2063 bool inlined_uniforms = false;
2064
2065 if (key) {
2066 if (key->inline_uniforms) {
2067 NIR_PASS_V(nir, nir_inline_uniforms,
2068 nir->info.num_inlinable_uniforms,
2069 key->base.inlined_uniform_values,
2070 nir->info.inlinable_uniform_dw_offsets);
2071
2072 inlined_uniforms = true;
2073 }
2074
2075 /* TODO: use a separate mem ctx here for ralloc */
2076 switch (zs->nir->info.stage) {
2077 case MESA_SHADER_VERTEX: {
2078 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
2079 const struct zink_vs_key *vs_key = zink_vs_key(key);
2080 switch (vs_key->size) {
2081 case 4:
2082 decomposed_attrs = vs_key->u32.decomposed_attrs;
2083 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
2084 break;
2085 case 2:
2086 decomposed_attrs = vs_key->u16.decomposed_attrs;
2087 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
2088 break;
2089 case 1:
2090 decomposed_attrs = vs_key->u8.decomposed_attrs;
2091 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
2092 break;
2093 default: break;
2094 }
2095 if (decomposed_attrs || decomposed_attrs_without_w)
2096 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
2097 FALLTHROUGH;
2098 }
2099 case MESA_SHADER_TESS_EVAL:
2100 case MESA_SHADER_GEOMETRY:
2101 if (zink_vs_key_base(key)->last_vertex_stage) {
2102 if (zs->sinfo.have_xfb)
2103 sinfo->last_vertex = true;
2104
2105 if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) {
2106 NIR_PASS_V(nir, nir_lower_clip_halfz);
2107 }
2108 if (zink_vs_key_base(key)->push_drawid) {
2109 NIR_PASS_V(nir, lower_drawid);
2110 }
2111 }
2112 break;
2113 case MESA_SHADER_FRAGMENT:
2114 if (!zink_fs_key(key)->samples &&
2115 nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
2116 /* VK will always use gl_SampleMask[] values even if sample count is 0,
2117 * so we need to skip this write here to mimic GL's behavior of ignoring it
2118 */
2119 nir_foreach_shader_out_variable(var, nir) {
2120 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2121 var->data.mode = nir_var_shader_temp;
2122 }
2123 nir_fixup_deref_modes(nir);
2124 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2125 need_optimize = true;
2126 }
2127 if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
2128 NIR_PASS_V(nir, lower_dual_blend);
2129 }
2130 if (zink_fs_key(key)->coord_replace_bits) {
2131 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
2132 false, zink_fs_key(key)->coord_replace_yinvert);
2133 }
2134 if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
2135 nir_foreach_shader_in_variable(var, nir)
2136 var->data.sample = true;
2137 nir->info.fs.uses_sample_qualifier = true;
2138 nir->info.fs.uses_sample_shading = true;
2139 }
2140 if (nir->info.fs.uses_fbfetch_output) {
2141 nir_variable *fbfetch = NULL;
2142 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms);
2143 /* old variable must be deleted to avoid spirv errors */
2144 fbfetch->data.mode = nir_var_shader_temp;
2145 nir_fixup_deref_modes(nir);
2146 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2147 need_optimize = true;
2148 }
2149 break;
2150 default: break;
2151 }
2152 if (key->base.nonseamless_cube_mask) {
2153 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
2154 need_optimize = true;
2155 }
2156 }
2157 if (screen->driconf.inline_uniforms) {
2158 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
2159 NIR_PASS_V(nir, rewrite_bo_access, screen);
2160 NIR_PASS_V(nir, remove_bo_access, zs);
2161 need_optimize = true;
2162 }
2163 if (inlined_uniforms) {
2164 optimize_nir(nir, zs);
2165
2166 /* This must be done again. */
2167 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
2168 nir_var_shader_out);
2169
2170 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2171 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
2172 zs->can_inline = false;
2173 } else if (need_optimize)
2174 optimize_nir(nir, zs);
2175 prune_io(nir);
2176
2177 NIR_PASS_V(nir, nir_convert_from_ssa, true);
2178
2179 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
2180 if (spirv)
2181 mod = zink_shader_spirv_compile(screen, zs, spirv);
2182
2183 ralloc_free(nir);
2184
2185 /* TODO: determine if there's any reason to cache spirv output? */
2186 if (zs->nir->info.stage == MESA_SHADER_TESS_CTRL && zs->is_generated)
2187 zs->spirv = spirv;
2188 else
2189 ralloc_free(spirv);
2190 return mod;
2191 }
2192
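/* GL's gl_InstanceID does not include the base instance, but Vulkan's InstanceIndex does, so subtract it */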
2193 static bool
2194 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
2195 {
2196 if (instr->type != nir_instr_type_intrinsic)
2197 return false;
2198 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2199 if (intr->intrinsic != nir_intrinsic_load_instance_id)
2200 return false;
2201 b->cursor = nir_after_instr(instr);
2202 nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
2203 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
2204 return true;
2205 }
2206
2207 static bool
2208 lower_baseinstance(nir_shader *shader)
2209 {
2210 if (shader->info.stage != MESA_SHADER_VERTEX)
2211 return false;
2212 return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
2213 }
2214
2215 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
2216 * so instead we delete all those broken variables and just make new ones
2217 */
2218 static bool
2219 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
2220 {
2221 uint64_t max_ssbo_size = 0;
2222 uint64_t max_ubo_size = 0;
2223 uint64_t max_uniform_size = 0;
2224
2225 if (!shader->info.num_ssbos && !shader->info.num_ubos)
2226 return false;
2227
2228 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
2229 const struct glsl_type *type = glsl_without_array(var->type);
2230 if (type_is_counter(type))
2231 continue;
2232 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
2233 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
2234 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
2235 if (interface_type) {
2236 unsigned block_size = glsl_get_explicit_size(interface_type, true);
2237 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
2238 size = MAX2(size, block_size);
2239 }
2240 if (var->data.mode == nir_var_mem_ubo) {
2241 if (var->data.driver_location)
2242 max_ubo_size = MAX2(max_ubo_size, size);
2243 else
2244 max_uniform_size = MAX2(max_uniform_size, size);
2245 } else {
2246 max_ssbo_size = MAX2(max_ssbo_size, size);
2247 if (interface_type) {
2248 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
2249 needs_size = true;
2250 }
2251 }
2252 var->data.mode = nir_var_shader_temp;
2253 }
2254 nir_fixup_deref_modes(shader);
2255 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2256 optimize_nir(shader, NULL);
2257
2258 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2259 fields[0].name = ralloc_strdup(shader, "base");
2260 fields[1].name = ralloc_strdup(shader, "unsized");
2261 if (shader->info.num_ubos) {
2262 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
2263 fields[0].type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
2264 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2265 glsl_array_type(glsl_interface_type(fields, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
2266 "uniform_0");
2267 var->interface_type = var->type;
2268 var->data.mode = nir_var_mem_ubo;
2269 var->data.driver_location = 0;
2270 }
2271
2272 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
2273 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
2274 if (num_ubos && ubos_used) {
2275 fields[0].type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
2276 /* shrink array as much as possible */
2277 unsigned first_ubo = ffs(ubos_used) - 2;
2278 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
2279 num_ubos -= first_ubo;
2280 assert(num_ubos);
2281 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
2282 glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0),
2283 "ubos");
2284 var->interface_type = var->type;
2285 var->data.mode = nir_var_mem_ubo;
2286 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
2287 }
2288 }
2289 if (shader->info.num_ssbos && zs->ssbos_used) {
2290 /* shrink array as much as possible */
2291 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
2292 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
2293 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
2294 assert(num_ssbos);
2295 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
2296 const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
2297 fields[0].type = ssbo_type;
2298 fields[1].type = max_ssbo_size ? unsized : NULL;
2299 unsigned field_count = max_ssbo_size && needs_size ? 2 : 1;
2300 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
2301 glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0),
2302 "ssbos");
2303 var->interface_type = var->type;
2304 var->data.mode = nir_var_mem_ssbo;
2305 var->data.driver_location = first_ssbo;
2306 }
2307 return true;
2308 }
2309
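/* compute the set of buffer slots an access may touch: a single bit for a constant index, otherwise all slots (the ubo variant excludes the uniform_0 slot) */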
2310 static uint32_t
2311 get_src_mask_ssbo(unsigned total, nir_src src)
2312 {
2313 if (nir_src_is_const(src))
2314 return BITFIELD_BIT(nir_src_as_uint(src));
2315 return BITFIELD_MASK(total);
2316 }
2317
2318 static uint32_t
2319 get_src_mask_ubo(unsigned total, nir_src src)
2320 {
2321 if (nir_src_is_const(src))
2322 return BITFIELD_BIT(nir_src_as_uint(src));
2323 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
2324 }
2325
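/* record which ubo/ssbo slots are accessed; returns true if get_ssbo_size is used and runtime buffer sizes are needed */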
2326 static bool
2327 analyze_io(struct zink_shader *zs, nir_shader *shader)
2328 {
2329 bool ret = false;
2330 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
2331 nir_foreach_block(block, impl) {
2332 nir_foreach_instr(instr, block) {
2333 if (instr->type != nir_instr_type_intrinsic)
2334 continue;
2335
2336 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2337 switch (intrin->intrinsic) {
2338 case nir_intrinsic_store_ssbo:
2339 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
2340 break;
2341
2342 case nir_intrinsic_get_ssbo_size: {
2343 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2344 ret = true;
2345 break;
2346 }
2347 case nir_intrinsic_ssbo_atomic_fadd:
2348 case nir_intrinsic_ssbo_atomic_add:
2349 case nir_intrinsic_ssbo_atomic_imin:
2350 case nir_intrinsic_ssbo_atomic_umin:
2351 case nir_intrinsic_ssbo_atomic_imax:
2352 case nir_intrinsic_ssbo_atomic_umax:
2353 case nir_intrinsic_ssbo_atomic_and:
2354 case nir_intrinsic_ssbo_atomic_or:
2355 case nir_intrinsic_ssbo_atomic_xor:
2356 case nir_intrinsic_ssbo_atomic_exchange:
2357 case nir_intrinsic_ssbo_atomic_comp_swap:
2358 case nir_intrinsic_ssbo_atomic_fmin:
2359 case nir_intrinsic_ssbo_atomic_fmax:
2360 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2361 case nir_intrinsic_load_ssbo:
2362 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
2363 break;
2364 case nir_intrinsic_load_ubo:
2365 case nir_intrinsic_load_ubo_vec4:
2366 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
2367 break;
2368 default:
2369 break;
2370 }
2371 }
2372 }
2373 return ret;
2374 }
2375
2376 struct zink_bindless_info {
2377 nir_variable *bindless[4];
2378 unsigned bindless_set;
2379 };
2380
2381 /* this is a "default" bindless texture used if the shader has no texture variables */
2382 static nir_variable *
2383 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
2384 {
2385 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
2386 nir_variable *var;
2387
2388 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
2389 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
2390 var->data.descriptor_set = descriptor_set;
2391 var->data.driver_location = var->data.binding = binding;
2392 return var;
2393 }
2394
2395 /* this is a "default" bindless image used if the shader has no image variables */
2396 static nir_variable *
2397 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
2398 {
2399 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
2400 nir_variable *var;
2401
2402 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
2403 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
2404 var->data.descriptor_set = descriptor_set;
2405 var->data.driver_location = var->data.binding = binding;
2406 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2407 return var;
2408 }
2409
2410 /* rewrite bindless instructions as array deref instructions */
2411 static bool
2412 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
2413 {
2414 struct zink_bindless_info *bindless = data;
2415
2416 if (in->type == nir_instr_type_tex) {
2417 nir_tex_instr *tex = nir_instr_as_tex(in);
2418 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2419 if (idx == -1)
2420 return false;
2421
2422 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
2423 if (!var)
2424 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
2425 b->cursor = nir_before_instr(in);
2426 nir_deref_instr *deref = nir_build_deref_var(b, var);
2427 if (glsl_type_is_array(var->type))
2428 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
2429 nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
2430
2431 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
2432 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
2433 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
2434 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
2435 *
2436 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
2437 * - Warhammer 40k: Dawn of War III
2438 */
2439 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
2440 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
2441 unsigned coord_components = nir_src_num_components(tex->src[c].src);
2442 if (coord_components < needed_components) {
2443 nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
2444 nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
2445 tex->coord_components = needed_components;
2446 }
2447 return true;
2448 }
2449 if (in->type != nir_instr_type_intrinsic)
2450 return false;
2451 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2452
2453 nir_intrinsic_op op;
2454 #define OP_SWAP(OP) \
2455 case nir_intrinsic_bindless_image_##OP: \
2456 op = nir_intrinsic_image_deref_##OP; \
2457 break;
2458
2459
2460 /* convert bindless intrinsics to deref intrinsics */
2461 switch (instr->intrinsic) {
2462 OP_SWAP(atomic_add)
2463 OP_SWAP(atomic_and)
2464 OP_SWAP(atomic_comp_swap)
2465 OP_SWAP(atomic_dec_wrap)
2466 OP_SWAP(atomic_exchange)
2467 OP_SWAP(atomic_fadd)
2468 OP_SWAP(atomic_fmax)
2469 OP_SWAP(atomic_fmin)
2470 OP_SWAP(atomic_imax)
2471 OP_SWAP(atomic_imin)
2472 OP_SWAP(atomic_inc_wrap)
2473 OP_SWAP(atomic_or)
2474 OP_SWAP(atomic_umax)
2475 OP_SWAP(atomic_umin)
2476 OP_SWAP(atomic_xor)
2477 OP_SWAP(format)
2478 OP_SWAP(load)
2479 OP_SWAP(order)
2480 OP_SWAP(samples)
2481 OP_SWAP(size)
2482 OP_SWAP(store)
2483 default:
2484 return false;
2485 }
2486
2487 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2488 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
2489 if (!var)
2490 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
2491 instr->intrinsic = op;
2492 b->cursor = nir_before_instr(in);
2493 nir_deref_instr *deref = nir_build_deref_var(b, var);
2494 if (glsl_type_is_array(var->type))
2495 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
2496 nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
2497 return true;
2498 }
2499
2500 static bool
2501 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
2502 {
2503 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
2504 return false;
2505 nir_fixup_deref_modes(shader);
2506 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2507 optimize_nir(shader, NULL);
2508 return true;
2509 }
2510
2511 /* convert shader image/texture io variables to int64 handles for bindless indexing */
2512 static bool
2513 lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
2514 {
2515 if (in->type != nir_instr_type_intrinsic)
2516 return false;
2517 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2518 if (instr->intrinsic != nir_intrinsic_load_deref &&
2519 instr->intrinsic != nir_intrinsic_store_deref)
2520 return false;
2521
2522 nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
2523 nir_variable *var = nir_deref_instr_get_variable(src_deref);
2524 if (var->data.bindless)
2525 return false;
2526 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
2527 return false;
2528 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
2529 return false;
2530
2531 var->type = glsl_int64_t_type();
2532 var->data.bindless = 1;
2533 b->cursor = nir_before_instr(in);
2534 nir_deref_instr *deref = nir_build_deref_var(b, var);
2535 if (instr->intrinsic == nir_intrinsic_load_deref) {
2536 nir_ssa_def *def = nir_load_deref(b, deref);
2537 nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
2538 nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
2539 } else {
2540 nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
2541 }
2542 nir_instr_remove(in);
2543 nir_instr_remove(&src_deref->instr);
2544 return true;
2545 }
2546
2547 static bool
2548 lower_bindless_io(nir_shader *shader)
2549 {
2550 return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
2551 }
2552
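/* compute the flat binding index for a resource from its stage, descriptor type, and per-stage slot */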
2553 static uint32_t
2554 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
2555 {
2556 if (stage == MESA_SHADER_NONE) {
2557 unreachable("not supported");
2558 } else {
2559 switch (type) {
2560 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2561 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2562 return stage * 2 + !!index;
2563
2564 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2565 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2566 assert(index < PIPE_MAX_SAMPLERS);
2567 return (stage * PIPE_MAX_SAMPLERS) + index;
2568
2569 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2570 return stage + (compact_descriptors * (ZINK_SHADER_COUNT * 2));
2571
2572 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2573 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2574 assert(index < ZINK_MAX_SHADER_IMAGES);
2575 return (stage * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_SHADER_COUNT * PIPE_MAX_SAMPLERS));
2576
2577 default:
2578 unreachable("unexpected type");
2579 }
2580 }
2581 }
2582
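/* recurse through struct members and clone bindless sampler/image variables into the per-type bindless arrays */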
2583 static void
2584 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
2585 {
2586 if (glsl_type_is_struct(type)) {
2587 for (unsigned i = 0; i < glsl_get_length(type); i++)
2588 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
2589 return;
2590 }
2591
2592 /* just a random scalar in a struct */
2593 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
2594 return;
2595
2596 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
2597 unsigned binding;
2598 switch (vktype) {
2599 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2600 binding = 0;
2601 break;
2602 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2603 binding = 1;
2604 break;
2605 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2606 binding = 2;
2607 break;
2608 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2609 binding = 3;
2610 break;
2611 default:
2612 unreachable("unknown");
2613 }
2614 if (!bindless->bindless[binding]) {
2615 bindless->bindless[binding] = nir_variable_clone(var, nir);
2616 bindless->bindless[binding]->data.bindless = 0;
2617 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
2618 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
2619 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
2620 if (!bindless->bindless[binding]->data.image.format)
2621 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
2622 nir_shader_add_variable(nir, bindless->bindless[binding]);
2623 } else {
2624 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
2625 }
2626 var->data.mode = nir_var_shader_temp;
2627 }
2628
2629 static enum pipe_prim_type
2630 prim_to_pipe(enum shader_prim primitive_type)
2631 {
2632 switch (primitive_type) {
2633 case SHADER_PRIM_POINTS:
2634 return PIPE_PRIM_POINTS;
2635 case SHADER_PRIM_LINES:
2636 case SHADER_PRIM_LINE_LOOP:
2637 case SHADER_PRIM_LINE_STRIP:
2638 case SHADER_PRIM_LINES_ADJACENCY:
2639 case SHADER_PRIM_LINE_STRIP_ADJACENCY:
2640 return PIPE_PRIM_LINES;
2641 default:
2642 return PIPE_PRIM_TRIANGLES;
2643 }
2644 }
2645
2646 static enum pipe_prim_type
2647 tess_prim_to_pipe(enum tess_primitive_mode prim_mode)
2648 {
2649 switch (prim_mode) {
2650 case TESS_PRIMITIVE_ISOLINES:
2651 return PIPE_PRIM_LINES;
2652 default:
2653 return PIPE_PRIM_TRIANGLES;
2654 }
2655 }
2656
2657 static enum pipe_prim_type
2658 get_shader_base_prim_type(struct nir_shader *nir)
2659 {
2660 switch (nir->info.stage) {
2661 case MESA_SHADER_GEOMETRY:
2662 return prim_to_pipe(nir->info.gs.output_primitive);
2663 case MESA_SHADER_TESS_EVAL:
2664 return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode);
2665 default:
2666 break;
2667 }
2668 return PIPE_PRIM_MAX;
2669 }
2670
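/* promote 1D shadow tex ops to 2D by appending a zero coordinate component */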
2671 static bool
2672 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
2673 {
2674 struct zink_screen *screen = data;
2675 if (instr->type != nir_instr_type_tex)
2676 return false;
2677 nir_tex_instr *tex = nir_instr_as_tex(instr);
2678 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
2679 return false;
2680 if (tex->is_sparse && screen->need_2D_sparse) {
2681 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
2682 mesa_loge("unhandled/unsupported 1D sparse texture!");
2683 abort();
2684 }
2685 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
2686 b->cursor = nir_before_instr(instr);
2687 tex->coord_components++;
2688 unsigned srcs[] = {
2689 nir_tex_src_coord,
2690 nir_tex_src_offset,
2691 nir_tex_src_ddx,
2692 nir_tex_src_ddy,
2693 };
2694 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
2695 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
2696 if (c == -1)
2697 continue;
2698 if (tex->src[c].src.ssa->num_components == tex->coord_components)
2699 continue;
2700 nir_ssa_def *def;
2701 nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
2702 if (tex->src[c].src.ssa->num_components == 1)
2703 def = nir_vec2(b, tex->src[c].src.ssa, zero);
2704 else
2705 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
2706 nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
2707 }
2708 b->cursor = nir_after_instr(instr);
2709 unsigned needed_components = nir_tex_instr_dest_size(tex);
2710 unsigned num_components = tex->dest.ssa.num_components;
2711 if (needed_components > num_components) {
2712 tex->dest.ssa.num_components = needed_components;
2713 assert(num_components < 3);
2714 /* take either xz or just x since this is promoted to 2D from 1D */
2715 uint32_t mask = num_components == 2 ? (1|4) : 1;
2716 nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
2717 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
2718 }
2719 return true;
2720 }
2721
2722 static bool
2723 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
2724 {
2725 bool found = false;
2726 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
2727 const struct glsl_type *type = glsl_without_array(var->type);
2728 unsigned length = glsl_get_length(var->type);
2729 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
2730 continue;
2731 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
2732 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
2733
2734 found = true;
2735 }
2736 if (found)
2737 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
2738 return found;
2739 }
2740
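/* pre-scan for sparse residency usage and mark the image binding ranges touched by image intrinsics */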
2741 static void
2742 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
2743 {
2744 nir_foreach_function(function, shader) {
2745 if (!function->impl)
2746 continue;
2747 nir_foreach_block_safe(block, function->impl) {
2748 nir_foreach_instr_safe(instr, block) {
2749 if (instr->type == nir_instr_type_tex) {
2750 nir_tex_instr *tex = nir_instr_as_tex(instr);
2751 zs->sinfo.have_sparse |= tex->is_sparse;
2752 }
2753 if (instr->type != nir_instr_type_intrinsic)
2754 continue;
2755 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2756 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
2757 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
2758 intr->intrinsic == nir_intrinsic_image_deref_store ||
2759 intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
2760 intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
2761 intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
2762 intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
2763 intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
2764 intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
2765 intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
2766 intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
2767 intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
2768 intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2769 intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
2770 intr->intrinsic == nir_intrinsic_image_deref_size ||
2771 intr->intrinsic == nir_intrinsic_image_deref_samples ||
2772 intr->intrinsic == nir_intrinsic_image_deref_format ||
2773 intr->intrinsic == nir_intrinsic_image_deref_order) {
2774
2775 nir_variable *var =
2776 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
2777
2778 /* Structs have been lowered already, so get_aoa_size is sufficient. */
2779 const unsigned size =
2780 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
2781 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
2782 var->data.binding + (MAX2(size, 1) - 1));
2783 }
2784 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
2785 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
2786 zs->sinfo.have_sparse = true;
2787
2788 static bool warned = false;
2789 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
2790 switch (intr->intrinsic) {
2791 case nir_intrinsic_image_deref_atomic_add: {
2792 nir_variable *var = nir_intrinsic_get_var(intr, 0);
2793 if (util_format_is_float(var->data.image.format)) {
2794 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
/* mark as warned so the message is only printed once */
warned = true;
}
2795 break;
2796 }
2797 default:
2798 break;
2799 }
2800 }
2801 }
2802 }
2803 }
2804 }
2805
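/* walk back through alu ops to determine whether this value is a residency code produced directly by a sparse tex op */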
2806 static bool
2807 is_residency_code(nir_ssa_def *src)
2808 {
2809 nir_instr *parent = src->parent_instr;
2810 while (1) {
2811 if (parent->type == nir_instr_type_intrinsic) {
2812 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2813 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2814 return false;
2815 }
2816 if (parent->type == nir_instr_type_tex)
2817 return true;
2818 assert(parent->type == nir_instr_type_alu);
2819 nir_alu_instr *alu = nir_instr_as_alu(parent);
2820 parent = alu->src[0].src.ssa->parent_instr;
2821 }
2822 }
2823
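/* lower sparse-residency intrinsics into forms ntv can translate; see the vec5 comment below */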
2824 static bool
2825 lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
2826 {
2827 if (in->type != nir_instr_type_intrinsic)
2828 return false;
2829 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
2830 if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
2831 b->cursor = nir_before_instr(&instr->instr);
2832 nir_ssa_def *src0;
2833 if (is_residency_code(instr->src[0].ssa))
2834 src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
2835 else
2836 src0 = instr->src[0].ssa;
2837 nir_ssa_def *src1;
2838 if (is_residency_code(instr->src[1].ssa))
2839 src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
2840 else
2841 src1 = instr->src[1].ssa;
2842 nir_ssa_def *def = nir_iand(b, src0, src1);
2843 nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
2844 nir_instr_remove(in);
2845 return true;
2846 }
2847 if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
2848 return false;
2849
2850 /* vulkan vectors can only go up to vec4, but this may be a vec5,
2851 * so just rewrite it as the first component since ntv is going to use a different
2852 * method for storing the residency value anyway
2853 */
2854 b->cursor = nir_before_instr(&instr->instr);
2855 nir_instr *parent = instr->src[0].ssa->parent_instr;
2856 if (is_residency_code(instr->src[0].ssa)) {
2857 assert(parent->type == nir_instr_type_alu);
2858 nir_alu_instr *alu = nir_instr_as_alu(parent);
2859 nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
2860 nir_instr_remove(parent);
2861 } else {
2862 nir_ssa_def *src;
2863 if (parent->type == nir_instr_type_intrinsic) {
2864 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2865 assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
2866 src = intr->src[0].ssa;
2867 } else {
2868 assert(parent->type == nir_instr_type_alu);
2869 nir_alu_instr *alu = nir_instr_as_alu(parent);
2870 src = alu->src[0].src.ssa;
2871 }
2872 if (instr->dest.ssa.bit_size != 32) {
2873 if (instr->dest.ssa.bit_size == 1)
2874 src = nir_ieq_imm(b, src, 1);
2875 else
2876 src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
2877 }
2878 nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
2879 nir_instr_remove(in);
2880 }
2881 return true;
2882 }
2883
2884 static bool
2885 lower_sparse(nir_shader *shader)
2886 {
2887 return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
2888 }
2889
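/* make tex dests match the sampler's return type: when the nir dest bit size differs,
 * rewrite it to the sampler's bit size and convert back for the original users;
 * multi-component shadow results are reduced to one component and splatted back out to a vec
 */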
2890 static bool
2891 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
2892 {
2893 if (in->type != nir_instr_type_tex)
2894 return false;
2895 nir_tex_instr *tex = nir_instr_as_tex(in);
2896 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
2897 return false;
2898 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
2899 nir_variable *var = NULL;
2900 if (handle != -1) {
2901 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
2902 } else {
2903 nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
2904 if (glsl_type_is_sampler(glsl_without_array(img->type))) {
2905 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
2906 if (tex->texture_index >= img->data.driver_location &&
2907 tex->texture_index < img->data.driver_location + size) {
2908 var = img;
2909 break;
2910 }
2911 }
2912 }
2913 }
2914 assert(var);
2915 const struct glsl_type *type = glsl_without_array(var->type);
2916 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
2917 bool is_int = glsl_base_type_is_integer(ret_type);
2918 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
2919 unsigned dest_size = nir_dest_bit_size(tex->dest);
2920 b->cursor = nir_after_instr(in);
2921 unsigned num_components = nir_dest_num_components(tex->dest);
2922 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
2923 if (bit_size == dest_size && !rewrite_depth)
2924 return false;
2925 nir_ssa_def *dest = &tex->dest.ssa;
2926 if (bit_size != dest_size) {
2927 tex->dest.ssa.bit_size = bit_size;
2928 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
2929 if (rewrite_depth) {
2930 assert(!tex->is_new_style_shadow);
2931 tex->dest.ssa.num_components = 1;
2932 tex->is_new_style_shadow = true;
2933 }
2934
2935 if (is_int) {
2936 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
2937 dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
2938 else
2939 dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
2940 } else {
2941 dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
2942 }
2943 if (rewrite_depth) {
2944 nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2945 dest = nir_vec(b, vec, num_components);
2946 }
2947 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
2948 } else if (rewrite_depth) {
2949 assert(!tex->is_new_style_shadow);
2950 tex->dest.ssa.num_components = 1;
2951 tex->is_new_style_shadow = true;
2952 nir_ssa_def *vec[4] = {dest, dest, dest, dest};
2953 nir_ssa_def *splat = nir_vec(b, vec, num_components);
2954 nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
2955 }
2956 return true;
2957 }
2958
2959 static bool
2960 match_tex_dests(nir_shader *shader)
2961 {
2962 return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, NULL);
2963 }
2964
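/* scalarize multi-component bitfield ops: emit one bitfield_insert/extract per component
 * and recombine the results with a vec (the SPIR-V bitfield instructions take scalar
 * Offset/Count operands)
 */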
2965 static bool
2966 split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
2967 {
2968 if (in->type != nir_instr_type_alu)
2969 return false;
2970 nir_alu_instr *alu = nir_instr_as_alu(in);
2971 switch (alu->op) {
2972 case nir_op_ubitfield_extract:
2973 case nir_op_ibitfield_extract:
2974 case nir_op_bitfield_insert:
2975 break;
2976 default:
2977 return false;
2978 }
2979 unsigned num_components = nir_dest_num_components(alu->dest.dest);
2980 if (num_components == 1)
2981 return false;
2982 b->cursor = nir_before_instr(in);
2983 nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
2984 for (unsigned i = 0; i < num_components; i++) {
2985 if (alu->op == nir_op_bitfield_insert)
2986 dests[i] = nir_bitfield_insert(b,
2987 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2988 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2989 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
2990 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
2991 else if (alu->op == nir_op_ubitfield_extract)
2992 dests[i] = nir_ubitfield_extract(b,
2993 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2994 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
2995 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
2996 else
2997 dests[i] = nir_ibitfield_extract(b,
2998 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
2999 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
3000 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
3001 }
3002 nir_ssa_def *dest = nir_vec(b, dests, num_components);
3003 nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
3004 nir_instr_remove(in);
3005 return true;
3006 }
3007
3008
3009 static bool
3010 split_bitfields(nir_shader *shader)
3011 {
3012 return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL);
3013 }
3014
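/* turn gallium-supplied NIR into a zink_shader: run zink's lowering passes, assign
 * Vulkan descriptor set/binding numbers for ubos, ssbos, samplers and images, and
 * record the stream output info needed when the SPIR-V is emitted
 */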
3015 struct zink_shader *
3016 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
3017 const struct pipe_stream_output_info *so_info)
3018 {
3019 struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3020 bool have_psiz = false;
3021
3022 ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
3023
3024 ret->hash = _mesa_hash_pointer(ret);
3025 ret->reduced_prim = get_shader_base_prim_type(nir);
3026
3027 ret->programs = _mesa_pointer_set_create(NULL);
3028 simple_mtx_init(&ret->lock, mtx_plain);
3029
3030 nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
3031 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3032 nir->info.stage == MESA_SHADER_TESS_EVAL)
3033 indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
3034
3035 NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
3036 UINT32_MAX);
3037
3038 if (nir->info.stage == MESA_SHADER_VERTEX)
3039 create_vs_pushconst(nir);
3040 else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
3041 nir->info.stage == MESA_SHADER_TESS_EVAL)
3042 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
3043 else if (nir->info.stage == MESA_SHADER_KERNEL)
3044 create_cs_pushconst(nir);
3045
3046 if (nir->info.stage < MESA_SHADER_FRAGMENT)
3047 have_psiz = check_psiz(nir);
3048 NIR_PASS_V(nir, lower_basevertex);
3049 NIR_PASS_V(nir, lower_work_dim);
3050 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3051 NIR_PASS_V(nir, lower_baseinstance);
3052 NIR_PASS_V(nir, lower_sparse);
3053 NIR_PASS_V(nir, split_bitfields);
3054
3055 if (screen->need_2D_zs)
3056 NIR_PASS_V(nir, lower_1d_shadow, screen);
3057
3058 {
3059 nir_lower_subgroups_options subgroup_options = {0};
3060 subgroup_options.lower_to_scalar = true;
3061 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
3062 subgroup_options.ballot_bit_size = 32;
3063 subgroup_options.ballot_components = 4;
3064 subgroup_options.lower_subgroup_masks = true;
3065 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(nir->info.stage))) {
3066 subgroup_options.subgroup_size = 1;
3067 subgroup_options.lower_vote_trivial = true;
3068 }
3069 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
3070 }
3071
3072 if (so_info && so_info->num_outputs)
3073 NIR_PASS_V(nir, split_blocks);
3074
3075 optimize_nir(nir, NULL);
3076 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3077 NIR_PASS_V(nir, nir_lower_discard_if);
3078 NIR_PASS_V(nir, nir_lower_fragcolor,
3079 nir->info.fs.color_is_dual_source ? 1 : 8);
3080 NIR_PASS_V(nir, lower_64bit_vertex_attribs);
3081 bool needs_size = analyze_io(ret, nir);
3082 NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
3083 /* these passes run at compile time instead if uniforms may get inlined */
3084 if (!screen->driconf.inline_uniforms) {
3085 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
3086 NIR_PASS_V(nir, rewrite_bo_access, screen);
3087 NIR_PASS_V(nir, remove_bo_access, ret);
3088 }
3089
3090 if (zink_debug & ZINK_DEBUG_NIR) {
3091 fprintf(stderr, "NIR shader:\n---8<---\n");
3092 nir_print_shader(nir, stderr);
3093 fprintf(stderr, "---8<---\n");
3094 }
3095
3096 struct zink_bindless_info bindless = {0};
3097 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
3098 bool has_bindless_io = false;
3099 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
3100 var->data.is_xfb = false;
3101 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
3102 has_bindless_io = true;
3103 break;
3104 }
3105 }
3106 if (has_bindless_io)
3107 NIR_PASS_V(nir, lower_bindless_io);
3108
3109 optimize_nir(nir, NULL);
3110 prune_io(nir);
3111
3112 scan_nir(screen, nir, ret);
3113
3114 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
3115 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
3116 nir_var_image |
3117 nir_var_mem_ubo |
3118 nir_var_mem_ssbo)) {
3119 enum zink_descriptor_type ztype;
3120 const struct glsl_type *type = glsl_without_array(var->type);
3121 if (var->data.mode == nir_var_mem_ubo) {
3122 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
3123 /* buffer 0 is a push descriptor */
3124 var->data.descriptor_set = !!var->data.driver_location;
3125 var->data.binding = !var->data.driver_location ? nir->info.stage :
3126 zink_binding(nir->info.stage,
3127 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
3128 var->data.driver_location,
3129 screen->compact_descriptors);
3130 assert(var->data.driver_location || var->data.binding < 10);
3131 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
3132 int binding = var->data.binding;
3133
3134 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3135 ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
3136 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3137 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3138 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3139 ret->num_bindings[ztype]++;
3140 } else if (var->data.mode == nir_var_mem_ssbo) {
3141 ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
3142 var->data.descriptor_set = screen->desc_set_id[ztype];
3143 var->data.binding = zink_binding(nir->info.stage,
3144 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
3145 var->data.driver_location,
3146 screen->compact_descriptors);
3147 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3148 ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3149 ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
3150 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
3151 assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
3152 ret->num_bindings[ztype]++;
3153 } else {
3154 assert(var->data.mode == nir_var_uniform ||
3155 var->data.mode == nir_var_image);
3156 if (var->data.bindless) {
3157 ret->bindless = true;
3158 handle_bindless_var(nir, var, type, &bindless);
3159 } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
3160 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
3161 ztype = zink_desc_type_from_vktype(vktype);
3162 if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
3163 ret->num_texel_buffers++;
3164 var->data.driver_location = var->data.binding;
3165 var->data.descriptor_set = screen->desc_set_id[ztype];
3166 var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
3167 ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
3168 ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
3169 ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
3170 if (glsl_type_is_array(var->type))
3171 ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
3172 else
3173 ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
3174 ret->num_bindings[ztype]++;
3175 }
3176 }
3177 }
3178 }
3179 bool bindless_lowered = false;
3180 NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
3181 ret->bindless |= bindless_lowered;
3182
3183 if (!screen->info.feats.features.shaderInt64)
3184 NIR_PASS_V(nir, lower_64bit_vars);
3185 NIR_PASS_V(nir, match_tex_dests);
3186
3187 ret->nir = nir;
3188 nir_foreach_shader_out_variable(var, nir)
3189 var->data.explicit_xfb_buffer = 0;
3190 if (so_info && so_info->num_outputs)
3191 update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
3192 else if (have_psiz) {
3193 bool have_fake_psiz = false;
3194 nir_variable *psiz = NULL;
3195 nir_foreach_shader_out_variable(var, nir) {
3196 if (var->data.location == VARYING_SLOT_PSIZ) {
3197 if (!var->data.explicit_location)
3198 have_fake_psiz = true;
3199 else
3200 psiz = var;
3201 }
3202 }
3203 if (have_fake_psiz && psiz) {
3204 psiz->data.mode = nir_var_shader_temp;
3205 nir_fixup_deref_modes(nir);
3206 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3207 }
3208 }
3209
3210 ret->can_inline = true;
3211
3212 return ret;
3213 }
3214
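/* per-screen NIR finalization (presumably installed as the pipe_screen finalize_nir hook):
 * lower unsupported texture ops and re-gather shader info before the shader is stored
 */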
3215 char *
3216 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
3217 {
3218 struct zink_screen *screen = zink_screen(pscreen);
3219 nir_shader *nir = nirptr;
3220
3221 nir_lower_tex_options tex_opts = {
3222 .lower_invalid_implicit_lod = true,
3223 };
3224 /*
3225 Sampled Image must be an object whose type is OpTypeSampledImage.
3226 The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
3227 or Rect, and the Arrayed and MS operands must be 0.
3228 - SPIR-V, OpImageSampleProj* opcodes
3229 */
3230 tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
3231 BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
3232 tex_opts.lower_txp_array = true;
3233 if (!screen->info.feats.features.shaderImageGatherExtended)
3234 tex_opts.lower_tg4_offsets = true;
3235 NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
3236 if (nir->info.stage == MESA_SHADER_GEOMETRY)
3237 NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
3238 optimize_nir(nir, NULL);
3239 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
3240 if (screen->driconf.inline_uniforms)
3241 nir_find_inlinable_uniforms(nir);
3242
3243 return NULL;
3244 }
3245
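/* destroy a shader: unlink it from every program still holding it, evicting those
 * programs from the context's caches, then free the nir and spirv
 */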
3246 void
3247 zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
3248 {
3249 set_foreach(shader->programs, entry) {
3250 if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
3251 struct zink_compute_program *comp = (void*)entry->key;
3252 if (!comp->base.removed) {
3253 _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
3254 comp->base.removed = true;
3255 }
3256 comp->shader = NULL;
3257 zink_compute_program_reference(ctx, &comp, NULL);
3258 } else {
3259 struct zink_gfx_program *prog = (void*)entry->key;
3260 enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
3261 assert(pstage < ZINK_SHADER_COUNT);
3262 if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
3263 unsigned stages_present = prog->stages_present;
3264 if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
3265 stages_present &= ~BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
3266 struct hash_table *ht = &ctx->program_cache[stages_present >> 2];
3267 struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
3268 assert(he);
3269 _mesa_hash_table_remove(ht, he);
3270 prog->base.removed = true;
3271 }
3272 if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
3273 prog->shaders[pstage] = NULL;
3274 /* only remove generated tcs during parent tes destruction */
3275 if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
3276 prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL;
3277 zink_gfx_program_reference(ctx, &prog, NULL);
3278 }
3279 }
3280 if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) {
3281 /* automatically destroy generated tcs shaders when tes is destroyed */
3282 zink_shader_free(ctx, shader->generated);
3283 shader->generated = NULL;
3284 }
3285 _mesa_set_destroy(shader->programs, NULL);
3286 ralloc_free(shader->nir);
3287 ralloc_free(shader->spirv);
3288 FREE(shader);
3289 }
3290
3291
3292 VkShaderModule
3293 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
3294 {
3295 assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL);
3296 /* shortcut all the nir passes since we just have to change this one word */
3297 zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
3298 return zink_shader_spirv_compile(screen, zs, NULL);
3299 }
3300
3301 /* creating a passthrough tcs shader that's roughly:
3302
3303 #version 150
3304 #extension GL_ARB_tessellation_shader : require
3305
3306 in vec4 some_var[gl_MaxPatchVertices];
3307 out vec4 some_var_out;
3308
3309 layout(push_constant) uniform tcsPushConstants {
3310 layout(offset = 0) float TessLevelInner[2];
3311 layout(offset = 8) float TessLevelOuter[4];
3312 } u_tcsPushConstants;
3313 layout(vertices = $vertices_per_patch) out;
3314 void main()
3315 {
3316 gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
3317 gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
3318 some_var_out = some_var[gl_InvocationID];
3319 }
3320
3321 */
3322 struct zink_shader *
3323 zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
3324 {
3325 struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
3326 ret->hash = _mesa_hash_pointer(ret);
3327 ret->programs = _mesa_pointer_set_create(NULL);
3328 simple_mtx_init(&ret->lock, mtx_plain);
3329
3330 nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
3331 nir_function *fn = nir_function_create(nir, "main");
3332 fn->is_entrypoint = true;
3333 nir_function_impl *impl = nir_function_impl_create(fn);
3334
3335 nir_builder b;
3336 nir_builder_init(&b, impl);
3337 b.cursor = nir_before_block(nir_start_block(impl));
3338
3339 nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
3340
3341 nir_foreach_shader_out_variable(var, vs->nir) {
3342 const struct glsl_type *type = var->type;
3343 const struct glsl_type *in_type = var->type;
3344 const struct glsl_type *out_type = var->type;
3345 char buf[1024];
3346 snprintf(buf, sizeof(buf), "%s_out", var->name);
3347 in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
3348 out_type = glsl_array_type(type, vertices_per_patch, 0);
3349
3350 nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
3351 nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
3352 out->data.location = in->data.location = var->data.location;
3353 out->data.location_frac = in->data.location_frac = var->data.location_frac;
3354
3355 /* gl_in[] receives values from equivalent built-in output
3356 variables written by the vertex shader (section 2.14.7). Each array
3357 element of gl_in[] is a structure holding values for a specific vertex of
3358 the input patch. The length of gl_in[] is equal to the
3359 implementation-dependent maximum patch size (gl_MaxPatchVertices).
3360 - ARB_tessellation_shader
3361 */
3362 /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
3363 nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
3364 nir_ssa_def *load = nir_load_deref(&b, in_array_var);
3365 nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
3366 nir_store_deref(&b, out_array_var, load, 0xff);
3367 }
3368 nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
3369 gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
3370 gl_TessLevelInner->data.patch = 1;
3371 nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
3372 gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
3373 gl_TessLevelOuter->data.patch = 1;
3374
3375 /* hacks so we can size these right for now */
3376 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
3377 /* just use a single blob for padding here because it's easier */
3378 fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
3379 fields[0].name = ralloc_asprintf(nir, "padding");
3380 fields[0].offset = 0;
3381 fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
3382 fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
3383 fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
3384 fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
3385 fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
3386 fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
3387 nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
3388 glsl_struct_type(fields, 3, "struct", false), "pushconst");
3389 pushconst->data.location = VARYING_SLOT_VAR0;
3390
3391 nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
3392 nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
3393
3394 for (unsigned i = 0; i < 2; i++) {
3395 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
3396 nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
3397 }
3398 for (unsigned i = 0; i < 4; i++) {
3399 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
3400 nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
3401 }
3402
3403 nir->info.tess.tcs_vertices_out = vertices_per_patch;
3404 nir_validate_shader(nir, "created");
3405
3406 NIR_PASS_V(nir, nir_lower_regs_to_ssa);
3407 optimize_nir(nir, NULL);
3408 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3409 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3410
3411 ret->nir = nir;
3412 ret->is_generated = true;
3413 return ret;
3414 }
3415