/*
 * Copyright 2024 Advanced Micro Devices, Inc.
 * SPDX-License-Identifier: MIT
 */

#include "st_nir.h"
#include "nir_builder.h"

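/* This pass turns lowered IO intrinsics (load_input, load_interpolated_input,
 * load_output, store_output, and their per-vertex variants) back into IO
 * variables accessed through derefs, for drivers that consume variable-based
 * IO. It runs in three steps: gather per-slot component masks, create the
 * variables, and rewrite every IO intrinsic as a deref access.
 */
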
struct io_desc {
   bool is_per_vertex;
   bool is_output;
   bool is_store;
   bool is_indirect;
   bool is_compact;
   bool is_xfb;
   unsigned component;
   unsigned num_slots;
   nir_io_semantics sem;
   nir_variable_mode mode;
   nir_src location_src;
   nir_intrinsic_instr *baryc;
};

#define VAR_INDEX_INTERP_AT_PIXEL 1
#define VAR_INTERP_UNDEF INTERP_MODE_COUNT
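
/* var->index is reused by this pass to remember that center (pixel)
 * interpolation was seen for a variable, and VAR_INTERP_UNDEF marks variables
 * whose interpolation mode hasn't been determined yet.
 */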

static bool var_is_per_vertex(gl_shader_stage stage, nir_variable *var)
{
   return ((stage == MESA_SHADER_TESS_CTRL ||
            stage == MESA_SHADER_GEOMETRY) &&
           var->data.mode & nir_var_shader_in) ||
          (((stage == MESA_SHADER_TESS_CTRL && var->data.mode & nir_var_shader_out) ||
            (stage == MESA_SHADER_TESS_EVAL && var->data.mode & nir_var_shader_in)) &&
           !(var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
             var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
             (var->data.location >= VARYING_SLOT_PATCH0 &&
              var->data.location <= VARYING_SLOT_PATCH31)));
}

static const struct glsl_type *
get_var_slot_type(gl_shader_stage stage, nir_variable *var)
{
   if (var_is_per_vertex(stage, var)) {
      assert(glsl_type_is_array(var->type));
      return var->type->fields.array;
   } else {
      return var->type;
   }
}

static unsigned
get_var_num_slots(gl_shader_stage stage, nir_variable *var,
                  bool is_driver_location)
{
   const struct glsl_type *type = get_var_slot_type(stage, var);

   assert(!glsl_type_is_array(type) || type->length > 0);

   if (var->data.compact) {
      assert(glsl_type_is_array(type));
      return DIV_ROUND_UP(type->length, 4);
   } else if (is_driver_location &&
              glsl_type_is_dual_slot(glsl_without_array(var->type))) {
      assert(!glsl_type_is_array(type));
      return 2;
   } else {
      return glsl_type_is_array(type) ? type->length : 1;
   }
}

static bool
is_compact(nir_shader *nir, bool is_output, unsigned location)
{
   return nir->options->compact_arrays &&
          (nir->info.stage != MESA_SHADER_VERTEX || is_output) &&
          (nir->info.stage != MESA_SHADER_FRAGMENT || !is_output) &&
          (location == VARYING_SLOT_CLIP_DIST0 ||
           location == VARYING_SLOT_CLIP_DIST1 ||
           location == VARYING_SLOT_CULL_DIST0 ||
           location == VARYING_SLOT_CULL_DIST1 ||
           location == VARYING_SLOT_TESS_LEVEL_OUTER ||
           location == VARYING_SLOT_TESS_LEVEL_INNER);
}

/* Get information about the intrinsic. */
static bool
parse_intrinsic(nir_shader *nir, nir_intrinsic_instr *intr,
                struct io_desc *desc, nir_variable **var)
{
   memset(desc, 0, sizeof(*desc));

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
      break;
   case nir_intrinsic_load_per_vertex_input:
      desc->is_per_vertex = true;
      break;
   case nir_intrinsic_load_output:
      desc->is_output = true;
      break;
   case nir_intrinsic_load_per_vertex_output:
      desc->is_output = true;
      desc->is_per_vertex = true;
      break;
   case nir_intrinsic_store_output:
      desc->is_output = true;
      desc->is_store = true;
      break;
   case nir_intrinsic_store_per_vertex_output:
      desc->is_output = true;
      desc->is_per_vertex = true;
      desc->is_store = true;
      break;
   default:
      return false;
   }

   desc->component = nir_intrinsic_component(intr);
   desc->sem = nir_intrinsic_io_semantics(intr);
   desc->mode = desc->is_output ? nir_var_shader_out : nir_var_shader_in;
   desc->location_src = *nir_get_io_offset_src(intr);
   desc->is_indirect = !nir_src_is_const(desc->location_src);
   desc->is_compact = is_compact(nir, desc->is_output, desc->sem.location);
   desc->is_xfb = nir_instr_xfb_write_mask(intr) != 0;
   desc->num_slots = desc->is_compact ? DIV_ROUND_UP(desc->sem.num_slots, 4)
                                      : desc->sem.num_slots;

   /* Variables can't represent the high 16 bits. */
   assert(!desc->sem.high_16bits);

   /* Validate assumptions about indirect indexing. */
   if (desc->is_indirect) {
      assert(desc->sem.num_slots > 1);
   } else if (desc->is_compact) {
      assert(desc->sem.num_slots <= 8);
      assert(nir_src_as_uint(desc->location_src) <= 1);
   } else {
      assert(desc->sem.num_slots == 1);
      assert(nir_src_as_uint(desc->location_src) == 0);
   }

   if (intr->intrinsic == nir_intrinsic_load_interpolated_input &&
       intr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic)
      desc->baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);

   /* Find the variable if it exists. */
   *var = NULL;

   nir_foreach_variable_with_modes(iter, nir, desc->mode) {
      unsigned end_location = iter->data.location +
                              get_var_num_slots(nir->info.stage, iter, false);
      assert(iter->data.location < end_location);

      /* Test if the variables intersect. */
      if (MAX2(desc->sem.location, iter->data.location) <
          MIN2(desc->sem.location + desc->num_slots, end_location) &&
          desc->sem.dual_source_blend_index == iter->data.index) {
         *var = iter;
         break;
      }
   }

   return true;
}

/* Gather which components are used, so that we know how many vector elements
 * the variables should have.
 */
static bool
gather_component_masks(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   uint8_t *component_masks = (uint8_t *)opaque;
   nir_shader *nir = b->shader;
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(nir, intr, &desc, &var))
      return false;

   assert(NUM_TOTAL_VARYING_SLOTS <= 127);
   uint8_t mask, index;

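   /* The components read or written by the intrinsic, placed at their final
    * positions within the vec4 slot.
    */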
   mask = (desc.is_store ? nir_intrinsic_write_mask(intr) :
                           nir_def_components_read(&intr->def)) <<
          nir_intrinsic_component(intr);

   index = desc.sem.location + (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
   component_masks[index] |= mask;

   /* Ensure front and back colors have the same component masks. */
   int8_t alternate_location = -1;
   switch (desc.sem.location) {
   case VARYING_SLOT_COL0: alternate_location = VARYING_SLOT_BFC0; break;
   case VARYING_SLOT_COL1: alternate_location = VARYING_SLOT_BFC1; break;
   case VARYING_SLOT_BFC0: alternate_location = VARYING_SLOT_COL0; break;
   case VARYING_SLOT_BFC1: alternate_location = VARYING_SLOT_COL1; break;
   default: break;
   }
   if (alternate_location >= 0) {
      uint8_t index2 = alternate_location +
                       (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
      component_masks[index2] |= mask;
   }

   return true;
}

/* Variables are created in a separate pass because a single instruction might
 * not describe them completely, so we might have to redefine variables as we
 * parse more instructions.
 *
 * For example, if there is indirect indexing after direct indexing, variables
 * are created as single-slot for the direct indexing first, and then they must
 * be recreated/expanded when indirect indexing is found.
 *
 * Similarly, a normal load might imply that a variable is vec2 or dvec2, but
 * a later load with high_dvec2=1 implies that it's dvec4.
 *
 * Similarly, both center and centroid interpolation can occur, in which case
 * the variable should be declared with center interpolation and use
 * load_deref, while the centroid load should use interp_deref_at_centroid.
 */
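/* A hypothetical sketch of the first case (illustrative, not verbatim NIR):
 *
 *    load_input (io location=VARYING_SLOT_VAR0, num_slots=1)
 *       -> creates "vec4 var"
 *    load_input (indirect offset, io location=VARYING_SLOT_VAR0, num_slots=4)
 *       -> the vec4 variable is discarded and recreated as "vec4 var[4]"
 */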
static bool
create_vars(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   uint8_t *component_masks = (uint8_t *)opaque;
   nir_shader *nir = b->shader;
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(nir, intr, &desc, &var))
      return false;

   if (var && desc.is_indirect && !desc.is_compact) {
      const struct glsl_type *type = get_var_slot_type(nir->info.stage, var);

      /* If the variable exists, but it's declared as a non-array because it
       * had direct access first, ignore it. We'll recreate it as an array.
       *
       * If there are 2 arrays in different components (e.g. one in X and
       * another in Y) and they occupy the same vec4, they might not start
       * on the same location, but we merge them into a single variable.
       */
      if (!glsl_type_is_array(type) ||
          desc.sem.location != var->data.location ||
          desc.num_slots != get_var_num_slots(nir->info.stage, var, false))
         var = NULL;
   }

   if (!var) {
      nir_alu_type type = desc.is_store ? nir_intrinsic_src_type(intr) :
                                          nir_intrinsic_dest_type(intr);
      enum glsl_base_type base_type;
      unsigned num_components = 0;
      const struct glsl_type *var_type = NULL;

      /* Bool outputs are represented as uint. */
      if (type == nir_type_bool32)
         type = nir_type_uint32;

      base_type = nir_get_glsl_base_type_for_nir_type(type);

      if (nir->info.stage == MESA_SHADER_FRAGMENT && desc.is_output) {
         /* FS outputs. */
         switch (desc.sem.location) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            num_components = 1;
            break;
         }
      } else if (nir->info.stage == MESA_SHADER_VERTEX && !desc.is_output) {
         /* VS inputs. */
         /* freedreno/a530-traces requires this. */
         num_components = 4;
      } else {
         /* Varyings. */
         if (desc.is_compact) {
            unsigned component, decl_size;

            switch (desc.sem.location) {
            case VARYING_SLOT_TESS_LEVEL_OUTER:
               var_type = glsl_array_type(glsl_float_type(), 4, sizeof(float));
               break;
            case VARYING_SLOT_TESS_LEVEL_INNER:
               var_type = glsl_array_type(glsl_float_type(), 2, sizeof(float));
               break;
            case VARYING_SLOT_CLIP_DIST0:
            case VARYING_SLOT_CLIP_DIST1:
            case VARYING_SLOT_CULL_DIST0:
            case VARYING_SLOT_CULL_DIST1:
               if (nir->options->io_options &
                   nir_io_separate_clip_cull_distance_arrays) {
                  decl_size = desc.sem.location >= VARYING_SLOT_CULL_DIST0 ?
                                 nir->info.cull_distance_array_size :
                                 nir->info.clip_distance_array_size;
               } else {
                  decl_size = nir->info.clip_distance_array_size +
                              nir->info.cull_distance_array_size;
               }
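               /* CLIP_DIST1/CULL_DIST1 is the second vec4 of the distance
                * array, so its components start at array element 4.
                */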
               component = (desc.sem.location == VARYING_SLOT_CLIP_DIST1 ||
                            desc.sem.location == VARYING_SLOT_CULL_DIST1) * 4 +
                           desc.component;
               assert(component < decl_size);
               var_type = glsl_array_type(glsl_float_type(), decl_size,
                                          sizeof(float));
               break;
            default:
               unreachable("unexpected varying slot");
            }
         } else {
            switch (desc.sem.location) {
            case VARYING_SLOT_POS:
               /* d3d12 requires this. */
               num_components = 4;
               break;
            case VARYING_SLOT_PSIZ:
            case VARYING_SLOT_FOGC:
            case VARYING_SLOT_PRIMITIVE_ID:
            case VARYING_SLOT_LAYER:
            case VARYING_SLOT_VIEWPORT:
            case VARYING_SLOT_VIEWPORT_MASK:
            case VARYING_SLOT_FACE:
               num_components = 1;
               break;
            case VARYING_SLOT_TESS_LEVEL_INNER:
            case VARYING_SLOT_PNTC:
               num_components = 2;
               break;
            }
         }
      }

      /* Set the vector size based on which components are used. */
      if (!desc.is_compact && !num_components) {
         for (unsigned i = 0; i < desc.sem.num_slots; i++) {
            unsigned index = desc.sem.location + i +
                             (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
            unsigned n = util_last_bit(component_masks[index]);
            num_components = MAX2(num_components, n);
         }
      }

      if (!var_type) {
         assert(!desc.is_compact);
         var_type = glsl_vector_type(base_type, num_components);

         if (desc.is_indirect)
            var_type = glsl_array_type(var_type, desc.sem.num_slots, 0);
      }

      unsigned num_vertices = 0;

      if (desc.is_per_vertex) {
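         /* TCS inputs, TES inputs, and TCS outputs are arrays of vertices.
          * For inputs, the vertex count isn't known at compile time, so use
          * the maximum patch size (gl_MaxPatchVertices, which Mesa exposes
          * as 32).
          */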
         if (nir->info.stage == MESA_SHADER_TESS_CTRL)
            num_vertices = desc.is_output ? nir->info.tess.tcs_vertices_out : 32;
         else if (nir->info.stage == MESA_SHADER_TESS_EVAL && !desc.is_output)
            num_vertices = 32;
         else if (nir->info.stage == MESA_SHADER_GEOMETRY && !desc.is_output)
            num_vertices = mesa_vertices_per_prim(nir->info.gs.input_primitive);
         else
            unreachable("unexpected shader stage for per-vertex IO");

         var_type = glsl_array_type(var_type, num_vertices, 0);
      }

      const char *name = intr->name;
      if (!name) {
         if (nir->info.stage == MESA_SHADER_VERTEX && !desc.is_output)
            name = gl_vert_attrib_name(desc.sem.location);
         else if (nir->info.stage == MESA_SHADER_FRAGMENT && desc.is_output)
            name = gl_frag_result_name(desc.sem.location);
         else
            name = gl_varying_slot_name_for_stage(desc.sem.location,
                                                  nir->info.stage);
      }

      var = nir_variable_create(nir, desc.mode, var_type, name);
      var->data.location = desc.sem.location;
      /* If this is the high half of a dvec4, the driver location should point
       * to the low half of the dvec4.
       */
      var->data.driver_location = nir_intrinsic_base(intr) -
                                  (desc.sem.high_dvec2 ? 1 : 0);
      var->data.compact = desc.is_compact;
      var->data.precision = desc.sem.medium_precision ? GLSL_PRECISION_MEDIUM
                                                      : GLSL_PRECISION_HIGH;
      var->data.index = desc.sem.dual_source_blend_index;
      var->data.patch =
         !desc.is_per_vertex &&
         ((nir->info.stage == MESA_SHADER_TESS_CTRL && desc.is_output) ||
          (nir->info.stage == MESA_SHADER_TESS_EVAL && !desc.is_output));
      var->data.interpolation = VAR_INTERP_UNDEF;
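      /* Outputs captured by transform feedback must not be removed or
       * compacted, which is what always_active_io indicates.
       */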
      var->data.always_active_io = desc.is_xfb;

      /* If the variable is an array accessed indirectly, remove any variables
       * we may have created up to this point that overlap with it.
       */
      if (desc.is_indirect) {
         unsigned var_num_slots = get_var_num_slots(nir->info.stage, var, false);
         unsigned var_end_location = var->data.location + var_num_slots;

         nir_foreach_variable_with_modes_safe(iter, nir, desc.mode) {
            unsigned iter_num_slots =
               get_var_num_slots(nir->info.stage, iter, false);
            unsigned iter_end_location = iter->data.location + iter_num_slots;

            if (iter != var &&
                iter->data.index == var->data.index &&
                /* Test if the variables intersect. */
                MAX2(iter->data.location, var->data.location) <
                MIN2(iter_end_location, var_end_location)) {
               /* Compact variables shouldn't end up here. */
               assert(!desc.is_compact);

               /* If the array variables overlap, but don't start on the same
                * location, we merge them.
                */
               if (iter->data.location < var->data.location ||
                   iter_end_location > var_end_location) {
                  var->data.location = MIN2(var->data.location,
                                            iter->data.location);
                  var->data.driver_location = MIN2(var->data.driver_location,
                                                   iter->data.driver_location);

                  const struct glsl_type *elem_type = var->type;

                  if (var_is_per_vertex(nir->info.stage, var)) {
                     assert(glsl_type_is_array(elem_type));
                     elem_type = elem_type->fields.array;
                  }

                  assert(glsl_type_is_array(elem_type));
                  elem_type = elem_type->fields.array;
                  assert(!glsl_type_is_array(elem_type));

                  unsigned end_location = MAX2(iter_end_location,
                                               var_end_location);
                  unsigned new_num_slots = end_location - var->data.location;

                  var->type = glsl_array_type(elem_type, new_num_slots, 0);

                  if (var_is_per_vertex(nir->info.stage, var)) {
                     assert(num_vertices);
                     var->type = glsl_array_type(var->type, num_vertices, 0);
                  }
               }

               /* Preserve variable fields from individual variables. */
               var->data.invariant |= iter->data.invariant;
               var->data.stream |= iter->data.stream;
               var->data.per_view |= iter->data.per_view;
               var->data.fb_fetch_output |= iter->data.fb_fetch_output;
               var->data.access |= iter->data.access;
               var->data.always_active_io |= iter->data.always_active_io;

               if (var->data.interpolation == VAR_INTERP_UNDEF)
                  var->data.interpolation = iter->data.interpolation;
               else
                  assert(var->data.interpolation == iter->data.interpolation);

               if (desc.baryc) {
                  /* This can only contain VAR_INDEX_INTERP_AT_PIXEL. */
                  var->index = iter->index;
                  var->data.centroid = iter->data.centroid;
                  var->data.sample = iter->data.sample;
               }
               exec_node_remove(&iter->node);
            }
         }
      }
   }

   /* Some semantics are dependent on the instruction or component. */
   var->data.invariant |= desc.sem.invariant;
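   /* gs_streams holds 2 bits per component; the variable keeps this packed
    * encoding and NIR_STREAM_PACKED marks it as such.
    */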
   var->data.stream |= (desc.sem.gs_streams << (desc.component * 2));
   if (var->data.stream)
      var->data.stream |= NIR_STREAM_PACKED;
   var->data.per_view |= desc.sem.per_view;
   var->data.always_active_io |= desc.is_xfb;

   if (desc.sem.fb_fetch_output) {
      var->data.fb_fetch_output = 1;
      if (desc.sem.fb_fetch_output_coherent)
         var->data.access |= ACCESS_COHERENT;
   }

   if (desc.sem.high_dvec2) {
      assert(!desc.is_store);
      assert(!desc.is_indirect); /* TODO: indirect dvec4 VS inputs unhandled */
      var->type = glsl_dvec4_type();
   }

   if (desc.baryc) {
      if (var->data.interpolation == VAR_INTERP_UNDEF)
         var->data.interpolation = nir_intrinsic_interp_mode(desc.baryc);
      else
         assert(var->data.interpolation == nir_intrinsic_interp_mode(desc.baryc));

      switch (desc.baryc->intrinsic) {
      case nir_intrinsic_load_barycentric_pixel:
         var->index = VAR_INDEX_INTERP_AT_PIXEL;
         break;
      case nir_intrinsic_load_barycentric_at_offset:
      case nir_intrinsic_load_barycentric_at_sample:
         break;
      case nir_intrinsic_load_barycentric_centroid:
         var->data.centroid = true;
         break;
      case nir_intrinsic_load_barycentric_sample:
         assert(var->index != VAR_INDEX_INTERP_AT_PIXEL);
         var->data.sample = true;
         break;
      default:
         unreachable("unexpected barycentric intrinsic");
      }

      if (var->index == VAR_INDEX_INTERP_AT_PIXEL) {
         /* Centroid interpolation will use interp_deref_at_centroid. */
         var->data.centroid = false;
         assert(!var->data.sample);
      }
   } else {
      enum glsl_interp_mode flat_mode =
         nir->info.stage == MESA_SHADER_FRAGMENT && !desc.is_output ?
            INTERP_MODE_FLAT : INTERP_MODE_NONE;

      if (var->data.interpolation == VAR_INTERP_UNDEF)
         var->data.interpolation = flat_mode;
      else
         assert(var->data.interpolation == flat_mode);
   }

   return true;
}

static bool
unlower_io_to_vars(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(b->shader, intr, &desc, &var))
      return false;

   b->cursor = nir_after_instr(&intr->instr);

   /* Create the deref. */
   assert(var);
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   if (desc.is_per_vertex) {
      deref = nir_build_deref_array(b, deref,
                                    nir_get_io_arrayed_index_src(intr)->ssa);
   }

   /* Compact variables have a dedicated codepath. */
   if (var->data.compact) {
      unsigned mask = desc.is_store ? nir_intrinsic_write_mask(intr) :
                                      BITFIELD_MASK(intr->def.num_components);
      nir_def *chan[4];

      u_foreach_bit(bit, mask) {
         nir_def *loc_index = desc.location_src.ssa;

         /* In store_output, compact tess levels interpret the location src
          * as the indirect component index, while compact clip/cull distances
          * interpret the location src as the vec4 index. Convert it to
          * the component index for store_deref.
          */
         if (desc.sem.location >= VARYING_SLOT_CLIP_DIST0 &&
             desc.sem.location <= VARYING_SLOT_CULL_DIST1)
            loc_index = nir_imul_imm(b, loc_index, 4);

         nir_def *index =
            nir_iadd_imm(b, loc_index,
                         (desc.sem.location - var->data.location) * 4 +
                         desc.component + bit);

         nir_deref_instr *deref_elem = nir_build_deref_array(b, deref, index);
         assert(!glsl_type_is_array(deref_elem->type));

         if (desc.is_store) {
            nir_build_store_deref(b, &deref_elem->def,
                                  nir_channel(b, intr->src[0].ssa, bit),
                                  .write_mask = 0x1,
                                  .access = var->data.access);
         } else {
            assert(bit < ARRAY_SIZE(chan));
            chan[bit] = nir_load_deref_with_access(b, deref_elem,
                                                   var->data.access);
         }
      }

      if (!desc.is_store) {
         nir_def_rewrite_uses(&intr->def,
                              nir_vec(b, chan, intr->def.num_components));
      }

      nir_instr_remove(&intr->instr);
      return true;
   }

   if (get_var_num_slots(b->shader->info.stage, var, false) > 1) {
      nir_def *index = nir_imm_int(b, desc.sem.location - var->data.location);
      if (desc.is_indirect)
         index = nir_iadd(b, index, desc.location_src.ssa);

      deref = nir_build_deref_array(b, deref, index);
   }

   /* We shouldn't need any other array dereferences. */
   assert(!glsl_type_is_array(deref->type));
   unsigned num_components = deref->type->vector_elements;

   if (desc.is_store) {
      unsigned writemask = nir_intrinsic_write_mask(intr) << desc.component;
      nir_def *value = intr->src[0].ssa;

      if (desc.component) {
         unsigned new_num_components = desc.component + value->num_components;
         unsigned swizzle[4] = {0};
         assert(new_num_components <= 4);

         /* Move components within the vector to the right because we only
          * have vec4 stores. The writemask skips the extra components at
          * the beginning.
          *
          * For component = 1: .xyz -> .xxyz
          * For component = 2: .xy  -> .xxxy
          * For component = 3: .x   -> .xxxx
          */
         for (unsigned i = 1; i < value->num_components; i++)
            swizzle[desc.component + i] = i;

         value = nir_swizzle(b, value, swizzle, new_num_components);
      }

      value = nir_resize_vector(b, value, num_components);

      /* virgl requires scalarized TESS_LEVEL stores because originally
       * the GLSL compiler never vectorized them. Doing 1 store per bit of
       * the writemask is enough to make virgl work.
       */
      if (desc.sem.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          desc.sem.location == VARYING_SLOT_TESS_LEVEL_INNER) {
         u_foreach_bit(i, writemask) {
            nir_build_store_deref(b, &deref->def, value,
                                  .write_mask = BITFIELD_BIT(i),
                                  .access = var->data.access);
         }
      } else {
         nir_build_store_deref(b, &deref->def, value,
                               .write_mask = writemask,
                               .access = var->data.access);
      }
   } else {
      nir_def *load;

      if (deref->type == glsl_dvec4_type()) {
         /* Load the dvec4, but extract the low or high half as vec4. */
         load = nir_load_deref_with_access(b, deref, var->data.access);
         load = nir_extract_bits(b, &load, 1, desc.sem.high_dvec2 ? 128 : 0,
                                 4, 32);
      } else {
         nir_intrinsic_op baryc = desc.baryc ? desc.baryc->intrinsic :
                                               nir_num_intrinsics;

         if (baryc == nir_intrinsic_load_barycentric_centroid &&
             var->index == VAR_INDEX_INTERP_AT_PIXEL) {
            /* Both pixel and centroid interpolation occur, so the latter
             * must use interp_deref_at_centroid.
             */
            load = nir_interp_deref_at_centroid(b, num_components,
                                                intr->def.bit_size,
                                                &deref->def);
         } else if (baryc == nir_intrinsic_load_barycentric_at_offset) {
            load = nir_interp_deref_at_offset(b, num_components,
                                              intr->def.bit_size, &deref->def,
                                              desc.baryc->src[0].ssa);
         } else if (baryc == nir_intrinsic_load_barycentric_at_sample) {
            load = nir_interp_deref_at_sample(b, num_components,
                                              intr->def.bit_size, &deref->def,
                                              desc.baryc->src[0].ssa);
         } else {
            load = nir_load_deref_with_access(b, deref, var->data.access);
         }
      }

      load = nir_pad_vec4(b, load);
      load = nir_channels(b, load, BITFIELD_RANGE(desc.component,
                                                  intr->def.num_components));
      nir_def_rewrite_uses(&intr->def, load);
   }

   nir_instr_remove(&intr->instr);
   return true;
}

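/* A hypothetical before/after sketch of what this pass does for a simple FS
 * input (illustrative, not verbatim NIR print output):
 *
 *    Before: vec1 32 ssa_2 = load_interpolated_input (ssa_1, 0),
 *                            (component=1, io location=VARYING_SLOT_VAR0)
 *    After:  decl_var shader_in INTERP_MODE_SMOOTH vec2 VARYING_SLOT_VAR0
 *            vec2 32 ssa_3 = load_deref &VARYING_SLOT_VAR0
 *            vec1 32 ssa_4 = mov ssa_3.y
 */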
bool
st_nir_unlower_io_to_vars(nir_shader *nir)
{
   if (nir->info.stage == MESA_SHADER_COMPUTE)
      return false;

   /* Flexible interpolation is not supported by this pass. If you want to
    * enable flexible interpolation for your driver, it has to stop consuming
    * IO variables.
    */
   assert(!(nir->options->io_options &
            nir_io_has_flexible_input_interpolation_except_flat));
   assert(!(nir->options->io_options &
            nir_io_mix_convergent_flat_with_interpolated));

   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      unreachable("the shader should have no IO variables");
   }

   /* Some drivers can't handle holes in driver locations (bases), so
    * recompute them.
    */
   nir_variable_mode modes =
      nir_var_shader_out |
      (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0);
   bool progress = nir_recompute_io_bases(nir, modes);

   /* Gather component masks. */
   uint8_t component_masks[NUM_TOTAL_VARYING_SLOTS * 2] = {0};
   if (!nir_shader_intrinsics_pass(nir, gather_component_masks,
                                   nir_metadata_all, component_masks)) {
      nir->info.io_lowered = false; /* Nothing to do. */
      return progress;
   }

   /* Create IO variables. */
   if (!nir_shader_intrinsics_pass(nir, create_vars, nir_metadata_all,
                                   component_masks)) {
      nir->info.io_lowered = false; /* Nothing to do. */
      return progress;
   }

   /* Unlower IO using the created variables. */
   ASSERTED bool lower_progress =
      nir_shader_intrinsics_pass(nir, unlower_io_to_vars,
                                 nir_metadata_control_flow, NULL);
   assert(lower_progress);
   nir->info.io_lowered = false;

   /* Count IO variables. */
   nir->num_inputs = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
      nir->num_inputs += get_var_num_slots(nir->info.stage, var, true);
   }

   nir->num_outputs = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
      nir->num_outputs += get_var_num_slots(nir->info.stage, var, true);
   }

   /* llvmpipe and other drivers require that variables are sorted by
    * location; otherwise a lot of tests fail.
    *
    * It looks like location and driver_location are not the only values that
    * determine behavior. The order in which the variables are declared also
    * affects behavior.
    */
   unsigned varying_var_mask =
      nir_var_shader_in |
      (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
   nir_sort_variables_by_location(nir, varying_var_mask);

   /* Fix locations and info for dual-slot VS inputs. Intel needs this.
    * All other drivers only use driver_location.
    */
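   /* E.g. a dvec4 attribute at location 0 occupies locations 0 and 1, so the
    * location of every attribute that follows it must be shifted up by one
    * slot.
    */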
   if (nir->info.stage == MESA_SHADER_VERTEX) {
      unsigned num_dual_slots = 0;
      nir->num_inputs = 0;
      nir->info.inputs_read = 0;

      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.location += num_dual_slots;
         nir->info.inputs_read |= BITFIELD64_BIT(var->data.location);
         nir->num_inputs++;

         if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
            num_dual_slots++;
            nir->info.inputs_read |= BITFIELD64_BIT(var->data.location + 1);
            nir->num_inputs++;
         }
      }
   }

   return true;
}