/*
 * Copyright 2024 Advanced Micro Devices, Inc.
 * SPDX-License-Identifier: MIT
 */

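/* This pass reverses nir_lower_io for shader inputs and outputs: it turns
 * lowered IO intrinsics (load_input, store_output, load_interpolated_input,
 * and their per-vertex variants) back into IO variables and derefs, for
 * drivers that still consume variable-based IO.
 */
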
#include "st_nir.h"
#include "nir_builder.h"

struct io_desc {
   bool is_per_vertex;         /* arrayed (per-vertex) IO */
   bool is_output;             /* shader output rather than input */
   bool is_store;              /* store_output rather than a load */
   bool is_indirect;           /* the location src isn't constant */
   bool is_compact;            /* compact scalar array (clip/cull/tess levels) */
   bool is_xfb;                /* also written by transform feedback */
   unsigned component;         /* first component within the vec4 slot */
   unsigned num_slots;         /* number of vec4 slots covered */
   nir_io_semantics sem;       /* IO semantics of the intrinsic */
   nir_variable_mode mode;     /* nir_var_shader_in or nir_var_shader_out */
   nir_src location_src;       /* the IO offset src (constant or indirect) */
   nir_intrinsic_instr *baryc; /* barycentric src of load_interpolated_input */
};

/* var->index value recording that an FS input is interpolated at the pixel;
 * a later centroid load must then use interp_deref_at_centroid.
 */
#define VAR_INDEX_INTERP_AT_PIXEL   1
/* Interpolation mode not determined yet. */
#define VAR_INTERP_UNDEF            INTERP_MODE_COUNT

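/* Whether the variable is per-vertex (arrayed) IO: TCS and GS inputs, plus
 * TCS outputs and TES inputs, excluding tess levels and patch varyings,
 * which are per-patch.
 */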
static bool var_is_per_vertex(gl_shader_stage stage, nir_variable *var)
{
   return ((stage == MESA_SHADER_TESS_CTRL ||
            stage == MESA_SHADER_GEOMETRY) &&
           var->data.mode & nir_var_shader_in) ||
          (((stage == MESA_SHADER_TESS_CTRL && var->data.mode & nir_var_shader_out) ||
            (stage == MESA_SHADER_TESS_EVAL && var->data.mode & nir_var_shader_in)) &&
           !(var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
             var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
             (var->data.location >= VARYING_SLOT_PATCH0 &&
              var->data.location <= VARYING_SLOT_PATCH31)));
}

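/* Return the variable's type with the per-vertex array dimension stripped,
 * i.e. the type that occupies the variable's location slots.
 */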
static const struct glsl_type *
get_var_slot_type(gl_shader_stage stage, nir_variable *var)
{
   if (var_is_per_vertex(stage, var)) {
      assert(glsl_type_is_array(var->type));
      return var->type->fields.array;
   } else {
      return var->type;
   }
}

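/* Return how many location slots the variable occupies. Compact arrays pack
 * up to 4 scalars per slot, and dual-slot (64-bit) types count as 2 slots
 * when is_driver_location is set.
 */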
static unsigned
get_var_num_slots(gl_shader_stage stage, nir_variable *var,
                  bool is_driver_location)
{
   const struct glsl_type *type = get_var_slot_type(stage, var);

   assert(!glsl_type_is_array(type) || type->length > 0);

   if (var->data.compact) {
      assert(glsl_type_is_array(type));
      return DIV_ROUND_UP(type->length, 4);
   } else if (is_driver_location &&
              glsl_type_is_dual_slot(glsl_without_array(var->type))) {
      assert(!glsl_type_is_array(type));
      return 2;
   } else {
      return glsl_type_is_array(type) ? type->length : 1;
   }
}

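/* Whether the location uses compact scalar-array semantics (clip/cull
 * distances and tess levels). VS inputs and FS outputs are never compact.
 */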
static bool
is_compact(nir_shader *nir, bool is_output, unsigned location)
{
   return nir->options->compact_arrays &&
          (nir->info.stage != MESA_SHADER_VERTEX || is_output) &&
          (nir->info.stage != MESA_SHADER_FRAGMENT || !is_output) &&
          (location == VARYING_SLOT_CLIP_DIST0 ||
           location == VARYING_SLOT_CLIP_DIST1 ||
           location == VARYING_SLOT_CULL_DIST0 ||
           location == VARYING_SLOT_CULL_DIST1 ||
           location == VARYING_SLOT_TESS_LEVEL_OUTER ||
           location == VARYING_SLOT_TESS_LEVEL_INNER);
}

/* Gather information about an IO intrinsic into *desc and find the existing
 * variable matching it, if any. Returns false if the intrinsic isn't
 * lowered IO.
 */
static bool
parse_intrinsic(nir_shader *nir, nir_intrinsic_instr *intr,
                struct io_desc *desc, nir_variable **var)
{
   memset(desc, 0, sizeof(*desc));

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
      break;
   case nir_intrinsic_load_per_vertex_input:
      desc->is_per_vertex = true;
      break;
   case nir_intrinsic_load_output:
      desc->is_output = true;
      break;
   case nir_intrinsic_load_per_vertex_output:
      desc->is_output = true;
      desc->is_per_vertex = true;
      break;
   case nir_intrinsic_store_output:
      desc->is_output = true;
      desc->is_store = true;
      break;
   case nir_intrinsic_store_per_vertex_output:
      desc->is_output = true;
      desc->is_per_vertex = true;
      desc->is_store = true;
      break;
   default:
      return false;
   }

   desc->component = nir_intrinsic_component(intr);
   desc->sem = nir_intrinsic_io_semantics(intr);
   desc->mode = desc->is_output ? nir_var_shader_out : nir_var_shader_in;
   desc->location_src = *nir_get_io_offset_src(intr);
   desc->is_indirect = !nir_src_is_const(desc->location_src);
   desc->is_compact = is_compact(nir, desc->is_output, desc->sem.location);
   desc->is_xfb = nir_instr_xfb_write_mask(intr) != 0;
   desc->num_slots = desc->is_compact ? DIV_ROUND_UP(desc->sem.num_slots, 4)
                                      : desc->sem.num_slots;
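   /* For compact arrays, sem.num_slots counts scalar array elements, so e.g.
    * an 8-element gl_ClipDistance covers DIV_ROUND_UP(8, 4) = 2 vec4 slots.
    */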

   /* Variables can't represent high 16 bits. */
   assert(!desc->sem.high_16bits);

   /* Validate assumptions about indirect indexing. */
   if (desc->is_indirect) {
      assert(desc->sem.num_slots > 1);
   } else if (desc->is_compact) {
      assert(desc->sem.num_slots <= 8);
      assert(nir_src_as_uint(desc->location_src) <= 1);
   } else {
      assert(desc->sem.num_slots == 1);
      assert(nir_src_as_uint(desc->location_src) == 0);
   }

   if (intr->intrinsic == nir_intrinsic_load_interpolated_input &&
       intr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic)
      desc->baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);

   /* Find the variable if it exists. */
   *var = NULL;

   nir_foreach_variable_with_modes(iter, nir, desc->mode) {
      unsigned end_location = iter->data.location +
                              get_var_num_slots(nir->info.stage, iter, false);
      assert(iter->data.location < end_location);

      /* Test if the variables intersect. */
      if (MAX2(desc->sem.location, iter->data.location) <
          MIN2(desc->sem.location + desc->num_slots, end_location) &&
          desc->sem.dual_source_blend_index == iter->data.index) {
         *var = iter;
         break;
      }
   }

   return true;
}

/* Gather which components are used, so that we know how many vector elements
 * the variables should have.
 */
static bool
gather_component_masks(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   uint8_t *component_masks = (uint8_t *)opaque;
   nir_shader *nir = b->shader;
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(nir, intr, &desc, &var))
      return false;

   assert(NUM_TOTAL_VARYING_SLOTS <= 127);
   uint8_t mask, index;

   mask = (desc.is_store ? nir_intrinsic_write_mask(intr) :
                           nir_def_components_read(&intr->def)) <<
          nir_intrinsic_component(intr);
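   /* e.g. a store with write_mask 0x3 at component 2 covers components
    * 0b1100 of the slot.
    */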

   index = desc.sem.location + (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
   component_masks[index] |= mask;

   /* Ensure front and back colors have the same component masks. */
   int8_t alternate_location = -1;
   switch (desc.sem.location) {
   case VARYING_SLOT_COL0: alternate_location = VARYING_SLOT_BFC0; break;
   case VARYING_SLOT_COL1: alternate_location = VARYING_SLOT_BFC1; break;
   case VARYING_SLOT_BFC0: alternate_location = VARYING_SLOT_COL0; break;
   case VARYING_SLOT_BFC1: alternate_location = VARYING_SLOT_COL1; break;
   default: break;
   }
   if (alternate_location >= 0) {
      uint8_t index2 = alternate_location + (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
      component_masks[index2] |= mask;
   }

   return true;
}

/* Variables are created in a separate pass because a single instruction might
 * not describe them completely, so we might have to redefine variables as we
 * parse more instructions.
 *
 * For example, if there is indirect indexing after direct indexing, variables
 * are created as single-slot for the direct indexing first, and then they must
 * be recreated/expanded when indirect indexing is found.
 *
 * Similarly, a normal load might imply that it's vec2 or dvec2, but the next
 * load with high_dvec2=1 implies that it's dvec4.
 *
 * Similarly, both center and centroid interpolation can occur, which means
 * the declaration should declare center and use load_deref, while the centroid
 * load should be interp_deref_at_centroid.
 */
static bool
create_vars(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   uint8_t *component_masks = (uint8_t *)opaque;
   nir_shader *nir = b->shader;
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(nir, intr, &desc, &var))
      return false;

   if (var && desc.is_indirect && !desc.is_compact) {
      const struct glsl_type *type = get_var_slot_type(nir->info.stage, var);

      /* If the variable exists, but it's declared as a non-array because it
       * had direct access first, ignore it. We'll recreate it as an array.
       *
       * If there are 2 arrays in different components (e.g. one in X and
       * another in Y) and they occupy the same vec4, they might not start
       * at the same location, but we merge them into a single variable.
       */
      if (!glsl_type_is_array(type) ||
          desc.sem.location != var->data.location ||
          desc.num_slots != get_var_num_slots(nir->info.stage, var, false))
         var = NULL;
   }

   if (!var) {
      nir_alu_type type = desc.is_store ? nir_intrinsic_src_type(intr) :
                                          nir_intrinsic_dest_type(intr);
      enum glsl_base_type base_type;
      unsigned num_components = 0;
      const struct glsl_type *var_type = NULL;

      /* Bool outputs are represented as uint. */
      if (type == nir_type_bool32)
         type = nir_type_uint32;

      base_type = nir_get_glsl_base_type_for_nir_type(type);

      if (nir->info.stage == MESA_SHADER_FRAGMENT && desc.is_output) {
         /* FS outputs. */
         switch (desc.sem.location) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            num_components = 1;
            break;
         }
      } else if (nir->info.stage == MESA_SHADER_VERTEX && !desc.is_output) {
         /* VS inputs. */
         /* freedreno/a530-traces requires this. */
         num_components = 4;
      } else {
         /* Varyings. */
         if (desc.is_compact) {
            unsigned component, decl_size;

            switch (desc.sem.location) {
            case VARYING_SLOT_TESS_LEVEL_OUTER:
               var_type = glsl_array_type(glsl_float_type(), 4, sizeof(float));
               break;
            case VARYING_SLOT_TESS_LEVEL_INNER:
               var_type = glsl_array_type(glsl_float_type(), 2, sizeof(float));
               break;
            case VARYING_SLOT_CLIP_DIST0:
            case VARYING_SLOT_CLIP_DIST1:
            case VARYING_SLOT_CULL_DIST0:
            case VARYING_SLOT_CULL_DIST1:
               if (nir->options->io_options &
                   nir_io_separate_clip_cull_distance_arrays) {
                  decl_size = desc.sem.location >= VARYING_SLOT_CULL_DIST0 ?
                                 nir->info.cull_distance_array_size :
                                 nir->info.clip_distance_array_size;
               } else {
                  decl_size = nir->info.clip_distance_array_size +
                              nir->info.cull_distance_array_size;
               }
               component = (desc.sem.location == VARYING_SLOT_CLIP_DIST1 ||
                            desc.sem.location == VARYING_SLOT_CULL_DIST1) * 4 +
                           desc.component;
               assert(component < decl_size);
               var_type = glsl_array_type(glsl_float_type(), decl_size,
                                          sizeof(float));
               break;
            default:
               unreachable("unexpected varying slot");
            }
         } else {
            switch (desc.sem.location) {
            case VARYING_SLOT_POS:
               /* d3d12 requires this. */
               num_components = 4;
               break;
            case VARYING_SLOT_PSIZ:
            case VARYING_SLOT_FOGC:
            case VARYING_SLOT_PRIMITIVE_ID:
            case VARYING_SLOT_LAYER:
            case VARYING_SLOT_VIEWPORT:
            case VARYING_SLOT_VIEWPORT_MASK:
            case VARYING_SLOT_FACE:
               num_components = 1;
               break;
            case VARYING_SLOT_TESS_LEVEL_INNER:
            case VARYING_SLOT_PNTC:
               num_components = 2;
               break;
            }
         }
      }

      /* Set the vector size based on which components are used. */
      if (!desc.is_compact && !num_components) {
         for (unsigned i = 0; i < desc.sem.num_slots; i++) {
            unsigned index = desc.sem.location + i +
                             (desc.is_output ? NUM_TOTAL_VARYING_SLOTS : 0);
            unsigned n = util_last_bit(component_masks[index]);
            num_components = MAX2(num_components, n);
         }
      }

      if (!var_type) {
         assert(!desc.is_compact);
         var_type = glsl_vector_type(base_type, num_components);

         if (desc.is_indirect)
            var_type = glsl_array_type(var_type, desc.sem.num_slots, 0);
      }

      unsigned num_vertices = 0;

      if (desc.is_per_vertex) {
         if (nir->info.stage == MESA_SHADER_TESS_CTRL)
            num_vertices = desc.is_output ? nir->info.tess.tcs_vertices_out : 32;
         else if (nir->info.stage == MESA_SHADER_TESS_EVAL && !desc.is_output)
            num_vertices = 32;
         else if (nir->info.stage == MESA_SHADER_GEOMETRY && !desc.is_output)
            num_vertices = mesa_vertices_per_prim(nir->info.gs.input_primitive);
         else
            unreachable("unexpected shader stage for per-vertex IO");

         var_type = glsl_array_type(var_type, num_vertices, 0);
      }

      const char *name = intr->name;
      if (!name) {
         if (nir->info.stage == MESA_SHADER_VERTEX && !desc.is_output)
            name = gl_vert_attrib_name(desc.sem.location);
         else if (nir->info.stage == MESA_SHADER_FRAGMENT && desc.is_output)
            name = gl_frag_result_name(desc.sem.location);
         else
            name = gl_varying_slot_name_for_stage(desc.sem.location, nir->info.stage);
      }

      var = nir_variable_create(nir, desc.mode, var_type, name);
      var->data.location = desc.sem.location;
      /* If this is the high half of dvec4, the driver location should point
       * to the low half of dvec4.
       */
      var->data.driver_location = nir_intrinsic_base(intr) -
                                  (desc.sem.high_dvec2 ? 1 : 0);
      var->data.compact = desc.is_compact;
      var->data.precision = desc.sem.medium_precision ? GLSL_PRECISION_MEDIUM
                                                      : GLSL_PRECISION_HIGH;
      var->data.index = desc.sem.dual_source_blend_index;
      var->data.patch =
         !desc.is_per_vertex &&
         ((nir->info.stage == MESA_SHADER_TESS_CTRL && desc.is_output) ||
          (nir->info.stage == MESA_SHADER_TESS_EVAL && !desc.is_output));
      var->data.interpolation = VAR_INTERP_UNDEF;
      var->data.always_active_io = desc.is_xfb;

      /* If the variable is an array accessed indirectly, remove any variables
       * we may have created up to this point that overlap with it.
       */
      if (desc.is_indirect) {
         unsigned var_num_slots = get_var_num_slots(nir->info.stage, var, false);
         unsigned var_end_location = var->data.location + var_num_slots;

         nir_foreach_variable_with_modes_safe(iter, nir, desc.mode) {
            unsigned iter_num_slots =
               get_var_num_slots(nir->info.stage, iter, false);
            unsigned iter_end_location = iter->data.location + iter_num_slots;

            if (iter != var &&
                iter->data.index == var->data.index &&
                /* Test if the variables intersect. */
                MAX2(iter->data.location, var->data.location) <
                MIN2(iter_end_location, var_end_location)) {
               /* Compact variables shouldn't end up here. */
               assert(!desc.is_compact);

               /* If the array variables overlap, but don't start at the same
                * location, we merge them.
                */
               if (iter->data.location < var->data.location ||
                   iter_end_location > var_end_location) {
                  var->data.location = MIN2(var->data.location,
                                            iter->data.location);
                  var->data.driver_location = MIN2(var->data.driver_location,
                                                   iter->data.driver_location);

                  const struct glsl_type *elem_type = var->type;

                  if (var_is_per_vertex(nir->info.stage, var)) {
                     assert(glsl_type_is_array(elem_type));
                     elem_type = elem_type->fields.array;
                  }

                  assert(glsl_type_is_array(elem_type));
                  elem_type = elem_type->fields.array;
                  assert(!glsl_type_is_array(elem_type));

                  unsigned end_location = MAX2(iter_end_location,
                                               var_end_location);
                  unsigned new_num_slots = end_location - var->data.location;

                  var->type = glsl_array_type(elem_type, new_num_slots, 0);

                  if (var_is_per_vertex(nir->info.stage, var)) {
                     assert(num_vertices);
                     var->type = glsl_array_type(var->type, num_vertices, 0);
                  }
               }

               /* Preserve variable fields from individual variables. */
               var->data.invariant |= iter->data.invariant;
               var->data.stream |= iter->data.stream;
               var->data.per_view |= iter->data.per_view;
               var->data.fb_fetch_output |= iter->data.fb_fetch_output;
               var->data.access |= iter->data.access;
               var->data.always_active_io |= iter->data.always_active_io;

               if (var->data.interpolation == VAR_INTERP_UNDEF)
                  var->data.interpolation = iter->data.interpolation;
               else
                  assert(var->data.interpolation == iter->data.interpolation);

               if (desc.baryc) {
                  /* This can only contain VAR_INDEX_INTERP_AT_PIXEL. */
                  var->index = iter->index;
                  var->data.centroid = iter->data.centroid;
                  var->data.sample = iter->data.sample;
               }
               exec_node_remove(&iter->node);
            }
         }
      }
   }

   /* Some semantics are dependent on the instruction or component. */
   var->data.invariant |= desc.sem.invariant;
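   /* gs_streams holds 2 bits per component; shift it into place and mark
    * the result as packed.
    */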
   var->data.stream |= (desc.sem.gs_streams << (desc.component * 2));
   if (var->data.stream)
      var->data.stream |= NIR_STREAM_PACKED;
   var->data.per_view |= desc.sem.per_view;
   var->data.always_active_io |= desc.is_xfb;

   if (desc.sem.fb_fetch_output) {
      var->data.fb_fetch_output = 1;
      if (desc.sem.fb_fetch_output_coherent)
         var->data.access |= ACCESS_COHERENT;
   }

   if (desc.sem.high_dvec2) {
      assert(!desc.is_store);
      assert(!desc.is_indirect); /* TODO: indirect dvec4 VS inputs unhandled */
      var->type = glsl_dvec4_type();
   }

   if (desc.baryc) {
      if (var->data.interpolation == VAR_INTERP_UNDEF)
         var->data.interpolation = nir_intrinsic_interp_mode(desc.baryc);
      else
         assert(var->data.interpolation == nir_intrinsic_interp_mode(desc.baryc));

      switch (desc.baryc->intrinsic) {
      case nir_intrinsic_load_barycentric_pixel:
         var->index = VAR_INDEX_INTERP_AT_PIXEL;
         break;
      case nir_intrinsic_load_barycentric_at_offset:
      case nir_intrinsic_load_barycentric_at_sample:
         break;
      case nir_intrinsic_load_barycentric_centroid:
         var->data.centroid = true;
         break;
      case nir_intrinsic_load_barycentric_sample:
         assert(var->index != VAR_INDEX_INTERP_AT_PIXEL);
         var->data.sample = true;
         break;
      default:
         unreachable("unexpected barycentric intrinsic");
      }

      if (var->index == VAR_INDEX_INTERP_AT_PIXEL) {
         /* Centroid interpolation will use interp_deref_at_centroid. */
         var->data.centroid = false;
         assert(!var->data.sample);
      }
   } else {
      enum glsl_interp_mode flat_mode =
         nir->info.stage == MESA_SHADER_FRAGMENT && !desc.is_output ?
            INTERP_MODE_FLAT : INTERP_MODE_NONE;

      if (var->data.interpolation == VAR_INTERP_UNDEF)
         var->data.interpolation = flat_mode;
      else
         assert(var->data.interpolation == flat_mode);
   }

   return true;
}

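/* Rewrite one lowered IO intrinsic as a load/store through a deref of the
 * variable that create_vars created for it.
 */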
static bool
unlower_io_to_vars(nir_builder *b, nir_intrinsic_instr *intr, void *opaque)
{
   struct io_desc desc;
   nir_variable *var;

   if (!parse_intrinsic(b->shader, intr, &desc, &var))
      return false;

   b->cursor = nir_after_instr(&intr->instr);

   /* Create the deref. */
   assert(var);
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   if (desc.is_per_vertex) {
      deref = nir_build_deref_array(b, deref,
                                    nir_get_io_arrayed_index_src(intr)->ssa);
   }

   /* Compact variables have a dedicated codepath. */
   if (var->data.compact) {
      unsigned mask = desc.is_store ? nir_intrinsic_write_mask(intr) :
                                      BITFIELD_MASK(intr->def.num_components);
      nir_def *chan[4];

      u_foreach_bit(bit, mask) {
         nir_def *loc_index = desc.location_src.ssa;

         /* In store_output, compact tess levels interpret the location src
          * as the indirect component index, while compact clip/cull distances
          * interpret the location src as the vec4 index. Convert it to
          * the component index for store_deref.
          */
         if (desc.sem.location >= VARYING_SLOT_CLIP_DIST0 &&
             desc.sem.location <= VARYING_SLOT_CULL_DIST1)
            loc_index = nir_imul_imm(b, loc_index, 4);

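         /* e.g. with sem.location == var->data.location, a clip-distance
          * access with location src 1 and component 2 addresses
          * gl_ClipDistance[1 * 4 + 2 + bit].
          */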
         nir_def *index =
            nir_iadd_imm(b, loc_index,
                         (desc.sem.location - var->data.location) * 4 +
                         desc.component + bit);

         nir_deref_instr *deref_elem = nir_build_deref_array(b, deref, index);
         assert(!glsl_type_is_array(deref_elem->type));

         if (desc.is_store) {
            nir_build_store_deref(b, &deref_elem->def,
                                  nir_channel(b, intr->src[0].ssa, bit),
                                  .write_mask = 0x1,
                                  .access = var->data.access);
         } else {
            assert(bit < ARRAY_SIZE(chan));
            chan[bit] = nir_load_deref_with_access(b, deref_elem,
                                                   var->data.access);
         }
      }

      if (!desc.is_store) {
         nir_def_rewrite_uses(&intr->def,
                              nir_vec(b, chan, intr->def.num_components));
      }

      nir_instr_remove(&intr->instr);
      return true;
   }

   if (get_var_num_slots(b->shader->info.stage, var, false) > 1) {
      nir_def *index = nir_imm_int(b, desc.sem.location - var->data.location);
      if (desc.is_indirect)
         index = nir_iadd(b, index, desc.location_src.ssa);

      deref = nir_build_deref_array(b, deref, index);
   }

   /* We shouldn't need any other array dereferences. */
   assert(!glsl_type_is_array(deref->type));
   unsigned num_components = deref->type->vector_elements;

   if (desc.is_store) {
      unsigned writemask = nir_intrinsic_write_mask(intr) << desc.component;
      nir_def *value = intr->src[0].ssa;

      if (desc.component) {
         unsigned new_num_components = desc.component + value->num_components;
         unsigned swizzle[4] = {0};
         assert(new_num_components <= 4);

         /* Move components within the vector to the right because we only
          * have vec4 stores. The writemask skips the extra components at
          * the beginning.
          *
          * For component = 1: .xyz -> .xxyz
          * For component = 2: .xy  -> .xxxy
          * For component = 3: .x   -> .xxxx
          */
         for (unsigned i = 1; i < value->num_components; i++)
            swizzle[desc.component + i] = i;

         value = nir_swizzle(b, value, swizzle, new_num_components);
      }

      value = nir_resize_vector(b, value, num_components);

      /* virgl requires scalarized TESS_LEVEL stores because originally
       * the GLSL compiler never vectorized them. Doing 1 store per bit of
       * the writemask is enough to make virgl work.
       */
      if (desc.sem.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          desc.sem.location == VARYING_SLOT_TESS_LEVEL_INNER) {
         u_foreach_bit(i, writemask) {
            nir_build_store_deref(b, &deref->def, value,
                                  .write_mask = BITFIELD_BIT(i),
                                  .access = var->data.access);
         }
      } else {
         nir_build_store_deref(b, &deref->def, value,
                               .write_mask = writemask,
                               .access = var->data.access);
      }
   } else {
      nir_def *load;

      if (deref->type == glsl_dvec4_type()) {
         /* Load dvec4, but extract low or high half as vec4. */
         load = nir_load_deref_with_access(b, deref, var->data.access);
         load = nir_extract_bits(b, &load, 1, desc.sem.high_dvec2 ? 128 : 0,
                                 4, 32);
      } else {
         nir_intrinsic_op baryc = desc.baryc ? desc.baryc->intrinsic :
                                               nir_num_intrinsics;

         if (baryc == nir_intrinsic_load_barycentric_centroid &&
             var->index == VAR_INDEX_INTERP_AT_PIXEL) {
            /* Both pixel and centroid interpolation occur, so the latter
             * must use interp_deref_at_centroid.
             */
            load = nir_interp_deref_at_centroid(b, num_components,
                                                intr->def.bit_size,
                                                &deref->def);
         } else if (baryc == nir_intrinsic_load_barycentric_at_offset) {
            load = nir_interp_deref_at_offset(b, num_components,
                                              intr->def.bit_size, &deref->def,
                                              desc.baryc->src[0].ssa);
         } else if (baryc == nir_intrinsic_load_barycentric_at_sample) {
            load = nir_interp_deref_at_sample(b, num_components,
                                              intr->def.bit_size, &deref->def,
                                              desc.baryc->src[0].ssa);
         } else {
            load = nir_load_deref_with_access(b, deref, var->data.access);
         }
      }

      load = nir_pad_vec4(b, load);
      load = nir_channels(b, load, BITFIELD_RANGE(desc.component,
                                                  intr->def.num_components));
      nir_def_rewrite_uses(&intr->def, load);
   }

   nir_instr_remove(&intr->instr);
   return true;
}

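/* Convert lowered IO intrinsics back to IO variables in three steps: gather
 * the used component masks, create the variables, then rewrite every IO
 * intrinsic as variable derefs.
 */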
bool
st_nir_unlower_io_to_vars(nir_shader *nir)
{
   if (nir->info.stage == MESA_SHADER_COMPUTE)
      return false;

   /* Flexible interpolation is not supported by this pass. If you want to
    * enable flexible interpolation for your driver, it has to stop consuming
    * IO variables.
    */
   assert(!(nir->options->io_options &
            nir_io_has_flexible_input_interpolation_except_flat));
   assert(!(nir->options->io_options &
            nir_io_mix_convergent_flat_with_interpolated));

   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      unreachable("the shader should have no IO variables");
   }

   /* Some drivers can't handle holes in driver locations (bases), so
    * recompute them.
    */
   nir_variable_mode modes =
      nir_var_shader_out |
      (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0);
   bool progress = nir_recompute_io_bases(nir, modes);

   /* Gather component masks. */
   uint8_t component_masks[NUM_TOTAL_VARYING_SLOTS * 2] = {0};
   if (!nir_shader_intrinsics_pass(nir, gather_component_masks,
                                   nir_metadata_all, component_masks)) {
      nir->info.io_lowered = false; /* Nothing to do. */
      return progress;
   }

   /* Create IO variables. */
   if (!nir_shader_intrinsics_pass(nir, create_vars, nir_metadata_all,
                                   component_masks)) {
      nir->info.io_lowered = false; /* Nothing to do. */
      return progress;
   }

   /* Unlower IO using the created variables. */
   ASSERTED bool lower_progress =
      nir_shader_intrinsics_pass(nir, unlower_io_to_vars,
                                 nir_metadata_control_flow, NULL);
   assert(lower_progress);
   nir->info.io_lowered = false;

   /* Count IO variables. */
   nir->num_inputs = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
      nir->num_inputs += get_var_num_slots(nir->info.stage, var, true);
   }

   nir->num_outputs = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
      nir->num_outputs += get_var_num_slots(nir->info.stage, var, true);
   }

   /* llvmpipe and other drivers require that variables are sorted by
    * location, otherwise a lot of tests fail.
    *
    * It looks like location and driver_location are not the only values that
    * determine behavior. The order in which the variables are declared also
    * affects behavior.
    */
   unsigned varying_var_mask =
      nir_var_shader_in |
      (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
   nir_sort_variables_by_location(nir, varying_var_mask);

   /* Fix locations and info for dual-slot VS inputs. Intel needs this.
    * All other drivers only use driver_location.
    */
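   /* e.g. a dvec4 input at location 0 occupies two slots, pushing an input
    * that follows at location 1 up to location 2.
    */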
   if (nir->info.stage == MESA_SHADER_VERTEX) {
      unsigned num_dual_slots = 0;
      nir->num_inputs = 0;
      nir->info.inputs_read = 0;

      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.location += num_dual_slots;
         nir->info.inputs_read |= BITFIELD64_BIT(var->data.location);
         nir->num_inputs++;

         if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
            num_dual_slots++;
            nir->info.inputs_read |= BITFIELD64_BIT(var->data.location + 1);
            nir->num_inputs++;
         }
      }
   }

   return true;
}