1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/hash_table.h"
25 #include "util/set.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28 
29 /* This file contains various little helpers for doing simple linking in
30  * NIR.  Eventually, we'll probably want a full-blown varying packing
31  * implementation in here.  Right now, it just deletes unused things.
32  */
33 
34 /**
35  * Returns the bits in the inputs_read or outputs_written bitfield
36  * that correspond to this variable.
37  */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41    if (var->data.location < 0)
42       return 0;
43 
44    unsigned location = var->data.patch ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
45 
46    assert(var->data.mode == nir_var_shader_in ||
47           var->data.mode == nir_var_shader_out);
48    assert(var->data.location >= 0);
49    assert(location < 64);
50 
51    const struct glsl_type *type = var->type;
52    if (nir_is_arrayed_io(var, stage)) {
53       assert(glsl_type_is_array(type));
54       type = glsl_get_array_element(type);
55    }
56 
57    unsigned slots = glsl_count_attribute_slots(type, false);
58    return BITFIELD64_MASK(slots) << location;
59 }
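/* Illustrative example (not part of the original source): a non-patch float[3]
 * varying at VARYING_SLOT_VAR2 counts as 3 attribute slots, so this returns
 * BITFIELD64_MASK(3) << VARYING_SLOT_VAR2.  For arrayed I/O (e.g. TCS
 * per-vertex outputs) the outer per-vertex array is stripped first.
 */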
60 
61 static bool
62 is_non_generic_patch_var(nir_variable *var)
63 {
64    return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65           var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66           var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67           var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69 
70 static uint8_t
71 get_num_components(nir_variable *var)
72 {
73    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74       return 4;
75 
76    return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
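/* Note (added for clarity): struct and interface varyings are not split into
 * per-component scalars, so callers conservatively treat them as touching all
 * four components of each slot.
 */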
78 
79 static void
80 add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82    nir_foreach_function_impl(impl, shader) {
83       nir_foreach_block(block, impl) {
84          nir_foreach_instr(instr, block) {
85             if (instr->type != nir_instr_type_intrinsic)
86                continue;
87 
88             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
89             if (intrin->intrinsic != nir_intrinsic_load_deref)
90                continue;
91 
92             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
93             if (!nir_deref_mode_is(deref, nir_var_shader_out))
94                continue;
95 
96             nir_variable *var = nir_deref_instr_get_variable(deref);
97             for (unsigned i = 0; i < get_num_components(var); i++) {
98                if (var->data.patch) {
99                   if (is_non_generic_patch_var(var))
100                      continue;
101 
102                   patches_read[var->data.location_frac + i] |=
103                      get_variable_io_mask(var, shader->info.stage);
104                } else {
105                   read[var->data.location_frac + i] |=
106                      get_variable_io_mask(var, shader->info.stage);
107                }
108             }
109          }
110       }
111    }
112 }
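/* Note (added for clarity): outputs can be read back within the same stage,
 * e.g. a TCS loading its own outputs, so nir_remove_unused_io_vars() treats
 * these reads like consumer reads when deciding whether an output is unused.
 */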
113 
114 static bool
115 remove_unused_io_access(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
116 {
117    nir_variable_mode mode = *(nir_variable_mode *)cb_data;
118 
119    unsigned srcn = 0;
120    switch (intrin->intrinsic) {
121    case nir_intrinsic_load_deref:
122    case nir_intrinsic_store_deref:
123    case nir_intrinsic_interp_deref_at_centroid:
124    case nir_intrinsic_interp_deref_at_sample:
125    case nir_intrinsic_interp_deref_at_offset:
126    case nir_intrinsic_interp_deref_at_vertex:
127       break;
128    case nir_intrinsic_copy_deref:
129       srcn = mode == nir_var_shader_in ? 1 : 0;
130       break;
131    default:
132       return false;
133    }
134 
135    nir_variable *var = nir_intrinsic_get_var(intrin, srcn);
136    if (!var || var->data.mode != mode || var->data.location != NUM_TOTAL_VARYING_SLOTS)
137       return false;
138 
139    if (intrin->intrinsic != nir_intrinsic_store_deref &&
140        intrin->intrinsic != nir_intrinsic_copy_deref) {
141       b->cursor = nir_before_instr(&intrin->instr);
142       nir_def *undef = nir_undef(b, intrin->num_components, intrin->def.bit_size);
143       nir_def_rewrite_uses(&intrin->def, undef);
144    }
145 
146    nir_instr_remove(&intrin->instr);
147    nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[srcn]));
148 
149    return true;
150 }
151 
152 /**
153  * Helper for removing unused shader I/O variables, by demoting them to global
154  * variables (which may then be dead-code eliminated).
155  *
156  * Example usage is:
157  *
158  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
159  *                                      read, patches_read) ||
160  *                                      progress;
161  */
162 bool
163 nir_remove_unused_io_vars(nir_shader *shader,
164                           nir_variable_mode mode,
165                           uint64_t *used_by_other_stage,
166                           uint64_t *used_by_other_stage_patches)
167 {
168    bool progress = false;
169    uint64_t *used;
170 
171    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
172 
173    uint64_t read[4] = { 0 };
174    uint64_t patches_read[4] = { 0 };
175    if (mode == nir_var_shader_out)
176       add_output_reads(shader, read, patches_read);
177 
178    nir_foreach_variable_with_modes_safe(var, shader, mode) {
179       if (var->data.patch)
180          used = used_by_other_stage_patches;
181       else
182          used = used_by_other_stage;
183 
184       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0 &&
185           !(shader->info.stage == MESA_SHADER_MESH && var->data.location == VARYING_SLOT_PRIMITIVE_ID))
186          continue;
187 
188       if (var->data.always_active_io)
189          continue;
190 
191       if (var->data.explicit_xfb_buffer)
192          continue;
193 
194       uint64_t other_stage = 0;
195       uint64_t this_stage = 0;
196       for (unsigned i = 0; i < get_num_components(var); i++) {
197          other_stage |= used[var->data.location_frac + i];
198          this_stage |= (var->data.patch ? patches_read : read)[var->data.location_frac + i];
199       }
200 
201       uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
202       if (!((other_stage | this_stage) & var_mask)) {
203          /* Mark the variable as removed by setting the location to an invalid value. */
204          var->data.location = NUM_TOTAL_VARYING_SLOTS;
205          exec_node_remove(&var->node);
206          progress = true;
207       }
208    }
209 
210    if (progress) {
211       nir_shader_intrinsics_pass(shader, &remove_unused_io_access, nir_metadata_control_flow, &mode);
212    } else {
213       nir_shader_preserve_all_metadata(shader);
214    }
215 
216    return progress;
217 }
218 
219 bool
220 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
221 {
222    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
223    assert(consumer->info.stage != MESA_SHADER_VERTEX);
224 
225    uint64_t read[4] = { 0 }, written[4] = { 0 };
226    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
227 
228    nir_foreach_shader_out_variable(var, producer) {
229       for (unsigned i = 0; i < get_num_components(var); i++) {
230          if (var->data.patch) {
231             if (is_non_generic_patch_var(var))
232                continue;
233 
234             patches_written[var->data.location_frac + i] |=
235                get_variable_io_mask(var, producer->info.stage);
236          } else {
237             written[var->data.location_frac + i] |=
238                get_variable_io_mask(var, producer->info.stage);
239          }
240       }
241    }
242 
243    nir_foreach_shader_in_variable(var, consumer) {
244       for (unsigned i = 0; i < get_num_components(var); i++) {
245          if (var->data.patch) {
246             if (is_non_generic_patch_var(var))
247                continue;
248 
249             patches_read[var->data.location_frac + i] |=
250                get_variable_io_mask(var, consumer->info.stage);
251          } else {
252             read[var->data.location_frac + i] |=
253                get_variable_io_mask(var, consumer->info.stage);
254          }
255       }
256    }
257 
258    bool progress = false;
259    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
260                                         patches_read);
261 
262    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
263                                         patches_written) ||
264               progress;
265 
266    return progress;
267 }
268 
269 static uint8_t
270 get_interp_type(nir_variable *var, const struct glsl_type *type,
271                 bool default_to_smooth_interp)
272 {
273    if (var->data.per_primitive)
274       return INTERP_MODE_NONE;
275    if (glsl_type_is_integer(type))
276       return INTERP_MODE_FLAT;
277    else if (var->data.interpolation != INTERP_MODE_NONE)
278       return var->data.interpolation;
279    else if (default_to_smooth_interp)
280       return INTERP_MODE_SMOOTH;
281    else
282       return INTERP_MODE_NONE;
283 }
284 
285 #define INTERPOLATE_LOC_SAMPLE   0
286 #define INTERPOLATE_LOC_CENTROID 1
287 #define INTERPOLATE_LOC_CENTER   2
288 
289 static uint8_t
290 get_interp_loc(nir_variable *var)
291 {
292    if (var->data.sample)
293       return INTERPOLATE_LOC_SAMPLE;
294    else if (var->data.centroid)
295       return INTERPOLATE_LOC_CENTROID;
296    else
297       return INTERPOLATE_LOC_CENTER;
298 }
299 
300 static bool
301 is_packing_supported_for_type(const struct glsl_type *type)
302 {
303    /* We ignore complex types such as arrays, matrices, structs and bit sizes
304     * other than 32 bits. All other vector types should have been split into
305     * scalar variables by the lower_io_to_scalar pass. The only exception
306     * should be OpenGL xfb varyings.
307     * TODO: add support for more complex types?
308     */
309    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
310 }
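/* Illustrative example: a plain 32-bit float scalar qualifies for packing,
 * while a vec2 (not yet scalarized) or a 16-bit scalar does not, so those keep
 * their original locations.
 */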
311 
312 struct assigned_comps {
313    uint8_t comps;
314    uint8_t interp_type;
315    uint8_t interp_loc;
316    bool is_32bit;
317    bool is_mediump;
318    bool is_per_primitive;
319 };
320 
321 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
322  * algorithms this function just assigns them their existing locations for now.
323  * TODO: allow better packing of complex types.
324  */
325 static void
326 get_unmoveable_components_masks(nir_shader *shader,
327                                 nir_variable_mode mode,
328                                 struct assigned_comps *comps,
329                                 gl_shader_stage stage,
330                                 bool default_to_smooth_interp)
331 {
332    nir_foreach_variable_with_modes_safe(var, shader, mode) {
333       assert(var->data.location >= 0);
334 
335       /* Only remap things that aren't built-ins. */
336       if (var->data.location >= VARYING_SLOT_VAR0 &&
337           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
338 
339          const struct glsl_type *type = var->type;
340          if (nir_is_arrayed_io(var, stage)) {
341             assert(glsl_type_is_array(type));
342             type = glsl_get_array_element(type);
343          }
344 
345          /* If we can pack this varying then don't mark the components as
346           * used.
347           */
348          if (is_packing_supported_for_type(type) &&
349              !var->data.always_active_io)
350             continue;
351 
352          unsigned location = var->data.location - VARYING_SLOT_VAR0;
353 
354          unsigned elements =
355             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? glsl_get_vector_elements(glsl_without_array(type)) : 4;
356 
357          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
358          unsigned slots = glsl_count_attribute_slots(type, false);
359          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
360          unsigned comps_slot2 = 0;
361          for (unsigned i = 0; i < slots; i++) {
362             if (dual_slot) {
363                if (i & 1) {
364                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
365                } else {
366                   unsigned num_comps = 4 - var->data.location_frac;
367                   comps_slot2 = (elements * dmul) - num_comps;
368 
369                   /* Assume ARB_enhanced_layouts packing rules for doubles */
370                   assert(var->data.location_frac == 0 ||
371                          var->data.location_frac == 2);
372                   assert(comps_slot2 <= 4);
373 
374                   comps[location + i].comps |=
375                      ((1 << num_comps) - 1) << var->data.location_frac;
376                }
377             } else {
378                comps[location + i].comps |=
379                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
380             }
381 
382             comps[location + i].interp_type =
383                get_interp_type(var, type, default_to_smooth_interp);
384             comps[location + i].interp_loc = get_interp_loc(var);
385             comps[location + i].is_32bit =
386                glsl_type_is_32bit(glsl_without_array(type));
387             comps[location + i].is_mediump =
388                var->data.precision == GLSL_PRECISION_MEDIUM ||
389                var->data.precision == GLSL_PRECISION_LOW;
390             comps[location + i].is_per_primitive = var->data.per_primitive;
391          }
392       }
393    }
394 }
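/* Worked example (illustrative): a dvec3 at location_frac 0 has
 * elements * dmul == 6 components.  Its first slot marks num_comps == 4
 * components (xyzw) and the second slot marks comps_slot2 == 2 components
 * (xy), matching the ARB_enhanced_layouts packing rules assumed above.
 */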
395 
396 struct varying_loc {
397    uint8_t component;
398    uint32_t location;
399 };
400 
401 static void
402 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
403                     uint64_t slots_used_mask, unsigned num_slots)
404 {
405    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
406 
407    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
408                                           BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
409 }
410 
411 static void
412 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
413 {
414    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
415 
416    slots_used[var->data.patch ? 1 : 0] |=
417       BITFIELD64_BIT(var->data.location - loc_offset + offset);
418 }
419 
420 static void
421 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
422                            struct varying_loc (*remap)[4],
423                            uint64_t *slots_used, uint64_t *out_slots_read,
424                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
425 {
426    const gl_shader_stage stage = shader->info.stage;
427    uint64_t out_slots_read_tmp[2] = { 0 };
428    uint64_t slots_used_tmp[2] = { 0 };
429 
430    /* We don't touch builtins so just copy the bitmask */
431    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
432 
433    nir_foreach_variable_with_modes(var, shader, mode) {
434       assert(var->data.location >= 0);
435 
436       /* Only remap things that aren't built-ins */
437       if (var->data.location >= VARYING_SLOT_VAR0 &&
438           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
439 
440          const struct glsl_type *type = var->type;
441          if (nir_is_arrayed_io(var, stage)) {
442             assert(glsl_type_is_array(type));
443             type = glsl_get_array_element(type);
444          }
445 
446          unsigned num_slots = glsl_count_attribute_slots(type, false);
447          bool used_across_stages = false;
448          bool outputs_read = false;
449 
450          unsigned location = var->data.location - VARYING_SLOT_VAR0;
451          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
452 
453          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
454          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
455          uint64_t outs_used =
456             var->data.patch ? *p_out_slots_read : *out_slots_read;
457          uint64_t slots =
458             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
459 
460          if (slots & used)
461             used_across_stages = true;
462 
463          if (slots & outs_used)
464             outputs_read = true;
465 
466          if (new_loc->location) {
467             var->data.location = new_loc->location;
468             var->data.location_frac = new_loc->component;
469          }
470 
471          if (var->data.always_active_io) {
472             /* We can't apply link-time optimisations (specifically array
473              * splitting) to these, so we need to copy the existing mask;
474              * otherwise we will mess up the mask for things like partially
475              * marked arrays.
476              */
477             if (used_across_stages)
478                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
479 
480             if (outputs_read) {
481                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
482                                    num_slots);
483             }
484          } else {
485             for (unsigned i = 0; i < num_slots; i++) {
486                if (used_across_stages)
487                   mark_used_slot(var, slots_used_tmp, i);
488 
489                if (outputs_read)
490                   mark_used_slot(var, out_slots_read_tmp, i);
491             }
492          }
493       }
494    }
495 
496    *slots_used = slots_used_tmp[0];
497    *out_slots_read = out_slots_read_tmp[0];
498    *p_slots_used = slots_used_tmp[1];
499    *p_out_slots_read = out_slots_read_tmp[1];
500 }
501 
502 struct varying_component {
503    nir_variable *var;
504    uint8_t interp_type;
505    uint8_t interp_loc;
506    bool is_32bit;
507    bool is_patch;
508    bool is_per_primitive;
509    bool is_mediump;
510    bool is_intra_stage_only;
511    bool initialised;
512 };
513 
514 static int
515 cmp_varying_component(const void *comp1_v, const void *comp2_v)
516 {
517    struct varying_component *comp1 = (struct varying_component *)comp1_v;
518    struct varying_component *comp2 = (struct varying_component *)comp2_v;
519 
520    /* We want patches to be ordered at the end of the array. */
521    if (comp1->is_patch != comp2->is_patch)
522       return comp1->is_patch ? 1 : -1;
523 
524    /* Sort per-primitive outputs after per-vertex ones to allow
525     * better compaction when they are mixed in the shader's source.
526     */
527    if (comp1->is_per_primitive != comp2->is_per_primitive)
528       return comp1->is_per_primitive ? 1 : -1;
529 
530    /* We want to try to group together TCS outputs that are only read by other
531     * TCS invocations and not consumed by the following stage.
532     */
533    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
534       return comp1->is_intra_stage_only ? 1 : -1;
535 
536    /* Group mediump varyings together. */
537    if (comp1->is_mediump != comp2->is_mediump)
538       return comp1->is_mediump ? 1 : -1;
539 
540    /* We can only pack varyings with matching interpolation types so group
541     * them together.
542     */
543    if (comp1->interp_type != comp2->interp_type)
544       return comp1->interp_type - comp2->interp_type;
545 
546    /* Interpolation loc must match also. */
547    if (comp1->interp_loc != comp2->interp_loc)
548       return comp1->interp_loc - comp2->interp_loc;
549 
550    /* If everything else matches just use the original location to sort */
551    const struct nir_variable_data *const data1 = &comp1->var->data;
552    const struct nir_variable_data *const data2 = &comp2->var->data;
553    if (data1->location != data2->location)
554       return data1->location - data2->location;
555    return (int)data1->location_frac - (int)data2->location_frac;
556 }
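/* Note (added for clarity): the final location/location_frac comparison makes
 * the ordering a deterministic total order, which matters because qsort() is
 * not guaranteed to be a stable sort.
 */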
557 
558 static void
559 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
560                               struct varying_component **varying_comp_info,
561                               unsigned *varying_comp_info_size,
562                               bool default_to_smooth_interp)
563 {
564    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = { { 0 } };
565    unsigned num_of_comps_to_pack = 0;
566 
567    /* Count the number of varyings that can be packed and create a mapping
568     * of those varyings to the array we will pass to qsort.
569     */
570    nir_foreach_shader_out_variable(var, producer) {
571 
572       /* Only remap things that aren't builtins. */
573       if (var->data.location >= VARYING_SLOT_VAR0 &&
574           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
575 
576          /* We can't repack xfb varyings. */
577          if (var->data.always_active_io)
578             continue;
579 
580          const struct glsl_type *type = var->type;
581          if (nir_is_arrayed_io(var, producer->info.stage)) {
582             assert(glsl_type_is_array(type));
583             type = glsl_get_array_element(type);
584          }
585 
586          if (!is_packing_supported_for_type(type))
587             continue;
588 
589          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
590          store_varying_info_idx[loc][var->data.location_frac] =
591             ++num_of_comps_to_pack;
592       }
593    }
594 
595    *varying_comp_info_size = num_of_comps_to_pack;
596    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
597                                       num_of_comps_to_pack);
598 
599    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
600 
601    /* Walk over the shader and populate the varying component info array */
602    nir_foreach_block(block, impl) {
603       nir_foreach_instr(instr, block) {
604          if (instr->type != nir_instr_type_intrinsic)
605             continue;
606 
607          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
608          if (intr->intrinsic != nir_intrinsic_load_deref &&
609              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
610              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
611              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
612              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
613             continue;
614 
615          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
616          if (!nir_deref_mode_is(deref, nir_var_shader_in))
617             continue;
618 
619          /* We only remap things that aren't builtins. */
620          nir_variable *in_var = nir_deref_instr_get_variable(deref);
621          if (in_var->data.location < VARYING_SLOT_VAR0)
622             continue;
623 
624          /* Do not remap per-vertex shader inputs because they are 3-element
625           * arrays and remapping them isn't supported.
626           */
627          if (in_var->data.per_vertex)
628             continue;
629 
630          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
631          if (location >= MAX_VARYINGS_INCL_PATCH)
632             continue;
633 
634          unsigned var_info_idx =
635             store_varying_info_idx[location][in_var->data.location_frac];
636          if (!var_info_idx)
637             continue;
638 
639          struct varying_component *vc_info =
640             &(*varying_comp_info)[var_info_idx - 1];
641 
642          if (!vc_info->initialised) {
643             const struct glsl_type *type = in_var->type;
644             if (nir_is_arrayed_io(in_var, consumer->info.stage)) {
645                assert(glsl_type_is_array(type));
646                type = glsl_get_array_element(type);
647             }
648 
649             vc_info->var = in_var;
650             vc_info->interp_type =
651                get_interp_type(in_var, type, default_to_smooth_interp);
652             vc_info->interp_loc = get_interp_loc(in_var);
653             vc_info->is_32bit = glsl_type_is_32bit(type);
654             vc_info->is_patch = in_var->data.patch;
655             vc_info->is_per_primitive = in_var->data.per_primitive;
656             vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
657                                   (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
658                                    in_var->data.precision == GLSL_PRECISION_LOW);
659             vc_info->is_intra_stage_only = false;
660             vc_info->initialised = true;
661          }
662       }
663    }
664 
665    /* Walk over the shader and populate the varying component info array
666     * for varyings which are read by other TCS instances but are not consumed
667     * by the TES.
668     */
669    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
670       impl = nir_shader_get_entrypoint(producer);
671 
672       nir_foreach_block(block, impl) {
673          nir_foreach_instr(instr, block) {
674             if (instr->type != nir_instr_type_intrinsic)
675                continue;
676 
677             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
678             if (intr->intrinsic != nir_intrinsic_load_deref)
679                continue;
680 
681             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
682             if (!nir_deref_mode_is(deref, nir_var_shader_out))
683                continue;
684 
685             /* We only remap things that aren't builtins. */
686             nir_variable *out_var = nir_deref_instr_get_variable(deref);
687             if (out_var->data.location < VARYING_SLOT_VAR0)
688                continue;
689 
690             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
691             if (location >= MAX_VARYINGS_INCL_PATCH)
692                continue;
693 
694             unsigned var_info_idx =
695                store_varying_info_idx[location][out_var->data.location_frac];
696             if (!var_info_idx) {
697                /* Something went wrong, the shader interfaces didn't match, so
698                 * abandon packing. This can happen for example when the
699                 * inputs are scalars but the outputs are struct members.
700                 */
701                *varying_comp_info_size = 0;
702                break;
703             }
704 
705             struct varying_component *vc_info =
706                &(*varying_comp_info)[var_info_idx - 1];
707 
708             if (!vc_info->initialised) {
709                const struct glsl_type *type = out_var->type;
710                if (nir_is_arrayed_io(out_var, producer->info.stage)) {
711                   assert(glsl_type_is_array(type));
712                   type = glsl_get_array_element(type);
713                }
714 
715                vc_info->var = out_var;
716                vc_info->interp_type =
717                   get_interp_type(out_var, type, default_to_smooth_interp);
718                vc_info->interp_loc = get_interp_loc(out_var);
719                vc_info->is_32bit = glsl_type_is_32bit(type);
720                vc_info->is_patch = out_var->data.patch;
721                vc_info->is_per_primitive = out_var->data.per_primitive;
722                vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
723                                      (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
724                                       out_var->data.precision == GLSL_PRECISION_LOW);
725                vc_info->is_intra_stage_only = true;
726                vc_info->initialised = true;
727             }
728          }
729       }
730    }
731 
732    for (unsigned i = 0; i < *varying_comp_info_size; i++) {
733       struct varying_component *vc_info = &(*varying_comp_info)[i];
734       if (!vc_info->initialised) {
735          /* Something went wrong, the shader interfaces didn't match, so
736           * abandon packing. This can happen for example when the outputs are
737           * scalars but the inputs are struct members.
738           */
739          *varying_comp_info_size = 0;
740          break;
741       }
742    }
743 }
744 
745 static bool
746 allow_pack_interp_type(nir_io_options options, int type)
747 {
748    switch (type) {
749    case INTERP_MODE_NONE:
750    case INTERP_MODE_SMOOTH:
751    case INTERP_MODE_NOPERSPECTIVE:
752       return options & nir_io_has_flexible_input_interpolation_except_flat;
753    default:
754       return false;
755    }
756 }
757 
758 static void
759    assign_remap_locations(struct varying_loc (*remap)[4],
760                           struct assigned_comps *assigned_comps,
761                           struct varying_component *info,
762                           unsigned *cursor, unsigned *comp,
763                           unsigned max_location,
764                           nir_io_options options)
765 {
766    unsigned tmp_cursor = *cursor;
767    unsigned tmp_comp = *comp;
768 
769    for (; tmp_cursor < max_location; tmp_cursor++) {
770 
771       if (assigned_comps[tmp_cursor].comps) {
772          /* Don't pack per-primitive and per-vertex varyings together. */
773          if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
774             tmp_comp = 0;
775             continue;
776          }
777 
778          /* We can only pack varyings with matching precision. */
779          if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
780             tmp_comp = 0;
781             continue;
782          }
783 
784          /* We can only pack varyings with matching interpolation types
785           * unless the driver supports mixing them (see allow_pack_interp_type).
786           */
787          if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
788              (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
789               !allow_pack_interp_type(options, info->interp_type))) {
790             tmp_comp = 0;
791             continue;
792          }
793 
794          /* We can only pack varyings with matching interpolation locations
795           * unless the driver supports flexible interpolation locations.
796           */
797          if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
798              !(options & nir_io_has_flexible_input_interpolation_except_flat)) {
799             tmp_comp = 0;
800             continue;
801          }
802 
803          /* We can only pack varyings with matching types, and the current
804           * algorithm only supports packing 32-bit.
805           */
806          if (!assigned_comps[tmp_cursor].is_32bit) {
807             tmp_comp = 0;
808             continue;
809          }
810 
811          while (tmp_comp < 4 &&
812                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
813             tmp_comp++;
814          }
815       }
816 
817       if (tmp_comp == 4) {
818          tmp_comp = 0;
819          continue;
820       }
821 
822       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
823 
824       /* Once we have assigned a location mark it as used */
825       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
826       assigned_comps[tmp_cursor].interp_type = info->interp_type;
827       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
828       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
829       assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
830       assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
831 
832       /* Assign remap location */
833       remap[location][info->var->data.location_frac].component = tmp_comp++;
834       remap[location][info->var->data.location_frac].location =
835          tmp_cursor + VARYING_SLOT_VAR0;
836 
837       break;
838    }
839 
840    *cursor = tmp_cursor;
841    *comp = tmp_comp;
842 }
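/* Illustrative example: if slot N already has components x/y assigned with a
 * compatible interpolation type/location and 32-bit size, the next packable
 * scalar lands in component z of slot N; once all four components are taken,
 * the cursor advances to slot N + 1.
 */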
843 
844 /* If there are empty components in the slot compact the remaining components
845  * as close to component 0 as possible. This will make it easier to fill the
846  * empty components with components from a different slot in a following pass.
847  */
848 static void
849 compact_components(nir_shader *producer, nir_shader *consumer,
850                    struct assigned_comps *assigned_comps,
851                    bool default_to_smooth_interp)
852 {
853    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
854    struct varying_component *varying_comp_info;
855    unsigned varying_comp_info_size;
856 
857    /* Gather varying component info */
858    gather_varying_component_info(producer, consumer, &varying_comp_info,
859                                  &varying_comp_info_size,
860                                  default_to_smooth_interp);
861 
862    /* Sort varying components. */
863    qsort(varying_comp_info, varying_comp_info_size,
864          sizeof(struct varying_component), cmp_varying_component);
865 
866    unsigned cursor = 0;
867    unsigned comp = 0;
868 
869    /* Set the remap array based on the sorted components */
870    for (unsigned i = 0; i < varying_comp_info_size; i++) {
871       struct varying_component *info = &varying_comp_info[i];
872 
873       assert(info->is_patch || cursor < MAX_VARYING);
874       if (info->is_patch) {
875          /* The list should be sorted with all non-patch inputs first followed
876           * by patch inputs.  When we hit our first patch input, we need to
877           * reset the cursor to MAX_VARYING so we put them in the right slot.
878           */
879          if (cursor < MAX_VARYING) {
880             cursor = MAX_VARYING;
881             comp = 0;
882          }
883 
884          assign_remap_locations(remap, assigned_comps, info,
885                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
886                                 consumer->options->io_options);
887       } else {
888          assign_remap_locations(remap, assigned_comps, info,
889                                 &cursor, &comp, MAX_VARYING,
890                                 consumer->options->io_options);
891 
892          /* Check if we failed to assign a remap location. This can happen if
893           * for example there are a bunch of unmovable components with
894           * mismatching interpolation types causing us to skip over locations
895           * that would have been useful for packing later components.
896           * The solution is to iterate over the locations again (this should
897           * happen very rarely in practice).
898           */
899          if (cursor == MAX_VARYING) {
900             cursor = 0;
901             comp = 0;
902             assign_remap_locations(remap, assigned_comps, info,
903                                    &cursor, &comp, MAX_VARYING,
904                                    consumer->options->io_options);
905          }
906       }
907    }
908 
909    ralloc_free(varying_comp_info);
910 
911    uint64_t zero = 0;
912    uint32_t zero32 = 0;
913    remap_slots_and_components(consumer, nir_var_shader_in, remap,
914                               &consumer->info.inputs_read, &zero,
915                               &consumer->info.patch_inputs_read, &zero32);
916    remap_slots_and_components(producer, nir_var_shader_out, remap,
917                               &producer->info.outputs_written,
918                               &producer->info.outputs_read,
919                               &producer->info.patch_outputs_written,
920                               &producer->info.patch_outputs_read);
921 }
922 
923 /* We assume that this has been called more-or-less directly after
924  * remove_unused_varyings.  At this point, all of the varyings that we
925  * aren't going to be using have been completely removed and the
926  * inputs_read and outputs_written fields in nir_shader_info reflect
927  * this.  Therefore, the total set of valid slots is the OR of the two
928  * sets of varyings;  this accounts for varyings which one side may need
929  * to read/write even if the other doesn't.  This can happen if, for
930  * instance, an array is used indirectly from one side causing it to be
931  * unsplittable but directly from the other.
932  */
933 void
934 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
935                      bool default_to_smooth_interp)
936 {
937    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
938    assert(consumer->info.stage != MESA_SHADER_VERTEX);
939 
940    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = { { 0 } };
941 
942    get_unmoveable_components_masks(producer, nir_var_shader_out,
943                                    assigned_comps,
944                                    producer->info.stage,
945                                    default_to_smooth_interp);
946    get_unmoveable_components_masks(consumer, nir_var_shader_in,
947                                    assigned_comps,
948                                    consumer->info.stage,
949                                    default_to_smooth_interp);
950 
951    compact_components(producer, consumer, assigned_comps,
952                       default_to_smooth_interp);
953 }
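/* Sketch of a typical linking sequence (illustrative, not mandated by this
 * file beyond the ordering described above):
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       // run DCE on both shaders so the removed varyings disappear
 *    }
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */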
954 
955 /*
956  * Mark XFB varyings as always_active_io in the consumer so the linking opts
957  * don't touch them.
958  */
959 void
960 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
961 {
962    nir_variable *input_vars[MAX_VARYING][4] = { 0 };
963 
964    nir_foreach_shader_in_variable(var, consumer) {
965       if (var->data.location >= VARYING_SLOT_VAR0 &&
966           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
967 
968          unsigned location = var->data.location - VARYING_SLOT_VAR0;
969          input_vars[location][var->data.location_frac] = var;
970       }
971    }
972 
973    nir_foreach_shader_out_variable(var, producer) {
974       if (var->data.location >= VARYING_SLOT_VAR0 &&
975           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
976 
977          if (!var->data.always_active_io)
978             continue;
979 
980          unsigned location = var->data.location - VARYING_SLOT_VAR0;
981          if (input_vars[location][var->data.location_frac]) {
982             input_vars[location][var->data.location_frac]->data.always_active_io = true;
983          }
984       }
985    }
986 }
987 
988 static bool
989 does_varying_match(nir_variable *out_var, nir_variable *in_var)
990 {
991    return in_var->data.location == out_var->data.location &&
992           in_var->data.location_frac == out_var->data.location_frac &&
993           in_var->type == out_var->type;
994 }
995 
996 static nir_variable *
997 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
998 {
999    nir_foreach_shader_in_variable(var, consumer) {
1000       if (does_varying_match(out_var, var))
1001          return var;
1002    }
1003 
1004    return NULL;
1005 }
1006 
1007 static bool
1008 can_replace_varying(nir_variable *out_var)
1009 {
1010    /* Skip types that require more complex handling.
1011     * TODO: add support for these types.
1012     */
1013    if (glsl_type_is_array(out_var->type) ||
1014        glsl_type_is_dual_slot(out_var->type) ||
1015        glsl_type_is_matrix(out_var->type) ||
1016        glsl_type_is_struct_or_ifc(out_var->type))
1017       return false;
1018 
1019    /* Limit this pass to scalars for now to keep things simple. Most varyings
1020     * should have been lowered to scalars at this point anyway.
1021     */
1022    if (!glsl_type_is_scalar(out_var->type))
1023       return false;
1024 
1025    if (out_var->data.location < VARYING_SLOT_VAR0 ||
1026        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1027       return false;
1028 
1029    return true;
1030 }
1031 
1032 static bool
1033 replace_varying_input_by_constant_load(nir_shader *shader,
1034                                        nir_intrinsic_instr *store_intr)
1035 {
1036    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1037 
1038    nir_builder b = nir_builder_create(impl);
1039 
1040    nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1041 
1042    bool progress = false;
1043    nir_foreach_block(block, impl) {
1044       nir_foreach_instr(instr, block) {
1045          if (instr->type != nir_instr_type_intrinsic)
1046             continue;
1047 
1048          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1049          if (intr->intrinsic != nir_intrinsic_load_deref)
1050             continue;
1051 
1052          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1053          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1054             continue;
1055 
1056          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1057 
1058          if (!does_varying_match(out_var, in_var))
1059             continue;
1060 
1061          b.cursor = nir_before_instr(instr);
1062 
1063          nir_load_const_instr *out_const =
1064             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1065 
1066          /* Add new const to replace the input */
1067          nir_def *nconst = nir_build_imm(&b, store_intr->num_components,
1068                                          intr->def.bit_size,
1069                                          out_const->value);
1070 
1071          nir_def_rewrite_uses(&intr->def, nconst);
1072 
1073          progress = true;
1074       }
1075    }
1076 
1077    return progress;
1078 }
1079 
1080 static bool
1081 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1082                         nir_intrinsic_instr *dup_store_intr)
1083 {
1084    assert(input_var);
1085 
1086    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1087 
1088    nir_builder b = nir_builder_create(impl);
1089 
1090    nir_variable *dup_out_var = nir_intrinsic_get_var(dup_store_intr, 0);
1091 
1092    bool progress = false;
1093    nir_foreach_block(block, impl) {
1094       nir_foreach_instr(instr, block) {
1095          if (instr->type != nir_instr_type_intrinsic)
1096             continue;
1097 
1098          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1099          if (intr->intrinsic != nir_intrinsic_load_deref)
1100             continue;
1101 
1102          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1103          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1104             continue;
1105 
1106          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1107 
1108          if (!does_varying_match(dup_out_var, in_var) ||
1109              in_var->data.interpolation != input_var->data.interpolation ||
1110              get_interp_loc(in_var) != get_interp_loc(input_var) ||
1111              in_var->data.per_vertex)
1112             continue;
1113 
1114          b.cursor = nir_before_instr(instr);
1115 
1116          nir_def *load = nir_load_var(&b, input_var);
1117          nir_def_rewrite_uses(&intr->def, load);
1118 
1119          progress = true;
1120       }
1121    }
1122 
1123    return progress;
1124 }
1125 
1126 static bool
1127 is_direct_uniform_load(nir_def *def, nir_scalar *s)
1128 {
1129    /* def is sure to be scalar, as can_replace_varying() filters out the vector case. */
1130    assert(def->num_components == 1);
1131 
1132    /* Uniform load may hide behind some move instruction for converting
1133     * vector to scalar:
1134     *
1135     *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
1136     *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1137     *     vec1 32 ssa_3 = mov ssa_2.x
1138     *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1139     *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1140     */
1141    *s = nir_scalar_resolved(def, 0);
1142 
1143    nir_def *ssa = s->def;
1144    if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1145       return false;
1146 
1147    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1148    if (intr->intrinsic != nir_intrinsic_load_deref)
1149       return false;
1150 
1151    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1152    /* TODO: support nir_var_mem_ubo. */
1153    if (!nir_deref_mode_is(deref, nir_var_uniform))
1154       return false;
1155 
1156    /* Does not support indirect uniform load. */
1157    return !nir_deref_instr_has_indirect(deref);
1158 }
1159 
1160 /**
1161  * Add a uniform variable from one shader to a different shader.
1162  *
1163  * \param nir     The shader where to add the uniform
1164  * \param uniform The uniform that's declared in another shader.
1165  */
1166 nir_variable *
1167 nir_clone_uniform_variable(nir_shader *nir, nir_variable *uniform, bool spirv)
1168 {
1169    /* Find out whether the uniform already exists in the consumer. */
1170    nir_variable *new_var = NULL;
1171    nir_foreach_variable_with_modes(v, nir, uniform->data.mode) {
1172       if ((spirv && uniform->data.mode & nir_var_mem_ubo &&
1173            v->data.binding == uniform->data.binding) ||
1174           (!spirv && !strcmp(uniform->name, v->name))) {
1175          new_var = v;
1176          break;
1177       }
1178    }
1179 
1180    /* Create the variable if it doesn't exist. */
1181    if (!new_var) {
1182       new_var = nir_variable_clone(uniform, nir);
1183       nir_shader_add_variable(nir, new_var);
1184    }
1185 
1186    return new_var;
1187 }
1188 
1189 nir_deref_instr *
1190 nir_clone_deref_instr(nir_builder *b, nir_variable *var,
1191                       nir_deref_instr *deref)
1192 {
1193    if (deref->deref_type == nir_deref_type_var)
1194       return nir_build_deref_var(b, var);
1195 
1196    nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1197    nir_deref_instr *parent = nir_clone_deref_instr(b, var, parent_deref);
1198 
1199    /* Build array and struct deref instructions.
1200     * The "deref" instr is sure to be direct (see is_direct_uniform_load()).
1201     */
1202    switch (deref->deref_type) {
1203    case nir_deref_type_array: {
1204       if (b->shader ==
1205           nir_cf_node_get_function(&deref->instr.block->cf_node)->function->shader) {
1206          /* Cloning within the same shader. */
1207          return nir_build_deref_array(b, parent, deref->arr.index.ssa);
1208       } else {
1209          /* Cloning to a different shader. The index must be constant because
1210           * we don't implement cloning the index SSA here.
1211           */
1212          nir_load_const_instr *index =
1213             nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1214          return nir_build_deref_array_imm(b, parent, index->value->i64);
1215       }
1216    }
1217    case nir_deref_type_ptr_as_array: {
1218       nir_load_const_instr *index =
1219          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1220       nir_def *ssa = nir_imm_intN_t(b, index->value->i64,
1221                                     parent->def.bit_size);
1222       return nir_build_deref_ptr_as_array(b, parent, ssa);
1223    }
1224    case nir_deref_type_struct:
1225       return nir_build_deref_struct(b, parent, deref->strct.index);
1226    default:
1227       unreachable("invalid type");
1228       return NULL;
1229    }
1230 }
1231 
1232 static bool
1233 replace_varying_input_by_uniform_load(nir_shader *shader,
1234                                       nir_intrinsic_instr *store_intr,
1235                                       nir_scalar *scalar)
1236 {
1237    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1238 
1239    nir_builder b = nir_builder_create(impl);
1240 
1241    nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1242 
1243    nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1244    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1245    nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1246    uni_var = nir_clone_uniform_variable(shader, uni_var, false);
1247 
1248    bool progress = false;
1249    nir_foreach_block(block, impl) {
1250       nir_foreach_instr(instr, block) {
1251          if (instr->type != nir_instr_type_intrinsic)
1252             continue;
1253 
1254          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1255          if (intr->intrinsic != nir_intrinsic_load_deref)
1256             continue;
1257 
1258          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1259          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1260             continue;
1261 
1262          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1263 
1264          if (!does_varying_match(out_var, in_var))
1265             continue;
1266 
1267          b.cursor = nir_before_instr(instr);
1268 
1269          /* Clone the whole deref chain, from the variable deref down to the loaded deref. */
1270          nir_deref_instr *uni_deref = nir_clone_deref_instr(&b, uni_var, deref);
1271          nir_def *uni_def = nir_load_deref(&b, uni_deref);
1272 
1273          /* Add a vector-to-scalar move if the uniform is a vector. */
1274          if (uni_def->num_components > 1) {
1275             nir_alu_src src = { 0 };
1276             src.src = nir_src_for_ssa(uni_def);
1277             src.swizzle[0] = scalar->comp;
1278             uni_def = nir_mov_alu(&b, src, 1);
1279          }
1280 
1281          /* Replace load input with load uniform. */
1282          nir_def_rewrite_uses(&intr->def, uni_def);
1283 
1284          progress = true;
1285       }
1286    }
1287 
1288    return progress;
1289 }
1290 
1291 /* The GLSL ES 3.20 spec says:
1292  *
1293  * "The precision of a vertex output does not need to match the precision of
1294  * the corresponding fragment input. The minimum precision at which vertex
1295  * outputs are interpolated is the minimum of the vertex output precision and
1296  * the fragment input precision, with the exception that for highp,
1297  * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1298  * Output Matching by Name in Linked Programs")
1299  *
1300  * To implement this, when linking shaders we will take the minimum precision
1301  * qualifier (allowing drivers to interpolate at lower precision). For
1302  * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1303  * requires we use the *last* specified precision if there is a conflict.
1304  *
1305  * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1306  * NONE, we'll return the other precision, since there is no conflict.
1307  * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1308  * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1309  * "backwards". For non-fragment stages, we'll pick the latter precision to
1310  * comply with the spec. (Note that the order matters.)
1311  *
1312  * For streamout, "Variables declared with lowp or mediump precision are
1313  * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1314  * of the OpenGL ES 3.2 specification). So drivers should promote them for
1315  * the transform feedback memory store, but not for the output store.
1316  */
1317 
1318 static unsigned
1319 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1320 {
1321    if (producer == GLSL_PRECISION_NONE)
1322       return consumer;
1323    else if (consumer == GLSL_PRECISION_NONE)
1324       return producer;
1325    else
1326       return fs ? MAX2(producer, consumer) : consumer;
1327 }
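/* Worked example (illustrative): linking a highp vertex output with a mediump
 * fragment input gives MAX2(GLSL_PRECISION_HIGH, GLSL_PRECISION_MEDIUM) ==
 * GLSL_PRECISION_MEDIUM, because the enum values are ordered
 * NONE < HIGH < MEDIUM < LOW as noted above.
 */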
1328 
1329 static nir_variable *
1330 find_consumer_variable(const nir_shader *consumer,
1331                        const nir_variable *producer_var)
1332 {
1333    nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) {
1334       if (var->data.location == producer_var->data.location &&
1335           var->data.location_frac == producer_var->data.location_frac)
1336          return var;
1337    }
1338    return NULL;
1339 }
1340 
1341 void
1342 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1343 {
1344    bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1345 
1346    nir_foreach_shader_out_variable(producer_var, producer) {
1347       /* Skip if the slot is not assigned */
1348       if (producer_var->data.location < 0)
1349          continue;
1350 
1351       nir_variable *consumer_var = find_consumer_variable(consumer,
1352                                                           producer_var);
1353 
1354       /* Skip if the variable will be eliminated */
1355       if (!consumer_var)
1356          continue;
1357 
1358       /* Now we have a pair of variables. Let's pick the smaller precision. */
1359       unsigned precision_1 = producer_var->data.precision;
1360       unsigned precision_2 = consumer_var->data.precision;
1361       unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1362 
1363       /* Propagate the new precision */
1364       producer_var->data.precision = consumer_var->data.precision = minimum;
1365    }
1366 }
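
/* A minimal usage sketch (hypothetical linker code, not taken from any
 * particular driver): the pass is intended to run on each adjacent pair of
 * linked stages once both shaders' varyings have their location slots set,
 * since variables are matched purely by location/location_frac above:
 *
 *    for (unsigned i = 0; i + 1 < num_stages; i++)
 *       nir_link_varying_precision(stages[i], stages[i + 1]);
 */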
1367 
1368 bool
1369 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1370 {
1371    /* TODO: Add support for more shader stage combinations */
1372    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1373        (producer->info.stage != MESA_SHADER_VERTEX &&
1374         producer->info.stage != MESA_SHADER_TESS_EVAL))
1375       return false;
1376 
1377    bool progress = false;
1378 
1379    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1380 
1381    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1382 
1383    /* If we find a store in the last block of the producer we can be sure this
1384     * is the only possible value for this output.
1385     */
1386    nir_block *last_block = nir_impl_last_block(impl);
1387    nir_foreach_instr_reverse(instr, last_block) {
1388       if (instr->type != nir_instr_type_intrinsic)
1389          continue;
1390 
1391       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1392 
1393       if (intr->intrinsic != nir_intrinsic_store_deref)
1394          continue;
1395 
1396       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1397       if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1398          continue;
1399 
1400       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1401       if (!can_replace_varying(out_var))
1402          continue;
1403 
1404       nir_def *ssa = intr->src[1].ssa;
1405       if (ssa->parent_instr->type == nir_instr_type_load_const) {
1406          progress |= replace_varying_input_by_constant_load(consumer, intr);
1407          continue;
1408       }
1409 
1410       nir_scalar uni_scalar;
1411       if (consumer->options->max_varying_expression_cost >= 2 &&
1412           is_direct_uniform_load(ssa, &uni_scalar)) {
1413          progress |= replace_varying_input_by_uniform_load(consumer, intr,
1414                                                            &uni_scalar);
1415          continue;
1416       }
1417 
1418       struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1419       if (entry) {
1420          progress |= replace_duplicate_input(consumer,
1421                                              (nir_variable *)entry->data,
1422                                              intr);
1423       } else {
1424          nir_variable *in_var = get_matching_input_var(consumer, out_var);
1425          if (in_var) {
1426             _mesa_hash_table_insert(varying_values, ssa, in_var);
1427          }
1428       }
1429    }
1430 
1431    _mesa_hash_table_destroy(varying_values, NULL);
1432 
1433    return progress;
1434 }
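
/* Examples of what the loop above catches, written as (hypothetical) GLSL for
 * a VS/FS pair:
 *
 *    VS: out_a = 1.0;              FS loads of in_a become the constant 1.0
 *    VS: out_b = u_scale;          FS loads of in_b become loads of u_scale
 *                                  (when max_varying_expression_cost allows)
 *    VS: out_c = x; out_d = x;     FS loads of in_d are redirected to in_c,
 *                                  so the duplicate varying can be eliminated
 */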
1435 
1436 /* TODO any better helper somewhere to sort a list? */
1437 
1438 static void
1439 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1440 {
1441    nir_foreach_variable_in_list(var, var_list) {
1442       /* Use the `per_primitive` bool to sort per-primitive variables
1443        * to the end of the list, so they are assigned the last driver
1444        * locations by nir_assign_io_var_locations.
1444        * by nir_assign_io_var_locations.
1445        *
1446        * This is done because AMD HW requires that per-primitive outputs
1447        * are the last params.
1448        * In the future we can add an option for this, if needed by other HW.
1449        */
1450       if (new_var->data.per_primitive < var->data.per_primitive ||
1451           (new_var->data.per_primitive == var->data.per_primitive &&
1452            (var->data.location > new_var->data.location ||
1453             (var->data.location == new_var->data.location &&
1454              var->data.location_frac > new_var->data.location_frac)))) {
1455          exec_node_insert_node_before(&var->node, &new_var->node);
1456          return;
1457       }
1458    }
1459    exec_list_push_tail(var_list, &new_var->node);
1460 }
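
/* For example, inserting variables with (location, location_frac,
 * per_primitive) of (5, 0, false), (3, 2, false), (3, 0, false) and
 * (1, 0, true) produces the list order:
 *
 *    (3, 0, false), (3, 2, false), (5, 0, false), (1, 0, true)
 *
 * i.e. ascending (location, location_frac) with every per-primitive variable
 * placed after the per-vertex ones.
 */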
1461 
1462 static void
1463 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1464               struct exec_list *sorted_list)
1465 {
1466    exec_list_make_empty(sorted_list);
1467    nir_foreach_variable_with_modes_safe(var, shader, mode) {
1468       exec_node_remove(&var->node);
1469       insert_sorted(sorted_list, var);
1470    }
1471 }
1472 
1473 void
1474 nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode)
1475 {
1476    struct exec_list vars;
1477 
1478    sort_varyings(shader, mode, &vars);
1479    exec_list_append(&shader->variables, &vars);
1480 }
1481 
1482 void
1483 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1484                             unsigned *size, gl_shader_stage stage)
1485 {
1486    unsigned location = 0;
1487    unsigned assigned_locations[VARYING_SLOT_TESS_MAX][2];
1488    uint64_t processed_locs[2] = { 0 };
1489 
1490    struct exec_list io_vars;
1491    sort_varyings(shader, mode, &io_vars);
1492 
1493    int ASSERTED last_loc = 0;
1494    bool ASSERTED last_per_prim = false;
1495    bool last_partial = false;
1496    nir_foreach_variable_in_list(var, &io_vars) {
1497       const struct glsl_type *type = var->type;
1498       if (nir_is_arrayed_io(var, stage)) {
1499          assert(glsl_type_is_array(type));
1500          type = glsl_get_array_element(type);
1501       }
1502 
1503       int base;
1504       if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1505          base = VERT_ATTRIB_GENERIC0;
1506       else if (var->data.mode == nir_var_shader_out &&
1507                stage == MESA_SHADER_FRAGMENT)
1508          base = FRAG_RESULT_DATA0;
1509       else
1510          base = VARYING_SLOT_VAR0;
1511 
1512       unsigned var_size, driver_size;
1513       if (var->data.compact) {
1514          /* If we are inside a partial compact,
1515           * don't allow another compact to be in this slot
1516           * if it starts at component 0.
1517           */
1518          if (last_partial && var->data.location_frac == 0) {
1519             location++;
1520          }
1521 
1522          /* compact variables must be arrays of scalars */
1523          assert(!var->data.per_view);
1524          assert(glsl_type_is_array(type));
1525          assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1526          unsigned start = 4 * location + var->data.location_frac;
1527          unsigned end = start + glsl_get_length(type);
1528          var_size = driver_size = end / 4 - location;
1529          last_partial = end % 4 != 0;
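         /* Worked example (hypothetical values): a compact float[5] array
          * such as gl_ClipDistance with location_frac == 0, reached when
          * 'location' is 2: start = 8, end = 13, so var_size = driver_size =
          * 13 / 4 - 2 = 1 and last_partial = true. The fifth element lands in
          * component 0 of the next slot, which is accounted for either by the
          * following variable or by the final 'if (last_partial) location++'
          * below.
          */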
1530       } else {
1531          /* Compact variables bypass the normal varying compacting pass,
1532           * which means they cannot be in the same vec4 slot as a normal
1533           * variable. If part of the current slot is taken up by a compact
1534           * variable, we need to go to the next one.
1535           */
1536          if (last_partial) {
1537             location++;
1538             last_partial = false;
1539          }
1540 
1541          var_size = glsl_count_attribute_slots(type, false);
1542          if (var->data.per_view &&
1543              shader->options->per_view_unique_driver_locations) {
1544             /* per-view variables have an extra array dimension, which is
1545              * ignored when counting user-facing slots (var->data.location),
1546              * but *not* with driver slots (var->data.driver_location). That
1547              * is, each user slot maps to multiple driver slots. */
1548             const struct glsl_type *array_type = var->type;
1549             driver_size = glsl_count_attribute_slots(array_type, false);
1550          } else {
1551             driver_size = var_size;
1552          }
1553       }
1554 
1555       /* Builtins don't allow component packing so we only need to worry about
1556        * user defined varyings sharing the same location.
1557        */
1558       bool processed = false;
1559       if (var->data.location >= base) {
1560          unsigned glsl_location = var->data.location - base;
1561 
1562          for (unsigned i = 0; i < var_size; i++) {
1563             if (processed_locs[var->data.index] &
1564                 ((uint64_t)1 << (glsl_location + i)))
1565                processed = true;
1566             else
1567                processed_locs[var->data.index] |=
1568                   ((uint64_t)1 << (glsl_location + i));
1569          }
1570       }
1571 
1572       /* Because component packing allows varyings to share the same location
1573        * we may already have processed this location.
1574        */
1575       if (processed) {
1576          /* TODO handle overlapping per-view variables */
1577          assert(!var->data.per_view);
1578          unsigned driver_location = assigned_locations[var->data.location][var->data.index];
1579          var->data.driver_location = driver_location;
1580 
1581          /* An array may be packed such that it crosses multiple other arrays
1582           * or variables, so we need to make sure we have allocated the elements
1583           * consecutively if the previously processed var was shorter than
1584           * the current array we are processing.
1585           *
1586           * NOTE: The code below assumes the var list is ordered in ascending
1587           * location order, but per-vertex/per-primitive outputs may be
1588           * grouped separately.
1589           */
1590          assert(last_loc <= var->data.location ||
1591                 last_per_prim != var->data.per_primitive);
1592          last_loc = var->data.location;
1593          last_per_prim = var->data.per_primitive;
1594          unsigned last_slot_location = driver_location + var_size;
1595          if (last_slot_location > location) {
1596             unsigned num_unallocated_slots = last_slot_location - location;
1597             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1598             for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1599                assigned_locations[var->data.location + i][var->data.index] = location;
1600                location++;
1601             }
1602          }
1603          continue;
1604       }
1605 
1606       for (unsigned i = 0; i < var_size; i++) {
1607          assigned_locations[var->data.location + i][var->data.index] = location + i;
1608       }
1609 
1610       var->data.driver_location = location;
1611       location += driver_size;
1612    }
1613 
1614    if (last_partial)
1615       location++;
1616 
1617    exec_list_append(&shader->variables, &io_vars);
1618    *size = location;
1619 }
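
/* A minimal usage sketch (hypothetical driver code): backends that consume
 * driver_location typically run this once per mode, e.g.
 *
 *    nir_assign_io_var_locations(nir, nir_var_shader_in,
 *                                &nir->num_inputs, nir->info.stage);
 *    nir_assign_io_var_locations(nir, nir_var_shader_out,
 *                                &nir->num_outputs, nir->info.stage);
 *
 * after which nir->num_inputs / nir->num_outputs hold the number of driver
 * slots consumed by each mode.
 */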
1620