1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28 
29 /* This file contains various little helpers for doing simple linking in
30  * NIR.  Eventually, we'll probably want a full-blown varying packing
31  * implementation in here.  Right now, it just deletes unused things.
32  */
33 
34 /**
35  * Returns the bits in the inputs_read or outputs_written
36  * bitfield corresponding to this variable.
37  */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41    if (var->data.location < 0)
42       return 0;
43 
44    unsigned location = var->data.patch ?
45       var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46 
47    assert(var->data.mode == nir_var_shader_in ||
48           var->data.mode == nir_var_shader_out);
49    assert(var->data.location >= 0);
50 
51    const struct glsl_type *type = var->type;
52    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
53       assert(glsl_type_is_array(type));
54       type = glsl_get_array_element(type);
55    }
56 
57    unsigned slots = glsl_count_attribute_slots(type, false);
58    return ((1ull << slots) - 1) << location;
59 }
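/* Illustrative example (not part of the original source): a non-arrayed
 * float[3] varying occupies three attribute slots, so the mask above is
 * ((1ull << 3) - 1) shifted to the variable's slot, i.e. three consecutive
 * bits.  Patch varyings are first rebased against VARYING_SLOT_PATCH0 so
 * the same 64-bit mask fits the separate patch bitfields.
 */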
60 
61 static bool
62 is_non_generic_patch_var(nir_variable *var)
63 {
64    return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65           var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66           var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67           var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69 
70 static uint8_t
71 get_num_components(nir_variable *var)
72 {
73    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74       return 4;
75 
76    return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
78 
79 static void
80 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82    nir_foreach_function(function, shader) {
83       if (!function->impl)
84          continue;
85 
86       nir_foreach_block(block, function->impl) {
87          nir_foreach_instr(instr, block) {
88             if (instr->type != nir_instr_type_intrinsic)
89                continue;
90 
91             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
92             if (intrin->intrinsic != nir_intrinsic_load_deref)
93                continue;
94 
95             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
96             if (!nir_deref_mode_is(deref, nir_var_shader_out))
97                continue;
98 
99             nir_variable *var = nir_deref_instr_get_variable(deref);
100             for (unsigned i = 0; i < get_num_components(var); i++) {
101                if (var->data.patch) {
102                   if (is_non_generic_patch_var(var))
103                      continue;
104 
105                   patches_read[var->data.location_frac + i] |=
106                      get_variable_io_mask(var, shader->info.stage);
107                } else {
108                   read[var->data.location_frac + i] |=
109                      get_variable_io_mask(var, shader->info.stage);
110                }
111             }
112          }
113       }
114    }
115 }
116 
117 /**
118  * Helper for removing unused shader I/O variables, by demoting them to global
119  * variables (which may then be dead-code eliminated).
120  *
121  * Example usage is:
122  *
123  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
124  *                                      read, patches_read) ||
125  *                                      progress;
126  *
127  * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
128  * representing each .location_frac used.  Note that for vector variables,
129  * only the first channel (.location_frac) is examined for deciding if the
130  * variable is used!
131  */
132 bool
133 nir_remove_unused_io_vars(nir_shader *shader,
134                           nir_variable_mode mode,
135                           uint64_t *used_by_other_stage,
136                           uint64_t *used_by_other_stage_patches)
137 {
138    bool progress = false;
139    uint64_t *used;
140 
141    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
142 
143    nir_foreach_variable_with_modes_safe(var, shader, mode) {
144       if (var->data.patch)
145          used = used_by_other_stage_patches;
146       else
147          used = used_by_other_stage;
148 
149       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
150          continue;
151 
152       if (var->data.always_active_io)
153          continue;
154 
155       if (var->data.explicit_xfb_buffer)
156          continue;
157 
158       uint64_t other_stage = used[var->data.location_frac];
159 
160       if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
161          /* This one is invalid, make it a global variable instead */
162          var->data.location = 0;
163          var->data.mode = nir_var_shader_temp;
164 
165          progress = true;
166       }
167    }
168 
169    if (progress)
170       nir_fixup_deref_modes(shader);
171 
172    return progress;
173 }
174 
175 bool
176 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
177 {
178    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
179    assert(consumer->info.stage != MESA_SHADER_VERTEX);
180 
181    uint64_t read[4] = { 0 }, written[4] = { 0 };
182    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
183 
184    nir_foreach_shader_out_variable(var, producer) {
185       for (unsigned i = 0; i < get_num_components(var); i++) {
186          if (var->data.patch) {
187             if (is_non_generic_patch_var(var))
188                continue;
189 
190             patches_written[var->data.location_frac + i] |=
191                get_variable_io_mask(var, producer->info.stage);
192          } else {
193             written[var->data.location_frac + i] |=
194                get_variable_io_mask(var, producer->info.stage);
195          }
196       }
197    }
198 
199    nir_foreach_shader_in_variable(var, consumer) {
200       for (unsigned i = 0; i < get_num_components(var); i++) {
201          if (var->data.patch) {
202             if (is_non_generic_patch_var(var))
203                continue;
204 
205             patches_read[var->data.location_frac + i] |=
206                get_variable_io_mask(var, consumer->info.stage);
207          } else {
208             read[var->data.location_frac + i] |=
209                get_variable_io_mask(var, consumer->info.stage);
210          }
211       }
212    }
213 
214    /* Each TCS invocation can read data written by other TCS invocations,
215     * so even if the outputs are not used by the TES we must also make
216     * sure they are not read by the TCS before demoting them to globals.
217     */
218    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
219       tcs_add_output_reads(producer, read, patches_read);
220 
221    bool progress = false;
222    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
223                                         patches_read);
224 
225    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
226                                         patches_written) || progress;
227 
228    return progress;
229 }
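/* Usage sketch (illustrative, not from the original source): a linker would
 * typically call nir_remove_unused_varyings() on each adjacent
 * producer/consumer pair and then run its usual dead-variable / dead-code
 * cleanup so the outputs demoted to nir_var_shader_temp actually disappear,
 * e.g.:
 *
 *    if (nir_remove_unused_varyings(vs, fs)) {
 *       // re-run the driver's cleanup passes on both shaders
 *    }
 */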
230 
231 static uint8_t
232 get_interp_type(nir_variable *var, const struct glsl_type *type,
233                 bool default_to_smooth_interp)
234 {
235    if (glsl_type_is_integer(type))
236       return INTERP_MODE_FLAT;
237    else if (var->data.interpolation != INTERP_MODE_NONE)
238       return var->data.interpolation;
239    else if (default_to_smooth_interp)
240       return INTERP_MODE_SMOOTH;
241    else
242       return INTERP_MODE_NONE;
243 }
244 
245 #define INTERPOLATE_LOC_SAMPLE 0
246 #define INTERPOLATE_LOC_CENTROID 1
247 #define INTERPOLATE_LOC_CENTER 2
248 
249 static uint8_t
250 get_interp_loc(nir_variable *var)
251 {
252    if (var->data.sample)
253       return INTERPOLATE_LOC_SAMPLE;
254    else if (var->data.centroid)
255       return INTERPOLATE_LOC_CENTROID;
256    else
257       return INTERPOLATE_LOC_CENTER;
258 }
259 
260 static bool
261 is_packing_supported_for_type(const struct glsl_type *type)
262 {
263    /* We ignore complex types such as arrays, matrices, structs and bit sizes
264     * other than 32-bit. All other vector types should have been split into
265     * scalar variables by the lower_io_to_scalar pass. The only exception
266     * should be OpenGL xfb varyings.
267     * TODO: add support for more complex types?
268     */
269    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
270 }
271 
272 struct assigned_comps
273 {
274    uint8_t comps;
275    uint8_t interp_type;
276    uint8_t interp_loc;
277    bool is_32bit;
278    bool is_mediump;
279 };
280 
281 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
282  * algorithms this function just assigns them their existing locations for now.
283  * TODO: allow better packing of complex types.
284  */
285 static void
286 get_unmoveable_components_masks(nir_shader *shader,
287                                 nir_variable_mode mode,
288                                 struct assigned_comps *comps,
289                                 gl_shader_stage stage,
290                                 bool default_to_smooth_interp)
291 {
292    nir_foreach_variable_with_modes_safe(var, shader, mode) {
293       assert(var->data.location >= 0);
294 
295       /* Only remap things that aren't built-ins. */
296       if (var->data.location >= VARYING_SLOT_VAR0 &&
297           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
298 
299          const struct glsl_type *type = var->type;
300          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
301             assert(glsl_type_is_array(type));
302             type = glsl_get_array_element(type);
303          }
304 
305          /* If we can pack this varying then don't mark the components as
306           * used.
307           */
308          if (is_packing_supported_for_type(type))
309             continue;
310 
311          unsigned location = var->data.location - VARYING_SLOT_VAR0;
312 
313          unsigned elements =
314             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
315             glsl_get_vector_elements(glsl_without_array(type)) : 4;
316 
317          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
318          unsigned slots = glsl_count_attribute_slots(type, false);
319          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
320          unsigned comps_slot2 = 0;
321          for (unsigned i = 0; i < slots; i++) {
322             if (dual_slot) {
323                if (i & 1) {
324                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
325                } else {
326                   unsigned num_comps = 4 - var->data.location_frac;
327                   comps_slot2 = (elements * dmul) - num_comps;
328 
329                   /* Assume ARB_enhanced_layouts packing rules for doubles */
330                   assert(var->data.location_frac == 0 ||
331                          var->data.location_frac == 2);
332                   assert(comps_slot2 <= 4);
333 
334                   comps[location + i].comps |=
335                      ((1 << num_comps) - 1) << var->data.location_frac;
336                }
337             } else {
338                comps[location + i].comps |=
339                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
340             }
341 
342             comps[location + i].interp_type =
343                get_interp_type(var, type, default_to_smooth_interp);
344             comps[location + i].interp_loc = get_interp_loc(var);
345             comps[location + i].is_32bit =
346                glsl_type_is_32bit(glsl_without_array(type));
347             comps[location + i].is_mediump =
348                var->data.precision == GLSL_PRECISION_MEDIUM ||
349                var->data.precision == GLSL_PRECISION_LOW;
350          }
351       }
352    }
353 }
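/* Worked example (not part of the original source): a dvec3 with
 * location_frac == 0 has elements == 3 and dmul == 2, i.e. 6 components
 * spread over two slots.  The first slot is marked with num_comps == 4
 * (mask 0xf), comps_slot2 becomes 6 - 4 == 2, and the second slot is
 * marked with mask 0x3, matching the ARB_enhanced_layouts rules assumed
 * in the loop above.
 */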
354 
355 struct varying_loc
356 {
357    uint8_t component;
358    uint32_t location;
359 };
360 
361 static void
362 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
363                     uint64_t slots_used_mask, unsigned num_slots)
364 {
365    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
366 
367    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
368       BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
369 }
370 
371 static void
372 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
373 {
374    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
375 
376    slots_used[var->data.patch ? 1 : 0] |=
377       BITFIELD64_BIT(var->data.location - loc_offset + offset);
378 }
379 
380 static void
381 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
382                            struct varying_loc (*remap)[4],
383                            uint64_t *slots_used, uint64_t *out_slots_read,
384                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
385  {
386    const gl_shader_stage stage = shader->info.stage;
387    uint64_t out_slots_read_tmp[2] = {0};
388    uint64_t slots_used_tmp[2] = {0};
389 
390    /* We don't touch builtins so just copy the bitmask */
391    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
392 
393    nir_foreach_variable_with_modes(var, shader, mode) {
394       assert(var->data.location >= 0);
395 
396       /* Only remap things that aren't built-ins */
397       if (var->data.location >= VARYING_SLOT_VAR0 &&
398           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
399 
400          const struct glsl_type *type = var->type;
401          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
402             assert(glsl_type_is_array(type));
403             type = glsl_get_array_element(type);
404          }
405 
406          unsigned num_slots = glsl_count_attribute_slots(type, false);
407          bool used_across_stages = false;
408          bool outputs_read = false;
409 
410          unsigned location = var->data.location - VARYING_SLOT_VAR0;
411          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
412 
413          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
414          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
415          uint64_t outs_used =
416             var->data.patch ? *p_out_slots_read : *out_slots_read;
417          uint64_t slots =
418             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
419 
420          if (slots & used)
421             used_across_stages = true;
422 
423          if (slots & outs_used)
424             outputs_read = true;
425 
426          if (new_loc->location) {
427             var->data.location = new_loc->location;
428             var->data.location_frac = new_loc->component;
429          }
430 
431          if (var->data.always_active_io) {
432             /* We can't apply link-time optimisations (specifically array
433              * splitting) to these, so we need to copy the existing mask;
434              * otherwise we will mess up the mask for things like partially
435              * marked arrays.
436              */
437             if (used_across_stages)
438                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
439 
440             if (outputs_read) {
441                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
442                                    num_slots);
443             }
444          } else {
445             for (unsigned i = 0; i < num_slots; i++) {
446                if (used_across_stages)
447                   mark_used_slot(var, slots_used_tmp, i);
448 
449                if (outputs_read)
450                   mark_used_slot(var, out_slots_read_tmp, i);
451             }
452          }
453       }
454    }
455 
456    *slots_used = slots_used_tmp[0];
457    *out_slots_read = out_slots_read_tmp[0];
458    *p_slots_used = slots_used_tmp[1];
459    *p_out_slots_read = out_slots_read_tmp[1];
460 }
461 
462 struct varying_component {
463    nir_variable *var;
464    uint8_t interp_type;
465    uint8_t interp_loc;
466    bool is_32bit;
467    bool is_patch;
468    bool is_mediump;
469    bool is_intra_stage_only;
470    bool initialised;
471 };
472 
473 static int
474 cmp_varying_component(const void *comp1_v, const void *comp2_v)
475 {
476    struct varying_component *comp1 = (struct varying_component *) comp1_v;
477    struct varying_component *comp2 = (struct varying_component *) comp2_v;
478 
479    /* We want patches to be ordered at the end of the array */
480    if (comp1->is_patch != comp2->is_patch)
481       return comp1->is_patch ? 1 : -1;
482 
483    /* We want to try to group together TCS outputs that are only read by other
484     * TCS invocations and not consumed by the following stage.
485     */
486    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
487       return comp1->is_intra_stage_only ? 1 : -1;
488 
489    /* Group mediump varyings together. */
490    if (comp1->is_mediump != comp2->is_mediump)
491       return comp1->is_mediump ? 1 : -1;
492 
493    /* We can only pack varyings with matching interpolation types so group
494     * them together.
495     */
496    if (comp1->interp_type != comp2->interp_type)
497       return comp1->interp_type - comp2->interp_type;
498 
499    /* Interpolation loc must match also. */
500    if (comp1->interp_loc != comp2->interp_loc)
501       return comp1->interp_loc - comp2->interp_loc;
502 
503    /* If everything else matches just use the original location to sort */
504    const struct nir_variable_data *const data1 = &comp1->var->data;
505    const struct nir_variable_data *const data2 = &comp2->var->data;
506    if (data1->location != data2->location)
507       return data1->location - data2->location;
508    return (int)data1->location_frac - (int)data2->location_frac;
509 }
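/* Added summary (not part of the original source): the comparator above
 * yields an ordering where non-patch components come before patch
 * components, cross-stage varyings before intra-stage-only TCS outputs,
 * non-mediump before mediump; components are then grouped by interpolation
 * type and location, and finally ordered by their original slot/component.
 */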
510 
511 static void
512 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
513                               struct varying_component **varying_comp_info,
514                               unsigned *varying_comp_info_size,
515                               bool default_to_smooth_interp)
516 {
517    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
518    unsigned num_of_comps_to_pack = 0;
519 
520    /* Count the number of varyings that can be packed and create a mapping
521     * of those varyings to the array we will pass to qsort.
522     */
523    nir_foreach_shader_out_variable(var, producer) {
524 
525       /* Only remap things that aren't builtins. */
526       if (var->data.location >= VARYING_SLOT_VAR0 &&
527           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
528 
529          /* We can't repack xfb varyings. */
530          if (var->data.always_active_io)
531             continue;
532 
533          const struct glsl_type *type = var->type;
534          if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
535             assert(glsl_type_is_array(type));
536             type = glsl_get_array_element(type);
537          }
538 
539          if (!is_packing_supported_for_type(type))
540             continue;
541 
542          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
543          store_varying_info_idx[loc][var->data.location_frac] =
544             ++num_of_comps_to_pack;
545       }
546    }
547 
548    *varying_comp_info_size = num_of_comps_to_pack;
549    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
550                                       num_of_comps_to_pack);
551 
552    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
553 
554    /* Walk over the shader and populate the varying component info array */
555    nir_foreach_block(block, impl) {
556       nir_foreach_instr(instr, block) {
557          if (instr->type != nir_instr_type_intrinsic)
558             continue;
559 
560          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
561          if (intr->intrinsic != nir_intrinsic_load_deref &&
562              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
563              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
564              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
565              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
566             continue;
567 
568          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
569          if (!nir_deref_mode_is(deref, nir_var_shader_in))
570             continue;
571 
572          /* We only remap things that aren't builtins. */
573          nir_variable *in_var = nir_deref_instr_get_variable(deref);
574          if (in_var->data.location < VARYING_SLOT_VAR0)
575             continue;
576 
577          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
578          if (location >= MAX_VARYINGS_INCL_PATCH)
579             continue;
580 
581          unsigned var_info_idx =
582             store_varying_info_idx[location][in_var->data.location_frac];
583          if (!var_info_idx)
584             continue;
585 
586          struct varying_component *vc_info =
587             &(*varying_comp_info)[var_info_idx-1];
588 
589          if (!vc_info->initialised) {
590             const struct glsl_type *type = in_var->type;
591             if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
592                 in_var->data.per_view) {
593                assert(glsl_type_is_array(type));
594                type = glsl_get_array_element(type);
595             }
596 
597             vc_info->var = in_var;
598             vc_info->interp_type =
599                get_interp_type(in_var, type, default_to_smooth_interp);
600             vc_info->interp_loc = get_interp_loc(in_var);
601             vc_info->is_32bit = glsl_type_is_32bit(type);
602             vc_info->is_patch = in_var->data.patch;
603             vc_info->is_mediump = !producer->options->linker_ignore_precision &&
604                (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
605                 in_var->data.precision == GLSL_PRECISION_LOW);
606             vc_info->is_intra_stage_only = false;
607             vc_info->initialised = true;
608          }
609       }
610    }
611 
612    /* Walk over the shader and populate the varying component info array
613     * for varyings which are read by other TCS instances but are not consumed
614     * by the TES.
615     */
616    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
617       impl = nir_shader_get_entrypoint(producer);
618 
619       nir_foreach_block(block, impl) {
620          nir_foreach_instr(instr, block) {
621             if (instr->type != nir_instr_type_intrinsic)
622                continue;
623 
624             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
625             if (intr->intrinsic != nir_intrinsic_load_deref)
626                continue;
627 
628             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
629             if (!nir_deref_mode_is(deref, nir_var_shader_out))
630                continue;
631 
632             /* We only remap things that aren't builtins. */
633             nir_variable *out_var = nir_deref_instr_get_variable(deref);
634             if (out_var->data.location < VARYING_SLOT_VAR0)
635                continue;
636 
637             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
638             if (location >= MAX_VARYINGS_INCL_PATCH)
639                continue;
640 
641             unsigned var_info_idx =
642                store_varying_info_idx[location][out_var->data.location_frac];
643             if (!var_info_idx) {
644                /* Something went wrong, the shader interfaces didn't match, so
645                 * abandon packing. This can happen for example when the
646                 * inputs are scalars but the outputs are struct members.
647                 */
648                *varying_comp_info_size = 0;
649                break;
650             }
651 
652             struct varying_component *vc_info =
653                &(*varying_comp_info)[var_info_idx-1];
654 
655             if (!vc_info->initialised) {
656                const struct glsl_type *type = out_var->type;
657                if (nir_is_arrayed_io(out_var, producer->info.stage)) {
658                   assert(glsl_type_is_array(type));
659                   type = glsl_get_array_element(type);
660                }
661 
662                vc_info->var = out_var;
663                vc_info->interp_type =
664                   get_interp_type(out_var, type, default_to_smooth_interp);
665                vc_info->interp_loc = get_interp_loc(out_var);
666                vc_info->is_32bit = glsl_type_is_32bit(type);
667                vc_info->is_patch = out_var->data.patch;
668                vc_info->is_mediump = !producer->options->linker_ignore_precision &&
669                   (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
670                    out_var->data.precision == GLSL_PRECISION_LOW);
671                vc_info->is_intra_stage_only = true;
672                vc_info->initialised = true;
673             }
674          }
675       }
676    }
677 
678    for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
679       struct varying_component *vc_info = &(*varying_comp_info)[i];
680       if (!vc_info->initialised) {
681          /* Something went wrong, the shader interfaces didn't match, so
682           * abandon packing. This can happen for example when the outputs are
683           * scalars but the inputs are struct members.
684           */
685          *varying_comp_info_size = 0;
686          break;
687       }
688    }
689 }
690 
691 static bool
692 allow_pack_interp_type(nir_pack_varying_options options, int type)
693 {
694    int sel;
695 
696    switch (type) {
697    case INTERP_MODE_NONE:
698       sel = nir_pack_varying_interp_mode_none;
699       break;
700    case INTERP_MODE_SMOOTH:
701       sel = nir_pack_varying_interp_mode_smooth;
702       break;
703    case INTERP_MODE_FLAT:
704       sel = nir_pack_varying_interp_mode_flat;
705       break;
706    case INTERP_MODE_NOPERSPECTIVE:
707       sel = nir_pack_varying_interp_mode_noperspective;
708       break;
709    default:
710       return false;
711    }
712 
713    return options & sel;
714 }
715 
716 static bool
717 allow_pack_interp_loc(nir_pack_varying_options options, int loc)
718 {
719    int sel;
720 
721    switch (loc) {
722    case INTERPOLATE_LOC_SAMPLE:
723       sel = nir_pack_varying_interp_loc_sample;
724       break;
725    case INTERPOLATE_LOC_CENTROID:
726       sel = nir_pack_varying_interp_loc_centroid;
727       break;
728    case INTERPOLATE_LOC_CENTER:
729       sel = nir_pack_varying_interp_loc_center;
730       break;
731    default:
732       return false;
733    }
734 
735    return options & sel;
736 }
737 
738 static void
739 assign_remap_locations(struct varying_loc (*remap)[4],
740                        struct assigned_comps *assigned_comps,
741                        struct varying_component *info,
742                        unsigned *cursor, unsigned *comp,
743                        unsigned max_location,
744                        nir_pack_varying_options options)
745 {
746    unsigned tmp_cursor = *cursor;
747    unsigned tmp_comp = *comp;
748 
749    for (; tmp_cursor < max_location; tmp_cursor++) {
750 
751       if (assigned_comps[tmp_cursor].comps) {
752          /* We can only pack varyings with matching precision. */
753          if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
754             tmp_comp = 0;
755             continue;
756          }
757 
758          /* Unless the driver supports packing varyings with mismatched
759           * interpolation types, we can only pack varyings whose types match.
760           */
761          if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
762              (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
763               !allow_pack_interp_type(options, info->interp_type))) {
764             tmp_comp = 0;
765             continue;
766          }
767 
768          /* Likewise, unless the driver supports packing varyings with
769           * mismatched interpolation locations, the locations must match.
770           */
771          if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
772              (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
773               !allow_pack_interp_loc(options, info->interp_loc))) {
774             tmp_comp = 0;
775             continue;
776          }
777 
778          /* We can only pack varyings with matching types, and the current
779           * algorithm only supports packing 32-bit.
780           */
781          if (!assigned_comps[tmp_cursor].is_32bit) {
782             tmp_comp = 0;
783             continue;
784          }
785 
786          while (tmp_comp < 4 &&
787                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
788             tmp_comp++;
789          }
790       }
791 
792       if (tmp_comp == 4) {
793          tmp_comp = 0;
794          continue;
795       }
796 
797       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
798 
799       /* Once we have assigned a location mark it as used */
800       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
801       assigned_comps[tmp_cursor].interp_type = info->interp_type;
802       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
803       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
804       assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
805 
806       /* Assign remap location */
807       remap[location][info->var->data.location_frac].component = tmp_comp++;
808       remap[location][info->var->data.location_frac].location =
809          tmp_cursor + VARYING_SLOT_VAR0;
810 
811       break;
812    }
813 
814    *cursor = tmp_cursor;
815    *comp = tmp_comp;
816 }
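/* Illustrative example (not part of the original source): if the slot at the
 * cursor already has components 0 and 1 taken (comps == 0x3) by 32-bit
 * components with compatible interpolation and precision, a new scalar is
 * assigned component 2 and remapped to VARYING_SLOT_VAR0 + cursor; if all
 * four components were taken, the cursor simply advances to the next slot.
 */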
817 
818 /* If there are empty components in the slot compact the remaining components
819  * as close to component 0 as possible. This will make it easier to fill the
820  * empty components with components from a different slot in a following pass.
821  */
822 static void
823 compact_components(nir_shader *producer, nir_shader *consumer,
824                    struct assigned_comps *assigned_comps,
825                    bool default_to_smooth_interp)
826 {
827    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
828    struct varying_component *varying_comp_info;
829    unsigned varying_comp_info_size;
830 
831    /* Gather varying component info */
832    gather_varying_component_info(producer, consumer, &varying_comp_info,
833                                  &varying_comp_info_size,
834                                  default_to_smooth_interp);
835 
836    /* Sort varying components. */
837    qsort(varying_comp_info, varying_comp_info_size,
838          sizeof(struct varying_component), cmp_varying_component);
839 
840    nir_pack_varying_options options = consumer->options->pack_varying_options;
841 
842    unsigned cursor = 0;
843    unsigned comp = 0;
844 
845    /* Set the remap array based on the sorted components */
846    for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
847       struct varying_component *info = &varying_comp_info[i];
848 
849       assert(info->is_patch || cursor < MAX_VARYING);
850       if (info->is_patch) {
851          /* The list should be sorted with all non-patch inputs first followed
852           * by patch inputs.  When we hit our first patch input, we need to
853           * reset the cursor to MAX_VARYING so we put them in the right slot.
854           */
855          if (cursor < MAX_VARYING) {
856             cursor = MAX_VARYING;
857             comp = 0;
858          }
859 
860          assign_remap_locations(remap, assigned_comps, info,
861                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
862                                 options);
863       } else {
864          assign_remap_locations(remap, assigned_comps, info,
865                                 &cursor, &comp, MAX_VARYING,
866                                 options);
867 
868          /* Check if we failed to assign a remap location. This can happen if
869           * for example there are a bunch of unmovable components with
870           * mismatching interpolation types causing us to skip over locations
871           * that would have been useful for packing later components.
872           * The solution is to iterate over the locations again (this should
873           * happen very rarely in practice).
874           */
875          if (cursor == MAX_VARYING) {
876             cursor = 0;
877             comp = 0;
878             assign_remap_locations(remap, assigned_comps, info,
879                                    &cursor, &comp, MAX_VARYING,
880                                    options);
881          }
882       }
883    }
884 
885    ralloc_free(varying_comp_info);
886 
887    uint64_t zero = 0;
888    uint32_t zero32 = 0;
889    remap_slots_and_components(consumer, nir_var_shader_in, remap,
890                               &consumer->info.inputs_read, &zero,
891                               &consumer->info.patch_inputs_read, &zero32);
892    remap_slots_and_components(producer, nir_var_shader_out, remap,
893                               &producer->info.outputs_written,
894                               &producer->info.outputs_read,
895                               &producer->info.patch_outputs_written,
896                               &producer->info.patch_outputs_read);
897 }
898 
899 /* We assume that this has been called more-or-less directly after
900  * remove_unused_varyings.  At this point, all of the varyings that we
901  * aren't going to be using have been completely removed and the
902  * inputs_read and outputs_written fields in nir_shader_info reflect
903  * this.  Therefore, the total set of valid slots is the OR of the two
904  * sets of varyings;  this accounts for varyings which one side may need
905  * to read/write even if the other doesn't.  This can happen if, for
906  * instance, an array is used indirectly from one side causing it to be
907  * unsplittable but directly from the other.
908  */
909 void
910 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
911                      bool default_to_smooth_interp)
912 {
913    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
914    assert(consumer->info.stage != MESA_SHADER_VERTEX);
915 
916    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
917 
918    get_unmoveable_components_masks(producer, nir_var_shader_out,
919                                    assigned_comps,
920                                    producer->info.stage,
921                                    default_to_smooth_interp);
922    get_unmoveable_components_masks(consumer, nir_var_shader_in,
923                                    assigned_comps,
924                                    consumer->info.stage,
925                                    default_to_smooth_interp);
926 
927    compact_components(producer, consumer, assigned_comps,
928                       default_to_smooth_interp);
929 }
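/* Usage sketch (illustrative, not from the original source): a link-time
 * pipeline built from the helpers in this file might look roughly like
 *
 *    nir_link_opt_varyings(producer, consumer);
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 *
 * interleaved with the driver's own optimisation passes.  The only hard
 * requirement, per the comment above, is that unused varyings are removed
 * before compaction.
 */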
930 
931 /*
932  * Mark XFB varyings as always_active_io in the consumer so the linking opts
933  * don't touch them.
934  */
935 void
936 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
937 {
938    nir_variable *input_vars[MAX_VARYING] = { 0 };
939 
940    nir_foreach_shader_in_variable(var, consumer) {
941       if (var->data.location >= VARYING_SLOT_VAR0 &&
942           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
943 
944          unsigned location = var->data.location - VARYING_SLOT_VAR0;
945          input_vars[location] = var;
946       }
947    }
948 
949    nir_foreach_shader_out_variable(var, producer) {
950       if (var->data.location >= VARYING_SLOT_VAR0 &&
951           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
952 
953          if (!var->data.always_active_io)
954             continue;
955 
956          unsigned location = var->data.location - VARYING_SLOT_VAR0;
957          if (input_vars[location]) {
958             input_vars[location]->data.always_active_io = true;
959          }
960       }
961    }
962 }
963 
964 static bool
965 does_varying_match(nir_variable *out_var, nir_variable *in_var)
966 {
967    return in_var->data.location == out_var->data.location &&
968           in_var->data.location_frac == out_var->data.location_frac;
969 }
970 
971 static nir_variable *
972 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
973 {
974    nir_foreach_shader_in_variable(var, consumer) {
975       if (does_varying_match(out_var, var))
976          return var;
977    }
978 
979    return NULL;
980 }
981 
982 static bool
983 can_replace_varying(nir_variable *out_var)
984 {
985    /* Skip types that require more complex handling.
986     * TODO: add support for these types.
987     */
988    if (glsl_type_is_array(out_var->type) ||
989        glsl_type_is_dual_slot(out_var->type) ||
990        glsl_type_is_matrix(out_var->type) ||
991        glsl_type_is_struct_or_ifc(out_var->type))
992       return false;
993 
994    /* Limit this pass to scalars for now to keep things simple. Most varyings
995     * should have been lowered to scalars at this point anyway.
996     */
997    if (!glsl_type_is_scalar(out_var->type))
998       return false;
999 
1000    if (out_var->data.location < VARYING_SLOT_VAR0 ||
1001        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1002       return false;
1003 
1004    return true;
1005 }
1006 
1007 static bool
1008 replace_varying_input_by_constant_load(nir_shader *shader,
1009                                        nir_intrinsic_instr *store_intr)
1010 {
1011    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1012 
1013    nir_builder b;
1014    nir_builder_init(&b, impl);
1015 
1016    nir_variable *out_var =
1017       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1018 
1019    bool progress = false;
1020    nir_foreach_block(block, impl) {
1021       nir_foreach_instr(instr, block) {
1022          if (instr->type != nir_instr_type_intrinsic)
1023             continue;
1024 
1025          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1026          if (intr->intrinsic != nir_intrinsic_load_deref)
1027             continue;
1028 
1029          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1030          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1031             continue;
1032 
1033          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1034 
1035          if (!does_varying_match(out_var, in_var))
1036             continue;
1037 
1038          b.cursor = nir_before_instr(instr);
1039 
1040          nir_load_const_instr *out_const =
1041             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1042 
1043          /* Add new const to replace the input */
1044          nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
1045                                              intr->dest.ssa.bit_size,
1046                                              out_const->value);
1047 
1048          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
1049 
1050          progress = true;
1051       }
1052    }
1053 
1054    return progress;
1055 }
1056 
1057 static bool
1058 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1059                          nir_intrinsic_instr *dup_store_intr)
1060 {
1061    assert(input_var);
1062 
1063    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1064 
1065    nir_builder b;
1066    nir_builder_init(&b, impl);
1067 
1068    nir_variable *dup_out_var =
1069       nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
1070 
1071    bool progress = false;
1072    nir_foreach_block(block, impl) {
1073       nir_foreach_instr(instr, block) {
1074          if (instr->type != nir_instr_type_intrinsic)
1075             continue;
1076 
1077          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1078          if (intr->intrinsic != nir_intrinsic_load_deref)
1079             continue;
1080 
1081          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1082          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1083             continue;
1084 
1085          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1086 
1087          if (!does_varying_match(dup_out_var, in_var) ||
1088              in_var->data.interpolation != input_var->data.interpolation ||
1089              get_interp_loc(in_var) != get_interp_loc(input_var))
1090             continue;
1091 
1092          b.cursor = nir_before_instr(instr);
1093 
1094          nir_ssa_def *load = nir_load_var(&b, input_var);
1095          nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1096 
1097          progress = true;
1098       }
1099    }
1100 
1101    return progress;
1102 }
1103 
1104 static bool
1105 is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
1106 {
1107    /* def is sure to be scalar as can_replace_varying() filters out the vector case. */
1108    assert(def->num_components == 1);
1109 
1110    /* Uniform load may hide behind some move instruction for converting
1111     * vector to scalar:
1112     *
1113     *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
1114     *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1115     *     vec1 32 ssa_3 = mov ssa_2.x
1116     *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1117     *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1118     */
1119    *s = nir_ssa_scalar_resolved(def, 0);
1120 
1121    nir_ssa_def *ssa = s->def;
1122    if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1123       return false;
1124 
1125    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1126    if (intr->intrinsic != nir_intrinsic_load_deref)
1127       return false;
1128 
1129    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1130    /* TODO: support nir_var_mem_ubo. */
1131    if (!nir_deref_mode_is(deref, nir_var_uniform))
1132       return false;
1133 
1134    /* Indirect uniform loads are not supported. */
1135    return !nir_deref_instr_has_indirect(deref);
1136 }
1137 
1138 static nir_variable *
1139 get_uniform_var_in_consumer(nir_shader *consumer,
1140                             nir_variable *var_in_producer)
1141 {
1142    /* Find if uniform already exists in consumer. */
1143    nir_variable *new_var = NULL;
1144    nir_foreach_uniform_variable(v, consumer) {
1145       if (!strcmp(var_in_producer->name, v->name)) {
1146          new_var = v;
1147          break;
1148       }
1149    }
1150 
1151    /* Create the variable if it does not already exist. */
1152    if (!new_var) {
1153       new_var = nir_variable_clone(var_in_producer, consumer);
1154       nir_shader_add_variable(consumer, new_var);
1155    }
1156 
1157    return new_var;
1158 }
1159 
1160 static nir_deref_instr *
1161 clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
1162 {
1163    if (deref->deref_type == nir_deref_type_var)
1164        return nir_build_deref_var(b, var);
1165 
1166    nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1167    nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
1168 
1169    /* Build the array and struct deref instructions.
1170     * The "deref" instr is guaranteed to be direct (see is_direct_uniform_load()).
1171     */
1172    switch (deref->deref_type) {
1173    case nir_deref_type_array: {
1174       nir_load_const_instr *index =
1175          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1176       return nir_build_deref_array_imm(b, parent, index->value->i64);
1177    }
1178    case nir_deref_type_ptr_as_array: {
1179       nir_load_const_instr *index =
1180          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1181       nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
1182                                         parent->dest.ssa.bit_size);
1183       return nir_build_deref_ptr_as_array(b, parent, ssa);
1184    }
1185    case nir_deref_type_struct:
1186       return nir_build_deref_struct(b, parent, deref->strct.index);
1187    default:
1188       unreachable("invalid type");
1189       return NULL;
1190    }
1191 }
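/* Illustrative example (not part of the original source): for a load such as
 * "color_array[2]" from a uniform, the recursion above first rebuilds the
 * deref_var for the cloned variable in the consumer and then re-emits the
 * constant-indexed array deref on top of it; only direct (constant-index)
 * chains reach this point, as guaranteed by is_direct_uniform_load().
 */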
1192 
1193 static bool
1194 replace_varying_input_by_uniform_load(nir_shader *shader,
1195                                       nir_intrinsic_instr *store_intr,
1196                                       nir_ssa_scalar *scalar)
1197 {
1198    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1199 
1200    nir_builder b;
1201    nir_builder_init(&b, impl);
1202 
1203    nir_variable *out_var =
1204       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1205 
1206    nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1207    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1208    nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1209    uni_var = get_uniform_var_in_consumer(shader, uni_var);
1210 
1211    bool progress = false;
1212    nir_foreach_block(block, impl) {
1213       nir_foreach_instr(instr, block) {
1214          if (instr->type != nir_instr_type_intrinsic)
1215             continue;
1216 
1217          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1218          if (intr->intrinsic != nir_intrinsic_load_deref)
1219             continue;
1220 
1221          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1222          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1223             continue;
1224 
1225          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1226 
1227          if (!does_varying_match(out_var, in_var))
1228             continue;
1229 
1230          b.cursor = nir_before_instr(instr);
1231 
1232          /* Clone the uniform's deref chain, from the variable deref down to the loaded deref. */
1233          nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
1234          nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
1235 
1236          /* Add a vector-to-scalar move if the uniform is a vector. */
1237          if (uni_def->num_components > 1) {
1238             nir_alu_src src = {0};
1239             src.src = nir_src_for_ssa(uni_def);
1240             src.swizzle[0] = scalar->comp;
1241             uni_def = nir_mov_alu(&b, src, 1);
1242          }
1243 
1244          /* Replace load input with load uniform. */
1245          nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
1246 
1247          progress = true;
1248       }
1249    }
1250 
1251    return progress;
1252 }
1253 
1254 /* The GLSL ES 3.20 spec says:
1255  *
1256  * "The precision of a vertex output does not need to match the precision of
1257  * the corresponding fragment input. The minimum precision at which vertex
1258  * outputs are interpolated is the minimum of the vertex output precision and
1259  * the fragment input precision, with the exception that for highp,
1260  * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1261  * Output Matching by Name in Linked Programs")
1262  *
1263  * To implement this, when linking shaders we will take the minimum precision
1264  * qualifier (allowing drivers to interpolate at lower precision). For
1265  * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1266  * requires we use the *last* specified precision if there is a conflict.
1267  *
1268  * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1269  * NONE, we'll return the other precision, since there is no conflict.
1270  * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1271  * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1272  * "backwards". For non-fragment stages, we'll pick the latter precision to
1273  * comply with the spec. (Note that the order matters.)
1274  *
1275  * For streamout, "Variables declared with lowp or mediump precision are
1276  * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1277  * of the OpenGL ES 3.2 specification). So drivers should promote them for
1278  * the transform feedback memory store, but not for the output store.
1279  */
1280 
1281 static unsigned
1282 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1283 {
1284    if (producer == GLSL_PRECISION_NONE)
1285       return consumer;
1286    else if (consumer == GLSL_PRECISION_NONE)
1287       return producer;
1288    else
1289       return fs ? MAX2(producer, consumer) : consumer;
1290 }
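/* Illustrative example (not part of the original source): with the raw enum
 * ordering described above (NONE, HIGH, MEDIUM, LOW), a highp vertex output
 * feeding a mediump fragment input links as MAX2(HIGH, MEDIUM) == MEDIUM,
 * i.e. the lower of the two precisions; for non-fragment consumers the
 * consumer's precision is taken as-is.
 */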
1291 
1292 void
1293 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1294 {
1295    bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1296 
1297    nir_foreach_shader_out_variable(producer_var, producer) {
1298       /* Skip if the slot is not assigned */
1299       if (producer_var->data.location < 0)
1300          continue;
1301 
1302       nir_variable *consumer_var = nir_find_variable_with_location(consumer,
1303             nir_var_shader_in, producer_var->data.location);
1304 
1305       /* Skip if the variable will be eliminated */
1306       if (!consumer_var)
1307          continue;
1308 
1309       /* Now we have a pair of variables. Let's pick the smaller precision. */
1310       unsigned precision_1 = producer_var->data.precision;
1311       unsigned precision_2 = consumer_var->data.precision;
1312       unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1313 
1314       /* Propagate the new precision */
1315       producer_var->data.precision = consumer_var->data.precision = minimum;
1316    }
1317 }
1318 
1319 bool
1320 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1321 {
1322    /* TODO: Add support for more shader stage combinations */
1323    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1324        (producer->info.stage != MESA_SHADER_VERTEX &&
1325         producer->info.stage != MESA_SHADER_TESS_EVAL))
1326       return false;
1327 
1328    bool progress = false;
1329 
1330    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1331 
1332    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1333 
1334    /* If we find a store in the last block of the producer we can be sure this
1335     * is the only possible value for this output.
1336     */
1337    nir_block *last_block = nir_impl_last_block(impl);
1338    nir_foreach_instr_reverse(instr, last_block) {
1339       if (instr->type != nir_instr_type_intrinsic)
1340          continue;
1341 
1342       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1343 
1344       if (intr->intrinsic != nir_intrinsic_store_deref)
1345          continue;
1346 
1347       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1348       if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1349          continue;
1350 
1351       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1352       if (!can_replace_varying(out_var))
1353          continue;
1354 
1355       nir_ssa_scalar uni_scalar;
1356       nir_ssa_def *ssa = intr->src[1].ssa;
1357       if (ssa->parent_instr->type == nir_instr_type_load_const) {
1358          progress |= replace_varying_input_by_constant_load(consumer, intr);
1359       } else if (is_direct_uniform_load(ssa, &uni_scalar)) {
1360          progress |= replace_varying_input_by_uniform_load(consumer, intr,
1361                                                            &uni_scalar);
1362       } else {
1363          struct hash_entry *entry =
1364                _mesa_hash_table_search(varying_values, ssa);
1365          if (entry) {
1366             progress |= replace_duplicate_input(consumer,
1367                                                 (nir_variable *) entry->data,
1368                                                 intr);
1369          } else {
1370             nir_variable *in_var = get_matching_input_var(consumer, out_var);
1371             if (in_var) {
1372                _mesa_hash_table_insert(varying_values, ssa, in_var);
1373             }
1374          }
1375       }
1376    }
1377 
1378    _mesa_hash_table_destroy(varying_values, NULL);
1379 
1380    return progress;
1381 }
1382 
1383 /* TODO any better helper somewhere to sort a list? */
1384 
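/* Insert new_var into var_list so that the list stays sorted by
 * var->data.location in ascending order (simple insertion sort).
 */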
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}

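/* Assign var->data.driver_location for all variables of the given mode and
 * return the total number of driver slots used in *size.  Variables are
 * processed in ascending location order; compact variables (arrays of
 * scalars, e.g. clip/cull distances) and per-view variables use special slot
 * counting, and varyings that were component-packed into the same location
 * share a driver location.
 *
 * A minimal usage sketch (hypothetical driver code, names assumed):
 *
 *    unsigned num_outputs;
 *    nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs,
 *                                nir->info.stage);
 *
 * after which each output variable's data.driver_location indexes a
 * contiguous [0, num_outputs) range.
 */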
void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int UNUSED last_loc = 0;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If the previous compact variable only partially filled its last
          * slot, don't let another compact variable that starts at component
          * 0 share this slot; move on to the next one instead.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* Compact variables must be arrays of scalars. */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
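         /* Worked example (illustrative): a compact float[8] starting at
          * location_frac 0 gives start = 4 * location and end = start + 8,
          * so var_size = driver_size = 2 full slots and last_partial is
          * false.  An array length that is not a multiple of 4 leaves
          * last_partial set, and the trailing partial slot is accounted for
          * when the next variable is placed (or at the end of the loop).
          */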
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* Per-view variables have an extra array dimension, which is
          * ignored when counting user-facing slots (var->data.location), but
          * *not* for driver slots (var->data.driver_location). That is, each
          * user slot maps to multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

      /* Builtins don't allow component packing, so we only need to worry
       * about user-defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same location,
       * we may have already processed this location.
       */
      if (processed) {
         /* TODO: handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other arrays
          * or variables; we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order.
          */
         assert(last_loc <= var->data.location);
         last_loc = var->data.location;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}

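/* Map a patch varying location to a compact index: the special patch slots
 * (tess levels and bounding box, VARYING_SLOT_TESS_LEVEL_OUTER ..
 * VARYING_SLOT_BOUNDING_BOX1) map to 0..3, generic patch varyings map as
 * VARYING_SLOT_PATCH0 + n -> 4 + n, and non-patch locations are returned
 * unchanged.
 */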
static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}

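/* Return a bitmask with one bit set per slot occupied by the variable,
 * anchored at bit 0 (the caller shifts it to the variable's location).
 * Arrayed per-vertex IO is counted per vertex, and compact variables are
 * counted in whole vec4 slots.
 */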
static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   uint64_t mask = u_bit_consecutive64(0, slots);
   return mask;
}

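/* Assign matching driver_locations to the producer's outputs and the
 * consumer's inputs: build combined occupancy masks for regular and patch
 * varyings, then number each used slot by counting the occupied bits below
 * it.  Both shaders end up with the same compact numbering, and the returned
 * struct reports how many regular and patch IO vars were linked.  The patch
 * masks only come into play for tessellation interfaces.
 */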
nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}