1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28 
29 /* This file contains various little helpers for doing simple linking in
30  * NIR.  Eventually, we'll probably want a full-blown varying packing
31  * implementation in here.  Right now, it just deletes unused things.
32  */
33 
34 /**
35  * Returns the bits in the inputs_read or outputs_written
36  * bitfield corresponding to this variable.
37  */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41    if (var->data.location < 0)
42       return 0;
43 
44    unsigned location = var->data.patch ?
45       var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46 
47    assert(var->data.mode == nir_var_shader_in ||
48           var->data.mode == nir_var_shader_out);
49    assert(var->data.location >= 0);
50    assert(location < 64);
51 
52    const struct glsl_type *type = var->type;
53    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
54       assert(glsl_type_is_array(type));
55       type = glsl_get_array_element(type);
56    }
57 
58    unsigned slots = glsl_count_attribute_slots(type, false);
59    return BITFIELD64_MASK(slots) << location;
60 }
61 
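/* Built-in (non-generic) per-patch varyings: tess levels and bounding box. */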
62 static bool
63 is_non_generic_patch_var(nir_variable *var)
64 {
65    return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
66           var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
67           var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
68           var->data.location == VARYING_SLOT_BOUNDING_BOX1;
69 }
70 
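/* Number of vector components in the variable's array-stripped type; struct
 * and interface types are conservatively treated as using all 4 components.
 */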
71 static uint8_t
72 get_num_components(nir_variable *var)
73 {
74    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
75       return 4;
76 
77    return glsl_get_vector_elements(glsl_without_array(var->type));
78 }
79 
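/* Add the output slots read via load_deref in a TCS to the read and
 * patches_read masks, since a TCS invocation may read outputs written by
 * other invocations.
 */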
80 static void
81 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
82 {
83    nir_foreach_function(function, shader) {
84       if (!function->impl)
85          continue;
86 
87       nir_foreach_block(block, function->impl) {
88          nir_foreach_instr(instr, block) {
89             if (instr->type != nir_instr_type_intrinsic)
90                continue;
91 
92             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
93             if (intrin->intrinsic != nir_intrinsic_load_deref)
94                continue;
95 
96             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
97             if (!nir_deref_mode_is(deref, nir_var_shader_out))
98                continue;
99 
100             nir_variable *var = nir_deref_instr_get_variable(deref);
101             for (unsigned i = 0; i < get_num_components(var); i++) {
102                if (var->data.patch) {
103                   if (is_non_generic_patch_var(var))
104                      continue;
105 
106                   patches_read[var->data.location_frac + i] |=
107                      get_variable_io_mask(var, shader->info.stage);
108                } else {
109                   read[var->data.location_frac + i] |=
110                      get_variable_io_mask(var, shader->info.stage);
111                }
112             }
113          }
114       }
115    }
116 }
117 
118 /**
119  * Helper for removing unused shader I/O variables by demoting them to global
120  * variables (which may then be dead-code eliminated).
121  *
122  * Example usage is:
123  *
124  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
125  *                                      read, patches_read) ||
126  *                                      progress;
127  *
128  * The "used" masks should each be an array of 4 uint64_ts (built from
129  * VARYING_BIT_* values), one per .location_frac.  Note that for vector
130  * variables, only the first channel (.location_frac) is examined when
131  * deciding if the variable is used!
132  */
133 bool
134 nir_remove_unused_io_vars(nir_shader *shader,
135                           nir_variable_mode mode,
136                           uint64_t *used_by_other_stage,
137                           uint64_t *used_by_other_stage_patches)
138 {
139    bool progress = false;
140    uint64_t *used;
141 
142    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
143 
144    nir_foreach_variable_with_modes_safe(var, shader, mode) {
145       if (var->data.patch)
146          used = used_by_other_stage_patches;
147       else
148          used = used_by_other_stage;
149 
150       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
151          if (shader->info.stage != MESA_SHADER_MESH || var->data.location != VARYING_SLOT_PRIMITIVE_ID)
152             continue;
153 
154       if (var->data.always_active_io)
155          continue;
156 
157       if (var->data.explicit_xfb_buffer)
158          continue;
159 
160       uint64_t other_stage = used[var->data.location_frac];
161 
162       if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
163          /* This one is not used by the other stage, so make it a global variable */
164          if (shader->info.stage == MESA_SHADER_MESH &&
165                (shader->info.outputs_read & BITFIELD64_BIT(var->data.location)))
166             var->data.mode = nir_var_mem_shared;
167          else
168             var->data.mode = nir_var_shader_temp;
169          var->data.location = 0;
170 
171          progress = true;
172       }
173    }
174 
175    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
176    if (progress) {
177       nir_metadata_preserve(impl, nir_metadata_dominance |
178                             nir_metadata_block_index);
179       nir_fixup_deref_modes(shader);
180    } else {
181       nir_metadata_preserve(impl, nir_metadata_all);
182    }
183 
184    return progress;
185 }
186 
187 bool
188 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
189 {
190    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
191    assert(consumer->info.stage != MESA_SHADER_VERTEX);
192 
193    uint64_t read[4] = { 0 }, written[4] = { 0 };
194    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
195 
196    nir_foreach_shader_out_variable(var, producer) {
197       for (unsigned i = 0; i < get_num_components(var); i++) {
198          if (var->data.patch) {
199             if (is_non_generic_patch_var(var))
200                continue;
201 
202             patches_written[var->data.location_frac + i] |=
203                get_variable_io_mask(var, producer->info.stage);
204          } else {
205             written[var->data.location_frac + i] |=
206                get_variable_io_mask(var, producer->info.stage);
207          }
208       }
209    }
210 
211    nir_foreach_shader_in_variable(var, consumer) {
212       for (unsigned i = 0; i < get_num_components(var); i++) {
213          if (var->data.patch) {
214             if (is_non_generic_patch_var(var))
215                continue;
216 
217             patches_read[var->data.location_frac + i] |=
218                get_variable_io_mask(var, consumer->info.stage);
219          } else {
220             read[var->data.location_frac + i] |=
221                get_variable_io_mask(var, consumer->info.stage);
222          }
223       }
224    }
225 
226    /* Each TCS invocation can read data written by other TCS invocations,
227     * so even if the outputs are not used by the TES we must also make
228     * sure they are not read by the TCS before demoting them to globals.
229     */
230    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
231       tcs_add_output_reads(producer, read, patches_read);
232 
233    bool progress = false;
234    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
235                                         patches_read);
236 
237    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
238                                         patches_written) || progress;
239 
240    return progress;
241 }
242 
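/* Effective interpolation mode used when checking whether two varyings can
 * share a slot: per-primitive varyings have no interpolation and integers are
 * always flat.
 */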
243 static uint8_t
244 get_interp_type(nir_variable *var, const struct glsl_type *type,
245                 bool default_to_smooth_interp)
246 {
247    if (var->data.per_primitive)
248       return INTERP_MODE_NONE;
249    if (glsl_type_is_integer(type))
250       return INTERP_MODE_FLAT;
251    else if (var->data.interpolation != INTERP_MODE_NONE)
252       return var->data.interpolation;
253    else if (default_to_smooth_interp)
254       return INTERP_MODE_SMOOTH;
255    else
256       return INTERP_MODE_NONE;
257 }
258 
259 #define INTERPOLATE_LOC_SAMPLE 0
260 #define INTERPOLATE_LOC_CENTROID 1
261 #define INTERPOLATE_LOC_CENTER 2
262 
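/* Effective interpolation location: sample, centroid or (default) center. */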
263 static uint8_t
264 get_interp_loc(nir_variable *var)
265 {
266    if (var->data.sample)
267       return INTERPOLATE_LOC_SAMPLE;
268    else if (var->data.centroid)
269       return INTERPOLATE_LOC_CENTROID;
270    else
271       return INTERPOLATE_LOC_CENTER;
272 }
273 
274 static bool
275 is_packing_supported_for_type(const struct glsl_type *type)
276 {
277    /* We ignore complex types such as arrays, matrices, structs and bit sizes
278     * other than 32-bit. All other vector types should have been split into
279     * scalar variables by the lower_io_to_scalar pass. The only exception
280     * should be OpenGL xfb varyings.
281     * TODO: add support for more complex types?
282     */
283    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
284 }
285 
286 struct assigned_comps
287 {
288    uint8_t comps;
289    uint8_t interp_type;
290    uint8_t interp_loc;
291    bool is_32bit;
292    bool is_mediump;
293    bool is_per_primitive;
294 };
295 
296 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
297  * algorithms this function just assigns them their existing location for now.
298  * TODO: allow better packing of complex types.
299  */
300 static void
301 get_unmoveable_components_masks(nir_shader *shader,
302                                 nir_variable_mode mode,
303                                 struct assigned_comps *comps,
304                                 gl_shader_stage stage,
305                                 bool default_to_smooth_interp)
306 {
307    nir_foreach_variable_with_modes_safe(var, shader, mode) {
308       assert(var->data.location >= 0);
309 
310       /* Only remap things that aren't built-ins. */
311       if (var->data.location >= VARYING_SLOT_VAR0 &&
312           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
313 
314          const struct glsl_type *type = var->type;
315          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
316             assert(glsl_type_is_array(type));
317             type = glsl_get_array_element(type);
318          }
319 
320          /* If we can pack this varying then don't mark the components as
321           * used.
322           */
323          if (is_packing_supported_for_type(type) &&
324              !var->data.always_active_io)
325             continue;
326 
327          unsigned location = var->data.location - VARYING_SLOT_VAR0;
328 
329          unsigned elements =
330             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
331             glsl_get_vector_elements(glsl_without_array(type)) : 4;
332 
333          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
334          unsigned slots = glsl_count_attribute_slots(type, false);
335          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
336          unsigned comps_slot2 = 0;
337          for (unsigned i = 0; i < slots; i++) {
338             if (dual_slot) {
339                if (i & 1) {
340                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
341                } else {
342                   unsigned num_comps = 4 - var->data.location_frac;
343                   comps_slot2 = (elements * dmul) - num_comps;
344 
345                   /* Assume ARB_enhanced_layouts packing rules for doubles */
346                   assert(var->data.location_frac == 0 ||
347                          var->data.location_frac == 2);
348                   assert(comps_slot2 <= 4);
349 
350                   comps[location + i].comps |=
351                      ((1 << num_comps) - 1) << var->data.location_frac;
352                }
353             } else {
354                comps[location + i].comps |=
355                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
356             }
357 
358             comps[location + i].interp_type =
359                get_interp_type(var, type, default_to_smooth_interp);
360             comps[location + i].interp_loc = get_interp_loc(var);
361             comps[location + i].is_32bit =
362                glsl_type_is_32bit(glsl_without_array(type));
363             comps[location + i].is_mediump =
364                var->data.precision == GLSL_PRECISION_MEDIUM ||
365                var->data.precision == GLSL_PRECISION_LOW;
366             comps[location + i].is_per_primitive = var->data.per_primitive;
367          }
368       }
369    }
370 }
371 
372 struct varying_loc
373 {
374    uint8_t component;
375    uint32_t location;
376 };
377 
378 static void
379 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
380                     uint64_t slots_used_mask, unsigned num_slots)
381 {
382    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
383 
384    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
385       BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
386 }
387 
388 static void
389 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
390 {
391    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
392 
393    slots_used[var->data.patch ? 1 : 0] |=
394       BITFIELD64_BIT(var->data.location - loc_offset + offset);
395 }
396 
397 static void
398 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
399                            struct varying_loc (*remap)[4],
400                            uint64_t *slots_used, uint64_t *out_slots_read,
401                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
402  {
403    const gl_shader_stage stage = shader->info.stage;
404    uint64_t out_slots_read_tmp[2] = {0};
405    uint64_t slots_used_tmp[2] = {0};
406 
407    /* We don't touch builtins so just copy the bitmask */
408    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
409 
410    nir_foreach_variable_with_modes(var, shader, mode) {
411       assert(var->data.location >= 0);
412 
413       /* Only remap things that aren't built-ins */
414       if (var->data.location >= VARYING_SLOT_VAR0 &&
415           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
416 
417          const struct glsl_type *type = var->type;
418          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
419             assert(glsl_type_is_array(type));
420             type = glsl_get_array_element(type);
421          }
422 
423          unsigned num_slots = glsl_count_attribute_slots(type, false);
424          bool used_across_stages = false;
425          bool outputs_read = false;
426 
427          unsigned location = var->data.location - VARYING_SLOT_VAR0;
428          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
429 
430          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
431          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
432          uint64_t outs_used =
433             var->data.patch ? *p_out_slots_read : *out_slots_read;
434          uint64_t slots =
435             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
436 
437          if (slots & used)
438             used_across_stages = true;
439 
440          if (slots & outs_used)
441             outputs_read = true;
442 
443          if (new_loc->location) {
444             var->data.location = new_loc->location;
445             var->data.location_frac = new_loc->component;
446          }
447 
448          if (var->data.always_active_io) {
449             /* We can't apply link-time optimisations (specifically array
450              * splitting) to these, so we need to copy the existing mask;
451              * otherwise we will mess up the mask for things like partially
452              * marked arrays.
453              */
454             if (used_across_stages)
455                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
456 
457             if (outputs_read) {
458                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
459                                    num_slots);
460             }
461          } else {
462             for (unsigned i = 0; i < num_slots; i++) {
463                if (used_across_stages)
464                   mark_used_slot(var, slots_used_tmp, i);
465 
466                if (outputs_read)
467                   mark_used_slot(var, out_slots_read_tmp, i);
468             }
469          }
470       }
471    }
472 
473    *slots_used = slots_used_tmp[0];
474    *out_slots_read = out_slots_read_tmp[0];
475    *p_slots_used = slots_used_tmp[1];
476    *p_out_slots_read = out_slots_read_tmp[1];
477 }
478 
479 struct varying_component {
480    nir_variable *var;
481    uint8_t interp_type;
482    uint8_t interp_loc;
483    bool is_32bit;
484    bool is_patch;
485    bool is_per_primitive;
486    bool is_mediump;
487    bool is_intra_stage_only;
488    bool initialised;
489 };
490 
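/* qsort comparator that groups together components which can be packed into
 * the same slots.
 */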
491 static int
492 cmp_varying_component(const void *comp1_v, const void *comp2_v)
493 {
494    struct varying_component *comp1 = (struct varying_component *) comp1_v;
495    struct varying_component *comp2 = (struct varying_component *) comp2_v;
496 
497    /* We want patches to be ordered at the end of the array */
498    if (comp1->is_patch != comp2->is_patch)
499       return comp1->is_patch ? 1 : -1;
500 
501    /* Sort per-primitive outputs after per-vertex ones to allow
502     * better compaction when they are mixed in the shader's source.
503     */
504    if (comp1->is_per_primitive != comp2->is_per_primitive)
505       return comp1->is_per_primitive ? 1 : -1;
506 
507    /* We want to try to group together TCS outputs that are only read by other
508     * TCS invocations and not consumed by the following stage.
509     */
510    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
511       return comp1->is_intra_stage_only ? 1 : -1;
512 
513    /* Group mediump varyings together. */
514    if (comp1->is_mediump != comp2->is_mediump)
515       return comp1->is_mediump ? 1 : -1;
516 
517    /* We can only pack varyings with matching interpolation types so group
518     * them together.
519     */
520    if (comp1->interp_type != comp2->interp_type)
521       return comp1->interp_type - comp2->interp_type;
522 
523    /* Interpolation loc must match also. */
524    if (comp1->interp_loc != comp2->interp_loc)
525       return comp1->interp_loc - comp2->interp_loc;
526 
527    /* If everything else matches just use the original location to sort */
528    const struct nir_variable_data *const data1 = &comp1->var->data;
529    const struct nir_variable_data *const data2 = &comp2->var->data;
530    if (data1->location != data2->location)
531       return data1->location - data2->location;
532    return (int)data1->location_frac - (int)data2->location_frac;
533 }
534 
535 static void
536 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
537                               struct varying_component **varying_comp_info,
538                               unsigned *varying_comp_info_size,
539                               bool default_to_smooth_interp)
540 {
541    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
542    unsigned num_of_comps_to_pack = 0;
543 
544    /* Count the number of varyings that can be packed and create a mapping
545     * of those varyings to the array we will pass to qsort.
546     */
547    nir_foreach_shader_out_variable(var, producer) {
548 
549       /* Only remap things that aren't builtins. */
550       if (var->data.location >= VARYING_SLOT_VAR0 &&
551           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
552 
553          /* We can't repack xfb varyings. */
554          if (var->data.always_active_io)
555             continue;
556 
557          const struct glsl_type *type = var->type;
558          if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
559             assert(glsl_type_is_array(type));
560             type = glsl_get_array_element(type);
561          }
562 
563          if (!is_packing_supported_for_type(type))
564             continue;
565 
566          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
567          store_varying_info_idx[loc][var->data.location_frac] =
568             ++num_of_comps_to_pack;
569       }
570    }
571 
572    *varying_comp_info_size = num_of_comps_to_pack;
573    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
574                                       num_of_comps_to_pack);
575 
576    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
577 
578    /* Walk over the shader and populate the varying component info array */
579    nir_foreach_block(block, impl) {
580       nir_foreach_instr(instr, block) {
581          if (instr->type != nir_instr_type_intrinsic)
582             continue;
583 
584          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
585          if (intr->intrinsic != nir_intrinsic_load_deref &&
586              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
587              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
588              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
589              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
590             continue;
591 
592          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
593          if (!nir_deref_mode_is(deref, nir_var_shader_in))
594             continue;
595 
596          /* We only remap things that aren't builtins. */
597          nir_variable *in_var = nir_deref_instr_get_variable(deref);
598          if (in_var->data.location < VARYING_SLOT_VAR0)
599             continue;
600 
601          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
602          if (location >= MAX_VARYINGS_INCL_PATCH)
603             continue;
604 
605          unsigned var_info_idx =
606             store_varying_info_idx[location][in_var->data.location_frac];
607          if (!var_info_idx)
608             continue;
609 
610          struct varying_component *vc_info =
611             &(*varying_comp_info)[var_info_idx-1];
612 
613          if (!vc_info->initialised) {
614             const struct glsl_type *type = in_var->type;
615             if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
616                 in_var->data.per_view) {
617                assert(glsl_type_is_array(type));
618                type = glsl_get_array_element(type);
619             }
620 
621             vc_info->var = in_var;
622             vc_info->interp_type =
623                get_interp_type(in_var, type, default_to_smooth_interp);
624             vc_info->interp_loc = get_interp_loc(in_var);
625             vc_info->is_32bit = glsl_type_is_32bit(type);
626             vc_info->is_patch = in_var->data.patch;
627             vc_info->is_per_primitive = in_var->data.per_primitive;
628             vc_info->is_mediump = !producer->options->linker_ignore_precision &&
629                (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
630                 in_var->data.precision == GLSL_PRECISION_LOW);
631             vc_info->is_intra_stage_only = false;
632             vc_info->initialised = true;
633          }
634       }
635    }
636 
637    /* Walk over the shader and populate the varying component info array
638     * for varyings which are read by other TCS invocations but are not consumed
639     * by the TES.
640     */
641    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
642       impl = nir_shader_get_entrypoint(producer);
643 
644       nir_foreach_block(block, impl) {
645          nir_foreach_instr(instr, block) {
646             if (instr->type != nir_instr_type_intrinsic)
647                continue;
648 
649             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
650             if (intr->intrinsic != nir_intrinsic_load_deref)
651                continue;
652 
653             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
654             if (!nir_deref_mode_is(deref, nir_var_shader_out))
655                continue;
656 
657             /* We only remap things that aren't builtins. */
658             nir_variable *out_var = nir_deref_instr_get_variable(deref);
659             if (out_var->data.location < VARYING_SLOT_VAR0)
660                continue;
661 
662             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
663             if (location >= MAX_VARYINGS_INCL_PATCH)
664                continue;
665 
666             unsigned var_info_idx =
667                store_varying_info_idx[location][out_var->data.location_frac];
668             if (!var_info_idx) {
669                /* Something went wrong, the shader interfaces didn't match, so
670                 * abandon packing. This can happen for example when the
671                 * inputs are scalars but the outputs are struct members.
672                 */
673                *varying_comp_info_size = 0;
674                break;
675             }
676 
677             struct varying_component *vc_info =
678                &(*varying_comp_info)[var_info_idx-1];
679 
680             if (!vc_info->initialised) {
681                const struct glsl_type *type = out_var->type;
682                if (nir_is_arrayed_io(out_var, producer->info.stage)) {
683                   assert(glsl_type_is_array(type));
684                   type = glsl_get_array_element(type);
685                }
686 
687                vc_info->var = out_var;
688                vc_info->interp_type =
689                   get_interp_type(out_var, type, default_to_smooth_interp);
690                vc_info->interp_loc = get_interp_loc(out_var);
691                vc_info->is_32bit = glsl_type_is_32bit(type);
692                vc_info->is_patch = out_var->data.patch;
693                vc_info->is_per_primitive = out_var->data.per_primitive;
694                vc_info->is_mediump = !producer->options->linker_ignore_precision &&
695                   (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
696                    out_var->data.precision == GLSL_PRECISION_LOW);
697                vc_info->is_intra_stage_only = true;
698                vc_info->initialised = true;
699             }
700          }
701       }
702    }
703 
704    for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
705       struct varying_component *vc_info = &(*varying_comp_info)[i];
706       if (!vc_info->initialised) {
707          /* Something went wrong, the shader interfaces didn't match, so
708           * abandon packing. This can happen for example when the outputs are
709           * scalars but the inputs are struct members.
710           */
711          *varying_comp_info_size = 0;
712          break;
713       }
714    }
715 }
716 
717 static bool
718 allow_pack_interp_type(nir_pack_varying_options options, int type)
719 {
720    int sel;
721 
722    switch (type) {
723    case INTERP_MODE_NONE:
724       sel = nir_pack_varying_interp_mode_none;
725       break;
726    case INTERP_MODE_SMOOTH:
727       sel = nir_pack_varying_interp_mode_smooth;
728       break;
729    case INTERP_MODE_FLAT:
730       sel = nir_pack_varying_interp_mode_flat;
731       break;
732    case INTERP_MODE_NOPERSPECTIVE:
733       sel = nir_pack_varying_interp_mode_noperspective;
734       break;
735    default:
736       return false;
737    }
738 
739    return options & sel;
740 }
741 
742 static bool
743 allow_pack_interp_loc(nir_pack_varying_options options, int loc)
744 {
745    int sel;
746 
747    switch (loc) {
748    case INTERPOLATE_LOC_SAMPLE:
749       sel = nir_pack_varying_interp_loc_sample;
750       break;
751    case INTERPOLATE_LOC_CENTROID:
752       sel = nir_pack_varying_interp_loc_centroid;
753       break;
754    case INTERPOLATE_LOC_CENTER:
755       sel = nir_pack_varying_interp_loc_center;
756       break;
757    default:
758       return false;
759    }
760 
761    return options & sel;
762 }
763 
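/* Scan forward from *cursor for the first free component that is compatible
 * with "info": the per-primitive flag and precision must match, interpolation
 * type/location must match unless the driver options allow mixing them, and
 * only 32-bit components are packed. Record the result in the remap table.
 */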
764 static void
765 assign_remap_locations(struct varying_loc (*remap)[4],
766                        struct assigned_comps *assigned_comps,
767                        struct varying_component *info,
768                        unsigned *cursor, unsigned *comp,
769                        unsigned max_location,
770                        nir_pack_varying_options options)
771 {
772    unsigned tmp_cursor = *cursor;
773    unsigned tmp_comp = *comp;
774 
775    for (; tmp_cursor < max_location; tmp_cursor++) {
776 
777       if (assigned_comps[tmp_cursor].comps) {
778          /* Don't pack per-primitive and per-vertex varyings together. */
779          if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
780             tmp_comp = 0;
781             continue;
782          }
783 
784          /* We can only pack varyings with matching precision. */
785          if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
786             tmp_comp = 0;
787             continue;
788          }
789 
790          /* We can only pack varyings with matching interpolation types,
791           * unless the driver supports packing mismatched types.
792           */
793          if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
794              (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
795               !allow_pack_interp_type(options, info->interp_type))) {
796             tmp_comp = 0;
797             continue;
798          }
799 
800          /* We can only pack varyings with matching interpolation locations,
801           * unless the driver supports packing mismatched locations.
802           */
803          if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
804              (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
805               !allow_pack_interp_loc(options, info->interp_loc))) {
806             tmp_comp = 0;
807             continue;
808          }
809 
810          /* We can only pack varyings with matching types, and the current
811           * algorithm only supports packing 32-bit types.
812           */
813          if (!assigned_comps[tmp_cursor].is_32bit) {
814             tmp_comp = 0;
815             continue;
816          }
817 
818          while (tmp_comp < 4 &&
819                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
820             tmp_comp++;
821          }
822       }
823 
824       if (tmp_comp == 4) {
825          tmp_comp = 0;
826          continue;
827       }
828 
829       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
830 
831       /* Once we have assigned a location mark it as used */
832       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
833       assigned_comps[tmp_cursor].interp_type = info->interp_type;
834       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
835       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
836       assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
837       assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
838 
839       /* Assign remap location */
840       remap[location][info->var->data.location_frac].component = tmp_comp++;
841       remap[location][info->var->data.location_frac].location =
842          tmp_cursor + VARYING_SLOT_VAR0;
843 
844       break;
845    }
846 
847    *cursor = tmp_cursor;
848    *comp = tmp_comp;
849 }
850 
851 /* If there are empty components in the slot, compact the remaining components
852  * as close to component 0 as possible. This will make it easier to fill the
853  * empty components with components from a different slot in a following pass.
854  */
855 static void
856 compact_components(nir_shader *producer, nir_shader *consumer,
857                    struct assigned_comps *assigned_comps,
858                    bool default_to_smooth_interp)
859 {
860    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
861    struct varying_component *varying_comp_info;
862    unsigned varying_comp_info_size;
863 
864    /* Gather varying component info */
865    gather_varying_component_info(producer, consumer, &varying_comp_info,
866                                  &varying_comp_info_size,
867                                  default_to_smooth_interp);
868 
869    /* Sort varying components. */
870    qsort(varying_comp_info, varying_comp_info_size,
871          sizeof(struct varying_component), cmp_varying_component);
872 
873    nir_pack_varying_options options = consumer->options->pack_varying_options;
874 
875    unsigned cursor = 0;
876    unsigned comp = 0;
877 
878    /* Set the remap array based on the sorted components */
879    for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
880       struct varying_component *info = &varying_comp_info[i];
881 
882       assert(info->is_patch || cursor < MAX_VARYING);
883       if (info->is_patch) {
884          /* The list should be sorted with all non-patch inputs first followed
885           * by patch inputs.  When we hit our first patch input, we need to
886           * reset the cursor to MAX_VARYING so we put them in the right slot.
887           */
888          if (cursor < MAX_VARYING) {
889             cursor = MAX_VARYING;
890             comp = 0;
891          }
892 
893          assign_remap_locations(remap, assigned_comps, info,
894                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
895                                 options);
896       } else {
897          assign_remap_locations(remap, assigned_comps, info,
898                                 &cursor, &comp, MAX_VARYING,
899                                 options);
900 
901          /* Check if we failed to assign a remap location. This can happen if,
902           * for example, there are a bunch of unmovable components with
903           * mismatching interpolation types causing us to skip over locations
904           * that would have been useful for packing later components.
905           * The solution is to iterate over the locations again (this should
906           * happen very rarely in practice).
907           */
908          if (cursor == MAX_VARYING) {
909             cursor = 0;
910             comp = 0;
911             assign_remap_locations(remap, assigned_comps, info,
912                                    &cursor, &comp, MAX_VARYING,
913                                    options);
914          }
915       }
916    }
917 
918    ralloc_free(varying_comp_info);
919 
920    uint64_t zero = 0;
921    uint32_t zero32 = 0;
922    remap_slots_and_components(consumer, nir_var_shader_in, remap,
923                               &consumer->info.inputs_read, &zero,
924                               &consumer->info.patch_inputs_read, &zero32);
925    remap_slots_and_components(producer, nir_var_shader_out, remap,
926                               &producer->info.outputs_written,
927                               &producer->info.outputs_read,
928                               &producer->info.patch_outputs_written,
929                               &producer->info.patch_outputs_read);
930 }
931 
932 /* We assume that this has been called more-or-less directly after
933  * remove_unused_varyings.  At this point, all of the varyings that we
934  * aren't going to be using have been completely removed and the
935  * inputs_read and outputs_written fields in nir_shader_info reflect
936  * this.  Therefore, the total set of valid slots is the OR of the two
937  * sets of varyings;  this accounts for varyings which one side may need
938  * to read/write even if the other doesn't.  This can happen if, for
939  * instance, an array is used indirectly from one side causing it to be
940  * unsplittable but directly from the other.
941  */
942 void
943 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
944                      bool default_to_smooth_interp)
945 {
946    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
947    assert(consumer->info.stage != MESA_SHADER_VERTEX);
948 
949    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
950 
951    get_unmoveable_components_masks(producer, nir_var_shader_out,
952                                    assigned_comps,
953                                    producer->info.stage,
954                                    default_to_smooth_interp);
955    get_unmoveable_components_masks(consumer, nir_var_shader_in,
956                                    assigned_comps,
957                                    consumer->info.stage,
958                                    default_to_smooth_interp);
959 
960    compact_components(producer, consumer, assigned_comps,
961                       default_to_smooth_interp);
962 }
963 
964 /*
965  * Mark XFB varyings as always_active_io in the consumer so the linking opts
966  * don't touch them.
967  */
968 void
969 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
970 {
971    nir_variable *input_vars[MAX_VARYING][4] = { 0 };
972 
973    nir_foreach_shader_in_variable(var, consumer) {
974       if (var->data.location >= VARYING_SLOT_VAR0 &&
975           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
976 
977          unsigned location = var->data.location - VARYING_SLOT_VAR0;
978          input_vars[location][var->data.location_frac] = var;
979       }
980    }
981 
982    nir_foreach_shader_out_variable(var, producer) {
983       if (var->data.location >= VARYING_SLOT_VAR0 &&
984           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
985 
986          if (!var->data.always_active_io)
987             continue;
988 
989          unsigned location = var->data.location - VARYING_SLOT_VAR0;
990          if (input_vars[location][var->data.location_frac]) {
991             input_vars[location][var->data.location_frac]->data.always_active_io = true;
992          }
993       }
994    }
995 }
996 
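/* An output and an input match if they occupy the same location and component. */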
997 static bool
998 does_varying_match(nir_variable *out_var, nir_variable *in_var)
999 {
1000    return in_var->data.location == out_var->data.location &&
1001           in_var->data.location_frac == out_var->data.location_frac;
1002 }
1003 
1004 static nir_variable *
1005 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
1006 {
1007    nir_foreach_shader_in_variable(var, consumer) {
1008       if (does_varying_match(out_var, var))
1009          return var;
1010    }
1011 
1012    return NULL;
1013 }
1014 
1015 static bool
1016 can_replace_varying(nir_variable *out_var)
1017 {
1018    /* Skip types that require more complex handling.
1019     * TODO: add support for these types.
1020     */
1021    if (glsl_type_is_array(out_var->type) ||
1022        glsl_type_is_dual_slot(out_var->type) ||
1023        glsl_type_is_matrix(out_var->type) ||
1024        glsl_type_is_struct_or_ifc(out_var->type))
1025       return false;
1026 
1027    /* Limit this pass to scalars for now to keep things simple. Most varyings
1028     * should have been lowered to scalars at this point anyway.
1029     */
1030    if (!glsl_type_is_scalar(out_var->type))
1031       return false;
1032 
1033    if (out_var->data.location < VARYING_SLOT_VAR0 ||
1034        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1035       return false;
1036 
1037    return true;
1038 }
1039 
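/* Replace loads of the consumer input that matches the output written by
 * store_intr with the constant value being stored.
 */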
1040 static bool
1041 replace_varying_input_by_constant_load(nir_shader *shader,
1042                                        nir_intrinsic_instr *store_intr)
1043 {
1044    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1045 
1046    nir_builder b;
1047    nir_builder_init(&b, impl);
1048 
1049    nir_variable *out_var =
1050       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1051 
1052    bool progress = false;
1053    nir_foreach_block(block, impl) {
1054       nir_foreach_instr(instr, block) {
1055          if (instr->type != nir_instr_type_intrinsic)
1056             continue;
1057 
1058          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1059          if (intr->intrinsic != nir_intrinsic_load_deref)
1060             continue;
1061 
1062          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1063          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1064             continue;
1065 
1066          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1067 
1068          if (!does_varying_match(out_var, in_var))
1069             continue;
1070 
1071          b.cursor = nir_before_instr(instr);
1072 
1073          nir_load_const_instr *out_const =
1074             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1075 
1076          /* Add new const to replace the input */
1077          nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
1078                                              intr->dest.ssa.bit_size,
1079                                              out_const->value);
1080 
1081          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
1082 
1083          progress = true;
1084       }
1085    }
1086 
1087    return progress;
1088 }
1089 
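/* Replace loads of the input that matches the duplicate output written by
 * dup_store_intr with loads of input_var, provided the interpolation settings
 * match.
 */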
1090 static bool
1091 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1092                          nir_intrinsic_instr *dup_store_intr)
1093 {
1094    assert(input_var);
1095 
1096    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1097 
1098    nir_builder b;
1099    nir_builder_init(&b, impl);
1100 
1101    nir_variable *dup_out_var =
1102       nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
1103 
1104    bool progress = false;
1105    nir_foreach_block(block, impl) {
1106       nir_foreach_instr(instr, block) {
1107          if (instr->type != nir_instr_type_intrinsic)
1108             continue;
1109 
1110          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1111          if (intr->intrinsic != nir_intrinsic_load_deref)
1112             continue;
1113 
1114          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1115          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1116             continue;
1117 
1118          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1119 
1120          if (!does_varying_match(dup_out_var, in_var) ||
1121              in_var->data.interpolation != input_var->data.interpolation ||
1122              get_interp_loc(in_var) != get_interp_loc(input_var))
1123             continue;
1124 
1125          b.cursor = nir_before_instr(instr);
1126 
1127          nir_ssa_def *load = nir_load_var(&b, input_var);
1128          nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1129 
1130          progress = true;
1131       }
1132    }
1133 
1134    return progress;
1135 }
1136 
1137 static bool
1138 is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
1139 {
1140    /* def is sure to be scalar, as can_replace_varying() filters out the vector case. */
1141    assert(def->num_components == 1);
1142 
1143    /* A uniform load may hide behind a move instruction that converts a
1144     * vector to a scalar:
1145     *
1146     *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
1147     *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1148     *     vec1 32 ssa_3 = mov ssa_2.x
1149     *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1150     *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1151     */
1152    *s = nir_ssa_scalar_resolved(def, 0);
1153 
1154    nir_ssa_def *ssa = s->def;
1155    if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1156       return false;
1157 
1158    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1159    if (intr->intrinsic != nir_intrinsic_load_deref)
1160       return false;
1161 
1162    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1163    /* TODO: support nir_var_mem_ubo. */
1164    if (!nir_deref_mode_is(deref, nir_var_uniform))
1165       return false;
1166 
1167    /* Indirect uniform loads are not supported. */
1168    return !nir_deref_instr_has_indirect(deref);
1169 }
1170 
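/* Find the producer's uniform variable in the consumer by name, cloning it
 * into the consumer if it does not exist yet.
 */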
1171 static nir_variable *
1172 get_uniform_var_in_consumer(nir_shader *consumer,
1173                             nir_variable *var_in_producer)
1174 {
1175    /* Check whether the uniform already exists in the consumer. */
1176    nir_variable *new_var = NULL;
1177    nir_foreach_uniform_variable(v, consumer) {
1178       if (!strcmp(var_in_producer->name, v->name)) {
1179          new_var = v;
1180          break;
1181       }
1182    }
1183 
1184    /* Create the variable if it doesn't already exist. */
1185    if (!new_var) {
1186       new_var = nir_variable_clone(var_in_producer, consumer);
1187       nir_shader_add_variable(consumer, new_var);
1188    }
1189 
1190    return new_var;
1191 }
1192 
1193 static nir_deref_instr *
1194 clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
1195 {
1196    if (deref->deref_type == nir_deref_type_var)
1197        return nir_build_deref_var(b, var);
1198 
1199    nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1200    nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
1201 
1202    /* Build array and struct deref instructions.
1203     * "deref" instr is sure to be direct (see is_direct_uniform_load()).
1204     */
1205    switch (deref->deref_type) {
1206    case nir_deref_type_array: {
1207       nir_load_const_instr *index =
1208          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1209       return nir_build_deref_array_imm(b, parent, index->value->i64);
1210    }
1211    case nir_deref_type_ptr_as_array: {
1212       nir_load_const_instr *index =
1213          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1214       nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
1215                                         parent->dest.ssa.bit_size);
1216       return nir_build_deref_ptr_as_array(b, parent, ssa);
1217    }
1218    case nir_deref_type_struct:
1219       return nir_build_deref_struct(b, parent, deref->strct.index);
1220    default:
1221       unreachable("invalid type");
1222       return NULL;
1223    }
1224 }
1225 
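/* Replace loads of the consumer input that matches the output written by
 * store_intr with a load of the corresponding uniform in the consumer.
 */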
1226 static bool
1227 replace_varying_input_by_uniform_load(nir_shader *shader,
1228                                       nir_intrinsic_instr *store_intr,
1229                                       nir_ssa_scalar *scalar)
1230 {
1231    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1232 
1233    nir_builder b;
1234    nir_builder_init(&b, impl);
1235 
1236    nir_variable *out_var =
1237       nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1238 
1239    nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1240    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1241    nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1242    uni_var = get_uniform_var_in_consumer(shader, uni_var);
1243 
1244    bool progress = false;
1245    nir_foreach_block(block, impl) {
1246       nir_foreach_instr(instr, block) {
1247          if (instr->type != nir_instr_type_intrinsic)
1248             continue;
1249 
1250          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1251          if (intr->intrinsic != nir_intrinsic_load_deref)
1252             continue;
1253 
1254          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1255          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1256             continue;
1257 
1258          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1259 
1260          if (!does_varying_match(out_var, in_var))
1261             continue;
1262 
1263          b.cursor = nir_before_instr(instr);
1264 
1265          /* Rebuild the uniform's deref chain in the consumer and load from it. */
1266          nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
1267          nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
1268 
1269          /* Add a vector-to-scalar move if the uniform is a vector. */
1270          if (uni_def->num_components > 1) {
1271             nir_alu_src src = {0};
1272             src.src = nir_src_for_ssa(uni_def);
1273             src.swizzle[0] = scalar->comp;
1274             uni_def = nir_mov_alu(&b, src, 1);
1275          }
1276 
1277          /* Replace the input load with the uniform load. */
1278          nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
1279 
1280          progress = true;
1281       }
1282    }
1283 
1284    return progress;
1285 }
1286 
1287 /* The GLSL ES 3.20 spec says:
1288  *
1289  * "The precision of a vertex output does not need to match the precision of
1290  * the corresponding fragment input. The minimum precision at which vertex
1291  * outputs are interpolated is the minimum of the vertex output precision and
1292  * the fragment input precision, with the exception that for highp,
1293  * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1294  * Output Matching by Name in Linked Programs")
1295  *
1296  * To implement this, when linking shaders we will take the minimum precision
1297  * qualifier (allowing drivers to interpolate at lower precision). For
1298  * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1299  * requires we use the *last* specified precision if there is a conflict.
1300  *
1301  * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1302  * NONE, we'll return the other precision, since there is no conflict.
1303  * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1304  * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1305  * "backwards". For non-fragment stages, we'll pick the latter precision to
1306  * comply with the spec. (Note that the order matters.)
1307  *
1308  * For streamout, "Variables declared with lowp or mediump precision are
1309  * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1310  * of the OpenGL ES 3.2 specification). So drivers should promote them for
1311  * the transform feedback memory store, but not for the output store.
1312  */
1313 
1314 static unsigned
1315 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1316 {
1317    if (producer == GLSL_PRECISION_NONE)
1318       return consumer;
1319    else if (consumer == GLSL_PRECISION_NONE)
1320       return producer;
1321    else
1322       return fs ? MAX2(producer, consumer) : consumer;
1323 }
1324 
1325 void
1326 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1327 {
1328    bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1329 
1330    nir_foreach_shader_out_variable(producer_var, producer) {
1331       /* Skip if the slot is not assigned */
1332       if (producer_var->data.location < 0)
1333          continue;
1334 
1335       nir_variable *consumer_var = nir_find_variable_with_location(consumer,
1336             nir_var_shader_in, producer_var->data.location);
1337 
1338       /* Skip if the variable will be eliminated */
1339       if (!consumer_var)
1340          continue;
1341 
1342       /* Now we have a pair of variables. Let's pick the smaller precision. */
1343       unsigned precision_1 = producer_var->data.precision;
1344       unsigned precision_2 = consumer_var->data.precision;
1345       unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1346 
1347       /* Propagate the new precision */
1348       producer_var->data.precision = consumer_var->data.precision = minimum;
1349    }
1350 }
1351 
1352 bool
1353 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1354 {
1355    /* TODO: Add support for more shader stage combinations */
1356    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1357        (producer->info.stage != MESA_SHADER_VERTEX &&
1358         producer->info.stage != MESA_SHADER_TESS_EVAL))
1359       return false;
1360 
1361    bool progress = false;
1362 
1363    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1364 
1365    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1366 
1367    /* If we find a store in the last block of the producer we can be sure this
1368     * is the only possible value for this output.
1369     */
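   /* For example, if the vertex shader's last block stores a load_const into
    * an output, the matching fragment input load can be replaced by that
    * constant below (the replace_* helpers do the actual rewriting).
    */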
1370    nir_block *last_block = nir_impl_last_block(impl);
1371    nir_foreach_instr_reverse(instr, last_block) {
1372       if (instr->type != nir_instr_type_intrinsic)
1373          continue;
1374 
1375       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1376 
1377       if (intr->intrinsic != nir_intrinsic_store_deref)
1378          continue;
1379 
1380       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1381       if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1382          continue;
1383 
1384       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1385       if (!can_replace_varying(out_var))
1386          continue;
1387 
1388       nir_ssa_def *ssa = intr->src[1].ssa;
1389       if (ssa->parent_instr->type == nir_instr_type_load_const) {
1390          progress |= replace_varying_input_by_constant_load(consumer, intr);
1391          continue;
1392       }
1393 
1394       nir_ssa_scalar uni_scalar;
1395       if (is_direct_uniform_load(ssa, &uni_scalar)) {
1396          if (consumer->options->lower_varying_from_uniform) {
1397             progress |= replace_varying_input_by_uniform_load(consumer, intr,
1398                                                               &uni_scalar);
1399             continue;
1400          } else {
1401             nir_variable *in_var = get_matching_input_var(consumer, out_var);
1402             /* The varying is loaded from the same uniform, so no need to do any
1403              * interpolation. Mark it as flat explicitly.
1404              */
1405             if (!consumer->options->no_integers &&
1406                 in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
1407                in_var->data.interpolation = INTERP_MODE_FLAT;
1408                out_var->data.interpolation = INTERP_MODE_FLAT;
1409             }
1410          }
1411       }
1412 
1413       struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1414       if (entry) {
1415          progress |= replace_duplicate_input(consumer,
1416                                              (nir_variable *) entry->data,
1417                                              intr);
1418       } else {
1419          nir_variable *in_var = get_matching_input_var(consumer, out_var);
1420          if (in_var) {
1421             _mesa_hash_table_insert(varying_values, ssa, in_var);
1422          }
1423       }
1424    }
1425 
1426    _mesa_hash_table_destroy(varying_values, NULL);
1427 
1428    return progress;
1429 }
1430 
1431 /* TODO: is there a better helper somewhere to sort a list? */
1432 
1433 static void
1434 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1435 {
1436    nir_foreach_variable_in_list(var, var_list) {
1437       /* Use the `per_primitive` bool to sort per-primitive variables
1438        * to the end of the list, so they are assigned the last driver locations
1439        * by nir_assign_io_var_locations.
1440        *
1441        * This is done because AMD HW requires that per-primitive outputs
1442        * are the last params.
1443        * In the future we can add an option for this, if needed by other HW.
1444        */
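      /* Net effect: the list ends up sorted in ascending
       * (per_primitive, location, location_frac) order.
       */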
1445       if (new_var->data.per_primitive < var->data.per_primitive ||
1446           (new_var->data.per_primitive == var->data.per_primitive &&
1447            (var->data.location > new_var->data.location ||
1448             (var->data.location == new_var->data.location &&
1449              var->data.location_frac > new_var->data.location_frac)))) {
1450          exec_node_insert_node_before(&var->node, &new_var->node);
1451          return;
1452       }
1453    }
1454    exec_list_push_tail(var_list, &new_var->node);
1455 }
1456 
1457 static void
1458 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1459               struct exec_list *sorted_list)
1460 {
1461    exec_list_make_empty(sorted_list);
1462    nir_foreach_variable_with_modes_safe(var, shader, mode) {
1463       exec_node_remove(&var->node);
1464       insert_sorted(sorted_list, var);
1465    }
1466 }
1467 
1468 void
1469 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1470                             unsigned *size, gl_shader_stage stage)
1471 {
1472    unsigned location = 0;
1473    unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1474    uint64_t processed_locs[2] = {0};
1475 
1476    struct exec_list io_vars;
1477    sort_varyings(shader, mode, &io_vars);
1478 
1479    int ASSERTED last_loc = 0;
1480    bool ASSERTED last_per_prim = false;
1481    bool last_partial = false;
1482    nir_foreach_variable_in_list(var, &io_vars) {
1483       const struct glsl_type *type = var->type;
1484       if (nir_is_arrayed_io(var, stage)) {
1485          assert(glsl_type_is_array(type));
1486          type = glsl_get_array_element(type);
1487       }
1488 
1489       int base;
1490       if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1491          base = VERT_ATTRIB_GENERIC0;
1492       else if (var->data.mode == nir_var_shader_out &&
1493                stage == MESA_SHADER_FRAGMENT)
1494          base = FRAG_RESULT_DATA0;
1495       else
1496          base = VARYING_SLOT_VAR0;
1497 
1498       unsigned var_size, driver_size;
1499       if (var->data.compact) {
1500          /* If we are inside a partial compact,
1501           * don't allow another compact to be in this slot
1502           * if it starts at component 0.
1503           */
1504          if (last_partial && var->data.location_frac == 0) {
1505             location++;
1506          }
1507 
1508          /* compact variables must be arrays of scalars */
1509          assert(!var->data.per_view);
1510          assert(glsl_type_is_array(type));
1511          assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1512          unsigned start = 4 * location + var->data.location_frac;
1513          unsigned end = start + glsl_get_length(type);
1514          var_size = driver_size = end / 4 - location;
1515          last_partial = end % 4 != 0;
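         /* Worked example (illustrative): a compact float[5] array at
          * location_frac 0 gives end = start + 5, so var_size = driver_size = 1
          * and last_partial = true, i.e. one full slot plus a trailing
          * partial slot.
          */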
1516       } else {
1517          /* Compact variables bypass the normal varying compacting pass,
1518           * which means they cannot be in the same vec4 slot as a normal
1519           * variable. If part of the current slot is taken up by a compact
1520           * variable, we need to go to the next one.
1521           */
1522          if (last_partial) {
1523             location++;
1524             last_partial = false;
1525          }
1526 
1527          /* per-view variables have an extra array dimension, which is ignored
1528           * when counting user-facing slots (var->data.location), but *not*
1529           * for driver slots (var->data.driver_location). That is, each user
1530           * slot maps to multiple driver slots.
1531           */
1532          driver_size = glsl_count_attribute_slots(type, false);
1533          if (var->data.per_view) {
1534             assert(glsl_type_is_array(type));
1535             var_size =
1536                glsl_count_attribute_slots(glsl_get_array_element(type), false);
1537          } else {
1538             var_size = driver_size;
1539          }
1540       }
1541 
1542       /* Builtins don't allow component packing so we only need to worry about
1543        * user defined varyings sharing the same location.
1544        */
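      /* For example (illustrative), two user-defined varyings packed into
       * VARYING_SLOT_VAR3 at different components set/see the same
       * processed_locs bit and therefore end up with the same driver location.
       */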
1545       bool processed = false;
1546       if (var->data.location >= base) {
1547          unsigned glsl_location = var->data.location - base;
1548 
1549          for (unsigned i = 0; i < var_size; i++) {
1550             if (processed_locs[var->data.index] &
1551                 ((uint64_t)1 << (glsl_location + i)))
1552                processed = true;
1553             else
1554                processed_locs[var->data.index] |=
1555                   ((uint64_t)1 << (glsl_location + i));
1556          }
1557       }
1558 
1559       /* Because component packing allows varyings to share the same location,
1560        * we may already have processed this location.
1561        */
1562       if (processed) {
1563          /* TODO handle overlapping per-view variables */
1564          assert(!var->data.per_view);
1565          unsigned driver_location = assigned_locations[var->data.location];
1566          var->data.driver_location = driver_location;
1567 
1568          /* An array may be packed such that it crosses multiple other arrays
1569           * or variables; we need to make sure we have allocated the elements
1570           * consecutively if the previously processed var was shorter than
1571           * the current array we are processing.
1572           *
1573           * NOTE: The code below assumes the var list is ordered in ascending
1574           * location order, but per-vertex/per-primitive outputs may be
1575           * grouped separately.
1576           */
1577          assert(last_loc <= var->data.location ||
1578                 last_per_prim != var->data.per_primitive);
1579          last_loc = var->data.location;
1580          last_per_prim = var->data.per_primitive;
1581          unsigned last_slot_location = driver_location + var_size;
1582          if (last_slot_location > location) {
1583             unsigned num_unallocated_slots = last_slot_location - location;
1584             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1585             for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1586                assigned_locations[var->data.location + i] = location;
1587                location++;
1588             }
1589          }
1590          continue;
1591       }
1592 
1593       for (unsigned i = 0; i < var_size; i++) {
1594          assigned_locations[var->data.location + i] = location + i;
1595       }
1596 
1597       var->data.driver_location = location;
1598       location += driver_size;
1599    }
1600 
1601    if (last_partial)
1602       location++;
1603 
1604    exec_list_append(&shader->variables, &io_vars);
1605    *size = location;
1606 }
1607 
1608 static uint64_t
1609 get_linked_variable_location(unsigned location, bool patch)
1610 {
1611    if (!patch)
1612       return location;
1613 
1614    /* Reserve locations 0...3 for special patch variables
1615     * like tess factors and bounding boxes, and the generic patch
1616     * variables will come after them.
1617     */
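   /* Assuming the usual enum order (TESS_LEVEL_OUTER, TESS_LEVEL_INNER,
    * BOUNDING_BOX0, BOUNDING_BOX1), those built-ins map to 0..3 and
    * VARYING_SLOT_PATCH0 + i maps to 4 + i.
    */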
1618    if (location >= VARYING_SLOT_PATCH0)
1619       return location - VARYING_SLOT_PATCH0 + 4;
1620    else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1621             location <= VARYING_SLOT_BOUNDING_BOX1)
1622       return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1623    else
1624       unreachable("Unsupported variable in get_linked_variable_location.");
1625 }
1626 
1627 static uint64_t
1628 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1629 {
1630    const struct glsl_type *type = variable->type;
1631 
1632    if (nir_is_arrayed_io(variable, stage)) {
1633       assert(glsl_type_is_array(type));
1634       type = glsl_get_array_element(type);
1635    }
1636 
1637    unsigned slots = glsl_count_attribute_slots(type, false);
1638    if (variable->data.compact) {
1639       unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1640       slots = DIV_ROUND_UP(component_count, 4);
1641    }
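   /* E.g. (illustrative) a compact array of 5 scalars at location_frac 2 has
    * component_count = 7 and thus occupies DIV_ROUND_UP(7, 4) = 2 slots.
    */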
1642 
1643    uint64_t mask = u_bit_consecutive64(0, slots);
1644    return mask;
1645 }
1646 
1647 nir_linked_io_var_info
1648 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1649 {
1650    assert(producer);
1651    assert(consumer);
1652 
1653    uint64_t producer_output_mask = 0;
1654    uint64_t producer_patch_output_mask = 0;
1655 
1656    nir_foreach_shader_out_variable(variable, producer) {
1657       uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1658       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1659 
1660       if (variable->data.patch)
1661          producer_patch_output_mask |= mask << loc;
1662       else
1663          producer_output_mask |= mask << loc;
1664    }
1665 
1666    uint64_t consumer_input_mask = 0;
1667    uint64_t consumer_patch_input_mask = 0;
1668 
1669    nir_foreach_shader_in_variable(variable, consumer) {
1670       uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1671       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1672 
1673       if (variable->data.patch)
1674          consumer_patch_input_mask |= mask << loc;
1675       else
1676          consumer_input_mask |= mask << loc;
1677    }
1678 
1679    uint64_t io_mask = producer_output_mask | consumer_input_mask;
1680    uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1681 
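   /* Driver locations are assigned densely in linked-slot order: a variable at
    * linked location L gets util_bitcount64() of the mask bits below L. For
    * example (illustrative), if io_mask has bits 3, 5 and 9 set, the variable
    * at slot 5 gets driver_location 1.
    */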
1682    nir_foreach_shader_out_variable(variable, producer) {
1683       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1684 
1685       if (variable->data.patch)
1686          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1687       else
1688          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1689    }
1690 
1691    nir_foreach_shader_in_variable(variable, consumer) {
1692       uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1693 
1694       if (variable->data.patch)
1695          variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1696       else
1697          variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1698    }
1699 
1700    nir_linked_io_var_info result = {
1701       .num_linked_io_vars = util_bitcount64(io_mask),
1702       .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1703    };
1704 
1705    return result;
1706 }
1707