• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2021 Valve Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 /**
26  * Linker functions related specifically to linking varyings between shader
27  * stages.
28  */
29 
30 #include "main/errors.h"
31 #include "main/macros.h"
32 #include "main/menums.h"
33 #include "main/mtypes.h"
34 #include "program/symbol_table.h"
35 #include "util/hash_table.h"
36 #include "util/u_math.h"
37 #include "util/perf/cpu_trace.h"
38 
39 #include "nir.h"
40 #include "nir_builder.h"
41 #include "nir_deref.h"
42 #include "gl_nir.h"
43 #include "gl_nir_link_varyings.h"
44 #include "gl_nir_linker.h"
45 #include "linker_util.h"
46 #include "string_to_uint_map.h"
47 
/* Mask with the low (i) bits set; an index of 32 or more yields a full mask.
 * Unsigned literals are required here: `1 << 31` on a signed int is undefined
 * behavior (C11 6.5.7), and `~0` is a signed -1.
 */
#define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0u : ((1u << (i)) - 1))
49 
/* Temporary storage for the set of attributes that need locations assigned.
 * Records are sorted in descending slot order via compare_attr() (see below)
 * before locations are handed out.
 */
struct temp_attr {
   unsigned slots;      /* number of attribute slots the variable consumes */
   nir_variable *var;   /* the attribute variable itself */
};
55 
56 /* Used below in the call to qsort. */
57 static int
compare_attr(const void * a,const void * b)58 compare_attr(const void *a, const void *b)
59 {
60    const struct temp_attr *const l = (const struct temp_attr *) a;
61    const struct temp_attr *const r = (const struct temp_attr *) b;
62 
63    /* Reversed because we want a descending order sort below. */
64    return r->slots - l->slots;
65 }
66 
67 /**
68  * Get the varying type stripped of the outermost array if we're processing
69  * a stage whose varyings are arrays indexed by a vertex number (such as
70  * geometry shader inputs).
71  */
72 static const struct glsl_type *
get_varying_type(const nir_variable * var,gl_shader_stage stage)73 get_varying_type(const nir_variable *var, gl_shader_stage stage)
74 {
75    const struct glsl_type *type = var->type;
76    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
77       assert(glsl_type_is_array(type));
78       type = glsl_get_array_element(type);
79    }
80 
81    return type;
82 }
83 
/**
 * Find a contiguous set of available bits in a bitmask.
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
static int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   /* Reject degenerate requests up front: zero bits, or more bits than fit
    * in the 32-bit mask.  This also makes the loop visibly finite and keeps
    * every shift amount below well defined.
    */
   if (needed_count == 0 || needed_count > 32)
      return -1;

   /* Build the mask of the low needed_count bits with unsigned arithmetic:
    * `1 << 31` on a signed int is undefined behavior (C11 6.5.7), and so is
    * `1u << 32`, hence the special case for a full-width request.
    */
   unsigned needed_mask = (needed_count == 32)
      ? ~0u : (1u << needed_count) - 1;
   const int max_bit_to_test = 32 - (int) needed_count;

   /* Slide the window of needed bits upward until it lands entirely on
    * unused (zero) bits of used_mask.
    */
   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}
114 
/* Return true if any deref instruction in the shader references a variable
 * with exactly the given name.
 * Note: This function does not support arrays.
 */
static bool
find_deref(nir_shader *shader, const char *name)
{
   /* Walk every instruction of every function looking for a variable deref
    * whose variable name matches.
    */
   nir_foreach_function(func, shader) {
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               nir_deref_instr *deref = nir_instr_as_deref(instr);
               /* Only plain variable derefs are compared; array/struct
                * derefs chain off a var deref and are found through it.
                */
               if (deref->deref_type == nir_deref_type_var &&
                   strcmp(deref->var->name, name) == 0)
                  return true;
            }
         }
      }
   }

   return false;
}
136 
/**
 * Validate the types and qualifiers of an output from one stage against the
 * matching input to another stage.
 *
 * Raises a linker error (and usually returns early) on the first mismatch
 * found: type, sample, patch, invariant, and interpolation qualifiers are
 * checked in that order, with version-dependent relaxations noted inline.
 */
static void
cross_validate_types_and_qualifiers(const struct gl_constants *consts,
                                    struct gl_shader_program *prog,
                                    const nir_variable *input,
                                    const nir_variable *output,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   /* Check that the types match between stages.
    */
   const struct glsl_type *type_to_match = input->type;

   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
                                  consumer_stage == MESA_SHADER_GEOMETRY;
   if (extra_array_level) {
      /* The consumer's input is an array indexed by vertex; strip that
       * level before comparing against the producer's per-vertex output.
       */
      assert(glsl_type_is_array(type_to_match));
      type_to_match = glsl_get_array_element(type_to_match);
   }

   if (type_to_match != output->type) {
      if (glsl_type_is_struct(output->type)) {
         /* Structures across shader stages can have different names and are
          * considered to match in type if and only if structure members
          * match in name, type, qualification, and declaration order.  The
          * precision doesn't need to match.
          */
         if (!glsl_record_compare(output->type, type_to_match,
                                  false, /* match_name */
                                  true, /* match_locations */
                                  false /* match_precision */)) {
            linker_error(prog,
                  "%s shader output `%s' declared as struct `%s', "
                  "doesn't match in type with %s shader input "
                  "declared as struct `%s'\n",
                  _mesa_shader_stage_to_string(producer_stage),
                  output->name,
                  glsl_get_type_name(output->type),
                  _mesa_shader_stage_to_string(consumer_stage),
                  glsl_get_type_name(input->type));
         }
         /* NOTE(review): unlike the non-struct branch below, this path does
          * not return early after reporting the error; the qualifier checks
          * below still run.  Presumably intentional — confirm.
          */
      } else if (!glsl_type_is_array(output->type) ||
                 !is_gl_identifier(output->name)) {
         /* There is a bit of a special case for gl_TexCoord.  This
          * built-in is unsized by default.  Applications that access it
          * must redeclare it with a size.  There is some language in the
          * GLSL spec that implies the fragment shader and vertex shader do
          * not have to agree on this size.  Other drivers behave this way,
          * and one or two applications seem to rely on it.
          *
          * Neither declaration needs to be modified here because the array
          * sizes are fixed later when update_array_sizes is called.
          *
          * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
          *
          *     "Unlike user-defined varying variables, the built-in
          *     varying variables don't have a strict one-to-one
          *     correspondence between the vertex language and the
          *     fragment language."
          */
         linker_error(prog,
                      "%s shader output `%s' declared as type `%s', "
                      "but %s shader input declared as type `%s'\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      glsl_get_type_name(output->type),
                      _mesa_shader_stage_to_string(consumer_stage),
                      glsl_get_type_name(input->type));
         return;
      }
   }

   /* Check that all of the qualifiers match between stages.
    */

   /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
    * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
    * conformance test suite does not verify that the qualifiers must match.
    * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
    * OpenGLES 3.0 drivers, so we relax the checking in all cases.
    *
    * The `false &&` below deliberately disables this check while keeping
    * the code for documentation purposes.
    */
   if (false /* always skip the centroid check */ &&
       prog->GLSL_Version < (prog->IsES ? 310 : 430) &&
       input->data.centroid != output->data.centroid) {
      linker_error(prog,
                   "%s shader output `%s' %s centroid qualifier, "
                   "but %s shader input %s centroid qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.centroid) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.centroid) ? "has" : "lacks");
      return;
   }

   if (input->data.sample != output->data.sample) {
      linker_error(prog,
                   "%s shader output `%s' %s sample qualifier, "
                   "but %s shader input %s sample qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.sample) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.sample) ? "has" : "lacks");
      return;
   }

   if (input->data.patch != output->data.patch) {
      linker_error(prog,
                   "%s shader output `%s' %s patch qualifier, "
                   "but %s shader input %s patch qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.patch) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.patch) ? "has" : "lacks");
      return;
   }

   /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
    *
    *    "As only outputs need be declared with invariant, an output from
    *     one shader stage will still match an input of a subsequent stage
    *     without the input being declared as invariant."
    *
    * while GLSL 4.10 says:
    *
    *    "For variables leaving one shader and coming into another shader,
    *     the invariant keyword has to be used in both shaders, or a link
    *     error will result."
    *
    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
    *
    *    "The invariance of varyings that are declared in both the vertex
    *     and fragment shaders must match."
    */
   if (input->data.explicit_invariant != output->data.explicit_invariant &&
       prog->GLSL_Version < (prog->IsES ? 300 : 420)) {
      linker_error(prog,
                   "%s shader output `%s' %s invariant qualifier, "
                   "but %s shader input %s invariant qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.explicit_invariant) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.explicit_invariant) ? "has" : "lacks");
      return;
   }

   /* GLSL >= 4.40 removes text requiring interpolation qualifiers
    * to match cross stage, they must only match within the same stage.
    *
    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
    *
    *     "It is a link-time error if, within the same stage, the interpolation
    *     qualifiers of variables of the same name do not match.
    *
    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
    *
    *    "When no interpolation qualifier is present, smooth interpolation
    *    is used."
    *
    * So we match variables where one is smooth and the other has no explicit
    * qualifier.
    */
   unsigned input_interpolation = input->data.interpolation;
   unsigned output_interpolation = output->data.interpolation;
   if (prog->IsES) {
      if (input_interpolation == INTERP_MODE_NONE)
         input_interpolation = INTERP_MODE_SMOOTH;
      if (output_interpolation == INTERP_MODE_NONE)
         output_interpolation = INTERP_MODE_SMOOTH;
   }
   if (input_interpolation != output_interpolation &&
       prog->GLSL_Version < 440) {
      /* A driconf option can demote this error to a warning for broken
       * applications that rely on the mismatch being accepted.
       */
      if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
         linker_error(prog,
                      "%s shader output `%s' specifies %s "
                      "interpolation qualifier, "
                      "but %s shader input specifies %s "
                      "interpolation qualifier\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      interpolation_string(output->data.interpolation),
                      _mesa_shader_stage_to_string(consumer_stage),
                      interpolation_string(input->data.interpolation));
         return;
      } else {
         linker_warning(prog,
                        "%s shader output `%s' specifies %s "
                        "interpolation qualifier, "
                        "but %s shader input specifies %s "
                        "interpolation qualifier\n",
                        _mesa_shader_stage_to_string(producer_stage),
                        output->name,
                        interpolation_string(output->data.interpolation),
                        _mesa_shader_stage_to_string(consumer_stage),
                        interpolation_string(input->data.interpolation));
      }
   }
}
344 
345 /**
346  * Validate front and back color outputs against single color input
347  */
348 static void
cross_validate_front_and_back_color(const struct gl_constants * consts,struct gl_shader_program * prog,const nir_variable * input,const nir_variable * front_color,const nir_variable * back_color,gl_shader_stage consumer_stage,gl_shader_stage producer_stage)349 cross_validate_front_and_back_color(const struct gl_constants *consts,
350                                     struct gl_shader_program *prog,
351                                     const nir_variable *input,
352                                     const nir_variable *front_color,
353                                     const nir_variable *back_color,
354                                     gl_shader_stage consumer_stage,
355                                     gl_shader_stage producer_stage)
356 {
357    if (front_color != NULL && front_color->data.assigned)
358       cross_validate_types_and_qualifiers(consts, prog, input, front_color,
359                                           consumer_stage, producer_stage);
360 
361    if (back_color != NULL && back_color->data.assigned)
362       cross_validate_types_and_qualifiers(consts, prog, input, back_color,
363                                           consumer_stage, producer_stage);
364 }
365 
366 static unsigned
compute_variable_location_slot(nir_variable * var,gl_shader_stage stage)367 compute_variable_location_slot(nir_variable *var, gl_shader_stage stage)
368 {
369    unsigned location_start = VARYING_SLOT_VAR0;
370 
371    switch (stage) {
372       case MESA_SHADER_VERTEX:
373          if (var->data.mode == nir_var_shader_in)
374             location_start = VERT_ATTRIB_GENERIC0;
375          break;
376       case MESA_SHADER_TESS_CTRL:
377       case MESA_SHADER_TESS_EVAL:
378          if (var->data.patch)
379             location_start = VARYING_SLOT_PATCH0;
380          break;
381       case MESA_SHADER_FRAGMENT:
382          if (var->data.mode == nir_var_shader_out)
383             location_start = FRAG_RESULT_DATA0;
384          break;
385       default:
386          break;
387    }
388 
389    return var->data.location - location_start;
390 }
391 
392 
/* Per-location, per-component record of which variable claimed an explicit
 * location, together with the qualifiers that any alias sharing the
 * location must match (filled in and checked by check_location_aliasing()).
 */
struct explicit_location_info {
   nir_variable *var;            /* variable occupying this location/component */
   bool base_type_is_integer;    /* underlying numerical type is integer */
   unsigned base_type_bit_size;  /* bit width of the underlying base type (0 for structs) */
   unsigned interpolation;       /* interpolation qualifier */
   bool centroid;                /* auxiliary storage qualifiers... */
   bool sample;
   bool patch;
};
402 
/* Record the [location, location_limit) x [component, last component) span
 * occupied by "var" in explicit_locations[][] and verify that it does not
 * illegally alias a span already claimed by another variable.
 *
 * Variables aliasing the same location must have the same underlying
 * numerical base type, bit size, interpolation, and auxiliary storage
 * qualifiers, and must never overlap on the same component.  Returns false
 * (after raising a linker error) on any violation, true otherwise.
 */
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        nir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const struct glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        struct gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const struct glsl_type *type_without_array = glsl_without_array(type);
   const bool base_type_is_integer =
      glsl_base_type_is_integer(glsl_get_base_type(type_without_array));
   const bool is_struct = glsl_type_is_struct(type_without_array);
   if (is_struct) {
      /* structs don't have a defined underlying base type so just treat all
       * component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail anyway later.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
      /* 64-bit types consume two components each. */
      unsigned dmul = glsl_type_is_64bit(type_without_array) ? 2 : 1;
      last_comp = component + glsl_get_vector_elements(type_without_array) * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(glsl_get_base_type(type_without_array));
   }

   /* Walk every (location, component) cell the variable could touch. */
   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            /* Cell already claimed: validate aliasing compatibility. */
            if (glsl_type_is_struct(glsl_without_array(info->var->type)) ||
                is_struct) {
               /* Structs cannot share location since they are incompatible
                * with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *   " Further, when location aliasing, the aliases sharing the
                *     location must have the same underlying numerical type
                *     and bit width (floating-point or integer, 32-bit versus
                *     64-bit, etc.) and the same auxiliary storage and
                *     interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, implicitly
                * it will be float or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            /* Unclaimed cell inside this variable's span: claim it. */
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            last_comp = last_comp - 4;
            /* Bump location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}
563 
/* Check one variable's explicit location range against the per-stage slot
 * limits and record it in explicit_locations[][] via
 * check_location_aliasing().
 *
 * Returns false (a linker error has been raised) when the location range is
 * out of bounds or aliases a previously-seen variable incompatibly.
 */
static bool
validate_explicit_variable_location(const struct gl_constants *consts,
                                    struct explicit_location_info explicit_locations[][4],
                                    nir_variable *var,
                                    struct gl_shader_program *prog,
                                    struct gl_linked_shader *sh)
{
   const struct glsl_type *type = get_varying_type(var, sh->Stage);
   unsigned num_elements = glsl_count_attribute_slots(type, false);
   unsigned idx = compute_variable_location_slot(var, sh->Stage);
   unsigned slot_limit = idx + num_elements;

   /* Vertex shader inputs and fragment shader outputs are validated in
    * assign_attribute_or_color_locations() so we should not attempt to
    * validate them again here.
    */
   unsigned slot_max;
   if (var->data.mode == nir_var_shader_out) {
      assert(sh->Stage != MESA_SHADER_FRAGMENT);
      slot_max = consts->Program[sh->Stage].MaxOutputComponents / 4;
   } else {
      assert(var->data.mode == nir_var_shader_in);
      assert(sh->Stage != MESA_SHADER_VERTEX);
      slot_max = consts->Program[sh->Stage].MaxInputComponents / 4;
   }

   if (slot_limit > slot_max) {
      linker_error(prog,
                   "Invalid location %u in %s shader\n",
                   idx, _mesa_shader_stage_to_string(sh->Stage));
      return false;
   }

   /* Interface blocks carry explicit locations per member, so each member's
    * range is validated individually rather than the block as a whole.
    */
   const struct glsl_type *type_without_array = glsl_without_array(type);
   if (glsl_type_is_interface(type_without_array)) {
      for (unsigned i = 0; i < glsl_get_length(type_without_array); i++) {
         const struct glsl_struct_field *field =
            glsl_get_struct_field_data(type_without_array, i);
         unsigned field_location = field->location -
            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
         unsigned field_slots = glsl_count_attribute_slots(field->type, false);
         if (!check_location_aliasing(explicit_locations, var,
                                      field_location,
                                      0,
                                      field_location + field_slots,
                                      field->type,
                                      field->interpolation,
                                      field->centroid,
                                      field->sample,
                                      field->patch,
                                      prog, sh->Stage)) {
            return false;
         }
      }
   } else if (!check_location_aliasing(explicit_locations, var,
                                       idx, var->data.location_frac,
                                       slot_limit, type,
                                       var->data.interpolation,
                                       var->data.centroid,
                                       var->data.sample,
                                       var->data.patch,
                                       prog, sh->Stage)) {
      return false;
   }

   return true;
}
631 
/**
 * Validate explicit locations for the inputs to the first stage and the
 * outputs of the last stage in a program, if those are not the VS and FS
 * shaders.
 *
 * Stops at the first invalid variable; the error has already been recorded
 * on \p prog by linker_error() at that point.
 */
void
gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
                                                            struct gl_shader_program *prog,
                                                            gl_shader_stage first_stage,
                                                            gl_shader_stage last_stage)
{
   /* VS inputs and FS outputs are validated in
    * assign_attribute_or_color_locations()
    */
   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
   if (!validate_first_stage && !validate_last_stage)
      return;

   struct explicit_location_info explicit_locations[MAX_VARYING][4];

   /* Pass 0 checks the first stage's inputs, pass 1 the last stage's
    * outputs; each pass works on a freshly zeroed location table.
    */
   gl_shader_stage stages[2] = { first_stage, last_stage };
   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
   nir_variable_mode var_mode[2] = { nir_var_shader_in, nir_var_shader_out };

   for (unsigned i = 0; i < 2; i++) {
      if (!validate_stage[i])
         continue;

      gl_shader_stage stage = stages[i];

      struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
      assert(sh);

      memset(explicit_locations, 0, sizeof(explicit_locations));

      nir_foreach_variable_with_modes(var, sh->Program->nir, var_mode[i]) {
         /* Only user-defined varyings with explicit locations are checked;
          * variables below VARYING_SLOT_VAR0 (built-ins) are skipped.
          */
         if (!var->data.explicit_location ||
             var->data.location < VARYING_SLOT_VAR0)
            continue;

         if (!validate_explicit_variable_location(consts, explicit_locations,
                                                  var, prog, sh)) {
            return;
         }
      }
   }
}
680 
681 /**
682  * Check if we should force input / output matching between shader
683  * interfaces.
684  *
685  * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say:
686  *
687  *   "Only the input variables that are actually read need to be
688  *    written by the previous stage; it is allowed to have
689  *    superfluous declarations of input variables."
690  *
691  * However it's not defined anywhere as to how we should handle
692  * inputs that are not written in the previous stage and it's not
693  * clear what "actually read" means.
694  *
695  * The GLSL 4.20 spec however is much clearer:
696  *
697  *    "Only the input variables that are statically read need to
698  *     be written by the previous stage; it is allowed to have
699  *     superfluous declarations of input variables."
700  *
701  * It also has a table that states it is an error to statically
702  * read an input that is not defined in the previous stage. While
703  * it is not an error to not statically write to the output (it
704  * just needs to be defined to not be an error).
705  *
706  * The text in the GLSL 4.20 spec was an attempt to clarify the
707  * previous spec iterations. However given the difference in spec
708  * and that some applications seem to depend on not erroring when
709  * the input is not actually read in control flow we only apply
710  * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
711  * seen in the wild that depend on the less strict interpretation.
712  */
713 static bool
static_input_output_matching(struct gl_shader_program * prog)714 static_input_output_matching(struct gl_shader_program *prog)
715 {
716    return prog->GLSL_Version >= (prog->IsES ? 0 : 420);
717 }
718 
719 /**
720  * Validate that outputs from one stage match inputs of another
721  */
void
gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
                                        struct gl_shader_program *prog,
                                        struct gl_linked_shader *producer,
                                        struct gl_linked_shader *consumer)
{
   /* Name -> nir_variable table of the producer's outputs, used for the
    * name-based matching path; explicit-location varyings are instead
    * tracked in the per-slot tables below.
    */
   struct _mesa_symbol_table *table = _mesa_symbol_table_ctor();
   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {0};
   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {0};

   /* Find all shader outputs in the "producer" stage.
    */
   nir_foreach_variable_with_modes(var, producer->Program->nir, nir_var_shader_out) {
      if (!var->data.explicit_location
          || var->data.location < VARYING_SLOT_VAR0) {
         /* Interface block validation is handled elsewhere */
         if (!var->interface_type || is_gl_identifier(var->name))
            _mesa_symbol_table_add_symbol(table, var->name, var);

      } else {
         /* User-defined varyings with explicit locations are handled
          * differently because they do not need to have matching names.
          */
         if (!validate_explicit_variable_location(consts,
                                                  output_explicit_locations,
                                                  var, prog, producer)) {
            goto out;
         }
      }
   }

   /* Find all shader inputs in the "consumer" stage.  Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    *
    * Exception: if the consumer is the geometry shader, then the inputs
    * should be arrays and the type of the array element should match the type
    * of the corresponding producer output.
    */
   nir_foreach_variable_with_modes(input, consumer->Program->nir, nir_var_shader_in) {
      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
         /* A read of gl_Color is validated against both the front and back
          * color outputs of the previous stage.
          */
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
         /* Same two-sided validation for the secondary color. */
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontSecondaryColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackSecondaryColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else {
         /* The rules for connecting inputs and outputs change in the presence
          * of explicit locations.  In this case, we no longer care about the
          * names of the variables.  Instead, we care only about the
          * explicitly assigned location.
          */
         nir_variable *output = NULL;
         if (input->data.explicit_location
             && input->data.location >= VARYING_SLOT_VAR0) {

            const struct glsl_type *type =
               get_varying_type(input, consumer->Stage);
            unsigned num_elements = glsl_count_attribute_slots(type, false);
            unsigned idx =
               compute_variable_location_slot(input, consumer->Stage);
            unsigned slot_limit = idx + num_elements;

            if (!validate_explicit_variable_location(consts,
                                                     input_explicit_locations,
                                                     input, prog, consumer)) {
               goto out;
            }

            /* Walk every slot this input occupies and look up the producer
             * output recorded at the same location/component.
             */
            while (idx < slot_limit) {
               if (idx >= MAX_VARYING) {
                  linker_error(prog,
                               "Invalid location %u in %s shader\n", idx,
                               _mesa_shader_stage_to_string(consumer->Stage));
                  goto out;
               }

               output = output_explicit_locations[idx][input->data.location_frac].var;

               if (output == NULL) {
                  /* A linker failure should only happen when there is no
                   * output declaration and there is Static Use of the
                   * declared input.
                   */
                  if (input->data.used && static_input_output_matching(prog)) {
                     linker_error(prog,
                                  "%s shader input `%s' with explicit location "
                                  "has no matching output\n",
                                  _mesa_shader_stage_to_string(consumer->Stage),
                                  input->name);
                     break;
                  }
               } else if (input->data.location != output->data.location) {
                  /* The slot is occupied, but by an output whose base
                   * location differs from the input's, i.e. it is not the
                   * matching output for this varying.
                   */
                  linker_error(prog,
                               "%s shader input `%s' with explicit location "
                               "has no matching output\n",
                               _mesa_shader_stage_to_string(consumer->Stage),
                               input->name);
                  break;
               }
               idx++;
            }
         } else {
            /* Interface block validation is handled elsewhere */
            if (input->interface_type)
               continue;

            output = (nir_variable *)
               _mesa_symbol_table_find_symbol(table, input->name);
         }

         if (output != NULL) {
            /* Interface blocks have their own validation elsewhere so don't
             * try validating them here.
             */
            if (!(input->interface_type && output->interface_type))
               cross_validate_types_and_qualifiers(consts, prog, input, output,
                                                   consumer->Stage,
                                                   producer->Stage);
         } else {
            /* Check for input vars with unmatched output vars in prev stage
             * taking into account that interface blocks could have a matching
             * output but with different name, so we ignore them.
             */
            assert(!input->data.assigned);
            if (input->data.used && !input->interface_type &&
                !input->data.explicit_location &&
                static_input_output_matching(prog))
               linker_error(prog,
                            "%s shader input `%s' "
                            "has no matching output in the previous stage\n",
                            _mesa_shader_stage_to_string(consumer->Stage),
                            input->name);
         }
      }
   }

 out:
   _mesa_symbol_table_dtor(table);
}
876 
877 /**
878  * Assign locations for either VS inputs or FS outputs.
879  *
880  * \param mem_ctx        Temporary ralloc context used for linking.
881  * \param prog           Shader program whose variables need locations
882  *                       assigned.
883  * \param constants      Driver specific constant values for the program.
884  * \param target_index   Selector for the program target to receive location
 *                       assignments.  Must be either \c MESA_SHADER_VERTEX or
886  *                       \c MESA_SHADER_FRAGMENT.
887  * \param do_assignment  Whether we are actually marking the assignment or we
888  *                       are just doing a dry-run checking.
889  *
890  * \return
891  * If locations are (or can be, in case of dry-running) successfully assigned,
892  * true is returned.  Otherwise an error is emitted to the shader link log and
893  * false is returned.
894  */
static bool
assign_attribute_or_color_locations(void *mem_ctx,
                                    struct gl_shader_program *prog,
                                    const struct gl_constants *constants,
                                    unsigned target_index,
                                    bool do_assignment)
{
   /* Maximum number of generic locations.  This corresponds to either the
    * maximum number of draw buffers or the maximum number of generic
    * attributes.
    */
   unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
      constants->Program[target_index].MaxAttribs :
      MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);

   assert(max_index <= 32);
   struct temp_attr to_assign[32];

   /* Mark invalid locations as being used.
    * Bit i of used_locations set means generic location i is unavailable;
    * all bits at or above max_index start out set.
    */
   unsigned used_locations = ~SAFE_MASK_FROM_INDEX(max_index);
   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
          || (target_index == MESA_SHADER_FRAGMENT));

   if (prog->_LinkedShaders[target_index] == NULL)
      return true;

   /* Operate in a total of four passes.
    *
    * 1. Invalidate the location assignments for all vertex shader inputs.
    *
    * 2. Assign locations for inputs that have user-defined (via
    *    glBindVertexAttribLocation) locations and outputs that have
    *    user-defined locations (via glBindFragDataLocation).
    *
    * 3. Sort the attributes without assigned locations by number of slots
    *    required in decreasing order.  Fragmentation caused by attribute
    *    locations assigned by the application may prevent large attributes
    *    from having enough contiguous space.
    *
    * 4. Assign locations to any inputs without assigned locations.
    */

   const int generic_base = (target_index == MESA_SHADER_VERTEX)
      ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;

   nir_variable_mode io_mode =
      (target_index == MESA_SHADER_VERTEX)
      ? nir_var_shader_in : nir_var_shader_out;

   /* Temporary array for the set of attributes that have locations assigned,
    * for the purpose of checking overlapping slots/components of (non-ES)
    * fragment shader outputs.
    */
   nir_variable *assigned[FRAG_RESULT_MAX * 4]; /* (max # of FS outputs) * # components */
   unsigned assigned_attr = 0;

   unsigned num_attr = 0;

   nir_shader *shader = prog->_LinkedShaders[target_index]->Program->nir;
   nir_foreach_variable_with_modes(var, shader, io_mode) {

      if (var->data.explicit_location) {
         /* Range-check locations set in the shader via layout qualifiers. */
         if ((var->data.location >= (int)(max_index + generic_base))
             || (var->data.location < 0)) {
            linker_error(prog,
                         "invalid explicit location %d specified for `%s'\n",
                         (var->data.location < 0)
                         ? var->data.location
                         : var->data.location - generic_base,
                         var->name);
            return false;
         }
      } else if (target_index == MESA_SHADER_VERTEX) {
         /* Apply any glBindAttribLocation binding for this input's name. */
         unsigned binding;

         if (string_to_uint_map_get(prog->AttributeBindings, &binding, var->name)) {
            assert(binding >= VERT_ATTRIB_GENERIC0);
            var->data.location = binding;
         }
      } else if (target_index == MESA_SHADER_FRAGMENT) {
         /* Apply glBindFragDataLocation[Indexed] bindings, looking up the
          * plain name first and then successive "name[0]" forms for arrays.
          */
         unsigned binding;
         unsigned index;
         const char *name = var->name;
         const struct glsl_type *type = var->type;

         while (type) {
            /* Check if there's a binding for the variable name */
            if (string_to_uint_map_get(prog->FragDataBindings, &binding, name)) {
               assert(binding >= FRAG_RESULT_DATA0);
               var->data.location = binding;

               if (string_to_uint_map_get(prog->FragDataIndexBindings, &index, name)) {
                  var->data.index = index;
               }
               break;
            }

            /* If not, but it's an array type, look for name[0] */
            if (glsl_type_is_array(type)) {
               name = ralloc_asprintf(mem_ctx, "%s[0]", name);
               type = glsl_get_array_element(type);
               continue;
            }

            break;
         }
      }

      /* gl_LastFragData never participates in location assignment. */
      if (strcmp(var->name, "gl_LastFragData") == 0)
         continue;

      /* From GL4.5 core spec, section 15.2 (Shader Execution):
       *
       *     "Output binding assignments will cause LinkProgram to fail:
       *     ...
       *     If the program has an active output assigned to a location greater
       *     than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
       *     an active output assigned an index greater than or equal to one;"
       */
      if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
          var->data.location - generic_base >=
          (int) constants->MaxDualSourceDrawBuffers) {
         linker_error(prog,
                      "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
                      "with index %u for %s\n",
                      var->data.location - generic_base, var->data.index,
                      var->name);
         return false;
      }

      const unsigned slots =
         glsl_count_attribute_slots(var->type,
                                    target_index == MESA_SHADER_VERTEX);

      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations.  Otherwise,
       * add it to the list of variables that need linker-assigned locations.
       */
      if (var->data.location != -1) {
         if (var->data.location >= generic_base && var->data.index < 1) {
            /* From page 61 of the OpenGL 4.0 spec:
             *
             *     "LinkProgram will fail if the attribute bindings assigned
             *     by BindAttribLocation do not leave not enough space to
             *     assign a location for an active matrix attribute or an
             *     active attribute array, both of which require multiple
             *     contiguous generic attributes."
             *
             * I think above text prohibits the aliasing of explicit and
             * automatic assignments. But, aliasing is allowed in manual
             * assignments of attribute locations. See below comments for
             * the details.
             *
             * From OpenGL 4.0 spec, page 61:
             *
             *     "It is possible for an application to bind more than one
             *     attribute name to the same location. This is referred to as
             *     aliasing. This will only work if only one of the aliased
             *     attributes is active in the executable program, or if no
             *     path through the shader consumes more than one attribute of
             *     a set of attributes aliased to the same location. A link
             *     error can occur if the linker determines that every path
             *     through the shader consumes multiple aliased attributes,
             *     but implementations are not required to generate an error
             *     in this case."
             *
             * From GLSL 4.30 spec, page 54:
             *
             *    "A program will fail to link if any two non-vertex shader
             *     input variables are assigned to the same location. For
             *     vertex shaders, multiple input variables may be assigned
             *     to the same location using either layout qualifiers or via
             *     the OpenGL API. However, such aliasing is intended only to
             *     support vertex shaders where each execution path accesses
             *     at most one input per each location. Implementations are
             *     permitted, but not required, to generate link-time errors
             *     if they detect that every path through the vertex shader
             *     executable accesses multiple inputs assigned to any single
             *     location. For all shader types, a program will fail to link
             *     if explicit location assignments leave the linker unable
             *     to find space for other variables without explicit
             *     assignments."
             *
             * From OpenGL ES 3.0 spec, page 56:
             *
             *    "Binding more than one attribute name to the same location
             *     is referred to as aliasing, and is not permitted in OpenGL
             *     ES Shading Language 3.00 vertex shaders. LinkProgram will
             *     fail when this condition exists. However, aliasing is
             *     possible in OpenGL ES Shading Language 1.00 vertex shaders.
             *     This will only work if only one of the aliased attributes
             *     is active in the executable program, or if no path through
             *     the shader consumes more than one attribute of a set of
             *     attributes aliased to the same location. A link error can
             *     occur if the linker determines that every path through the
             *     shader consumes multiple aliased attributes, but implemen-
             *     tations are not required to generate an error in this case."
             *
             * After looking at above references from OpenGL, OpenGL ES and
             * GLSL specifications, we allow aliasing of vertex input variables
             * in: OpenGL 2.0 (and above) and OpenGL ES 2.0.
             *
             * NOTE: This is not required by the spec but it's worth mentioning
             * here that we're not doing anything to make sure that no path
             * through the vertex shader executable accesses multiple inputs
             * assigned to any single location.
             */

            /* Mask representing the contiguous slots that will be used by
             * this attribute.
             */
            const unsigned attr = var->data.location - generic_base;
            const unsigned use_mask = (1 << slots) - 1;
            const char *const string = (target_index == MESA_SHADER_VERTEX)
               ? "vertex shader input" : "fragment shader output";

            /* Generate a link error if the requested locations for this
             * attribute exceed the maximum allowed attribute location.
             */
            if (attr + slots > max_index) {
               linker_error(prog,
                           "insufficient contiguous locations "
                           "available for %s `%s' %d %d %d\n", string,
                           var->name, used_locations, use_mask, attr);
               return false;
            }

            /* Generate a link error if the set of bits requested for this
             * attribute overlaps any previously allocated bits.
             */
            if ((~(use_mask << attr) & used_locations) != used_locations) {
               if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
                  /* From section 4.4.2 (Output Layout Qualifiers) of the GLSL
                   * 4.40 spec:
                   *
                   *    "Additionally, for fragment shader outputs, if two
                   *    variables are placed within the same location, they
                   *    must have the same underlying type (floating-point or
                   *    integer). No component aliasing of output variables or
                   *    members is allowed.
                   */
                  for (unsigned i = 0; i < assigned_attr; i++) {
                     unsigned assigned_slots =
                        glsl_count_attribute_slots(assigned[i]->type, false);
                     unsigned assig_attr =
                        assigned[i]->data.location - generic_base;
                     unsigned assigned_use_mask = (1 << assigned_slots) - 1;

                     if ((assigned_use_mask << assig_attr) &
                         (use_mask << attr)) {

                        /* Locations overlap: the base types must match... */
                        const struct glsl_type *assigned_type =
                           glsl_without_array(assigned[i]->type);
                        const struct glsl_type *type =
                           glsl_without_array(var->type);
                        if (glsl_get_base_type(assigned_type) !=
                            glsl_get_base_type(type)) {
                           linker_error(prog, "types do not match for aliased"
                                        " %ss %s and %s\n", string,
                                        assigned[i]->name, var->name);
                           return false;
                        }

                        /* ...and the component masks must not intersect. */
                        unsigned assigned_component_mask =
                           ((1 << glsl_get_vector_elements(assigned_type)) - 1) <<
                           assigned[i]->data.location_frac;
                        unsigned component_mask =
                           ((1 << glsl_get_vector_elements(type)) - 1) <<
                           var->data.location_frac;
                        if (assigned_component_mask & component_mask) {
                           linker_error(prog, "overlapping component is "
                                        "assigned to %ss %s and %s "
                                        "(component=%d)\n",
                                        string, assigned[i]->name, var->name,
                                        var->data.location_frac);
                           return false;
                        }
                     }
                  }
               } else if (target_index == MESA_SHADER_FRAGMENT ||
                          (prog->IsES && prog->GLSL_Version >= 300)) {
                  /* Aliasing is a hard link error for ES fragment outputs
                   * and ESSL >= 3.00 vertex inputs.
                   */
                  linker_error(prog, "overlapping location is assigned "
                               "to %s `%s' %d %d %d\n", string, var->name,
                               used_locations, use_mask, attr);
                  return false;
               } else {
                  /* Desktop GL / ESSL 1.00 vertex input aliasing is allowed;
                   * just warn.
                   */
                  linker_warning(prog, "overlapping location is assigned "
                                 "to %s `%s' %d %d %d\n", string, var->name,
                                 used_locations, use_mask, attr);
               }
            }

            if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
               /* Only track assigned variables for non-ES fragment shaders
                * to avoid overflowing the array.
                *
                * At most one variable per fragment output component should
                * reach this.
                */
               assert(assigned_attr < ARRAY_SIZE(assigned));
               assigned[assigned_attr] = var;
               assigned_attr++;
            }

            used_locations |= (use_mask << attr);

            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
             *
             * "A program with more than the value of MAX_VERTEX_ATTRIBS
             *  active attribute variables may fail to link, unless
             *  device-dependent optimizations are able to make the program
             *  fit within available hardware resources. For the purposes
             *  of this test, attribute variables of the type dvec3, dvec4,
             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
             *  count as consuming twice as many attributes as equivalent
             *  single-precision types. While these types use the same number
             *  of generic attributes as their single-precision equivalents,
             *  implementations are permitted to consume two single-precision
             *  vectors of internal storage for each three- or four-component
             *  double-precision vector."
             *
             * Mark this attribute slot as taking up twice as much space
             * so we can count it properly against limits.  According to
             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
             * is optional behavior, but it seems preferable.
             */
            if (glsl_type_is_dual_slot(glsl_without_array(var->type)))
               double_storage_locations |= (use_mask << attr);
         }

         continue;
      }

      /* No pre-assigned location: queue the variable for linker assignment. */
      if (num_attr >= max_index) {
         linker_error(prog, "too many %s (max %u)",
                      target_index == MESA_SHADER_VERTEX ?
                      "vertex shader inputs" : "fragment shader outputs",
                      max_index);
         return false;
      }
      to_assign[num_attr].slots = slots;
      to_assign[num_attr].var = var;
      num_attr++;
   }

   /* Dry-run mode stops after validating the explicit assignments. */
   if (!do_assignment)
      return true;

   if (target_index == MESA_SHADER_VERTEX) {
      /* Dual-slot (64-bit) attributes count twice against the limit. */
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots only %d available ",
                      total_attribs_size, max_index);
         return false;
      }
   }

   /* If all of the attributes were assigned locations by the application (or
    * are built-in attributes with fixed locations), return early.  This should
    * be the common case.
    */
   if (num_attr == 0)
      return true;

   /* Pass 3: largest-first so multi-slot attributes find contiguous space. */
   qsort(to_assign, num_attr, sizeof(to_assign[0]), &compare_attr);

   if (target_index == MESA_SHADER_VERTEX) {
      /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can
       * only be explicitly assigned via glBindAttribLocation.  Mark it as
       * reserved to prevent it from being automatically allocated below.
       */
      if (find_deref(shader, "gl_Vertex"))
         used_locations |= (1 << 0);
   }

   /* Pass 4: allocate the remaining attributes into free location runs. */
   for (unsigned i = 0; i < num_attr; i++) {
      /* Mask representing the contiguous slots that will be used by this
       * attribute.
       */
      const unsigned use_mask = (1 << to_assign[i].slots) - 1;

      int location = find_available_slots(used_locations, to_assign[i].slots);

      if (location < 0) {
         const char *const string = (target_index == MESA_SHADER_VERTEX)
            ? "vertex shader input" : "fragment shader output";

         linker_error(prog,
                      "insufficient contiguous locations "
                      "available for %s `%s'\n",
                      string, to_assign[i].var->name);
         return false;
      }

      to_assign[i].var->data.location = generic_base + location;
      used_locations |= (use_mask << location);

      if (glsl_type_is_dual_slot(glsl_without_array(to_assign[i].var->type)))
         double_storage_locations |= (use_mask << location);
   }

   /* Now that we have all the locations, from the GL 4.5 core spec, section
    * 11.1.1 (Vertex Attributes), dvec3, dvec4, dmat2x3, dmat2x4, dmat3,
    * dmat3x4, dmat4x3, and dmat4 count as consuming twice as many attributes
    * as equivalent single-precision types.
    */
   if (target_index == MESA_SHADER_VERTEX) {
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots only %d available ",
                      total_attribs_size, max_index);
         return false;
      }
   }

   return true;
}
1322 
1323 static bool
varying_has_user_specified_location(const nir_variable * var)1324 varying_has_user_specified_location(const nir_variable *var)
1325 {
1326    return var->data.explicit_location &&
1327       var->data.location >= VARYING_SLOT_VAR0;
1328 }
1329 
/* Recursively expand an output's type into the list of flattened transform
 * feedback varying names, appending ".member" for struct/interface members
 * and "[i]" for outer array dimensions onto *name in place (via ralloc tail
 * rewriting) before recursing.  Leaf types append a copy of the accumulated
 * name to *varying_names and bump *count.
 *
 * name_length is the length of the valid prefix of *name at this recursion
 * level; each branch rewrites only past that point, so sibling iterations
 * reuse the same buffer without clobbering the parent prefix.
 *
 * ifc_member_name/ifc_member_t are only non-NULL for the top-level call on a
 * named interface block type; they select the single member being expanded.
 */
static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      /* Member identified; recurse into its type with the extended prefix. */
      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      /* Arrays of aggregates (or arrays of arrays) expand one subscript per
       * element; plain arrays of basic types fall through to the leaf case
       * below and are captured as a single varying.
       */
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      /* Leaf: record the fully-qualified name built up by the callers. */
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}
1373 
/* Scan the last vertex-pipeline stage for xfb_* layout qualifiers.
 *
 * Returns true if any xfb qualifier (xfb_buffer, xfb_stride or xfb_offset,
 * including a global xfb_stride recorded in prog->TransformFeedback) is in
 * use, meaning the shader itself describes the transform feedback setup.
 * On return *num_xfb_decls holds the number of flattened varyings with an
 * explicit xfb_offset, and *varying_names (allocated from mem_ctx) holds
 * their fully-qualified names.
 */
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_xfb_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also we don't bother to propagate
    * xfb_stride to interface block members so this will catch that case also.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   /* First pass: detect qualifier usage and count captured varyings. */
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_offset) {
         *num_xfb_decls += glsl_varying_count(var->type);
         has_xfb_qualifiers = true;
      }
   }

   if (*num_xfb_decls == 0)
      return has_xfb_qualifiers;

   /* Second pass: build the flattened name list for each captured output. */
   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      if (var->data.explicit_offset) {
         char *name;
         const struct glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            /* Named interface block members are captured under
             * "BlockName.member", so start from the interface type's name.
             */
            type = var->interface_type;

            /* Find the member type before it was altered by lowering */
            const struct glsl_type *type_wa = glsl_without_array(type);
            member_type =
               glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
            name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         /* The scratch name buffer was allocated with a NULL ralloc parent;
          * the recorded names were duplicated into mem_ctx, so free it here.
          */
         ralloc_free(name);
      }
   }

   assert(i == *num_xfb_decls);
   return has_xfb_qualifiers;
}
1445 
1446 /**
1447  * Initialize this struct based on a string that was passed to
1448  * glTransformFeedbackVaryings.
1449  *
1450  * If the input is mal-formed, this call still succeeds, but it sets
1451  * this->var_name to a mal-formed input, so xfb_decl_find_output_var()
1452  * will fail to find any matching variable.
1453  */
1454 static void
xfb_decl_init(struct xfb_decl * xfb_decl,const struct gl_constants * consts,const struct gl_extensions * exts,const void * mem_ctx,const char * input)1455 xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
1456               const struct gl_extensions *exts, const void *mem_ctx,
1457               const char *input)
1458 {
1459    /* We don't have to be pedantic about what is a valid GLSL variable name,
1460     * because any variable with an invalid name can't exist in the IR anyway.
1461     */
1462    xfb_decl->location = -1;
1463    xfb_decl->orig_name = input;
1464    xfb_decl->lowered_builtin_array_variable = none;
1465    xfb_decl->skip_components = 0;
1466    xfb_decl->next_buffer_separator = false;
1467    xfb_decl->matched_candidate = NULL;
1468    xfb_decl->stream_id = 0;
1469    xfb_decl->buffer = 0;
1470    xfb_decl->offset = 0;
1471 
1472    if (exts->ARB_transform_feedback3) {
1473       /* Parse gl_NextBuffer. */
1474       if (strcmp(input, "gl_NextBuffer") == 0) {
1475          xfb_decl->next_buffer_separator = true;
1476          return;
1477       }
1478 
1479       /* Parse gl_SkipComponents. */
1480       if (strcmp(input, "gl_SkipComponents1") == 0)
1481          xfb_decl->skip_components = 1;
1482       else if (strcmp(input, "gl_SkipComponents2") == 0)
1483          xfb_decl->skip_components = 2;
1484       else if (strcmp(input, "gl_SkipComponents3") == 0)
1485          xfb_decl->skip_components = 3;
1486       else if (strcmp(input, "gl_SkipComponents4") == 0)
1487          xfb_decl->skip_components = 4;
1488 
1489       if (xfb_decl->skip_components)
1490          return;
1491    }
1492 
1493    /* Parse a declaration. */
1494    const char *base_name_end;
1495    long subscript = link_util_parse_program_resource_name(input, strlen(input),
1496                                                           &base_name_end);
1497    xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
1498    if (xfb_decl->var_name == NULL) {
1499       _mesa_error_no_memory(__func__);
1500       return;
1501    }
1502 
1503    if (subscript >= 0) {
1504       xfb_decl->array_subscript = subscript;
1505       xfb_decl->is_subscripted = true;
1506    } else {
1507       xfb_decl->is_subscripted = false;
1508    }
1509 
1510    /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
1511     * class must behave specially to account for the fact that gl_ClipDistance
1512     * is converted from a float[8] to a vec4[2].
1513     */
1514    if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1515        strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
1516       xfb_decl->lowered_builtin_array_variable = clip_distance;
1517    }
1518    if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1519        strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
1520       xfb_decl->lowered_builtin_array_variable = cull_distance;
1521    }
1522 }
1523 
1524 /**
1525  * Determine whether two xfb_decl structs refer to the same variable and
1526  * array index (if applicable).
1527  */
1528 static bool
xfb_decl_is_same(const struct xfb_decl * x,const struct xfb_decl * y)1529 xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
1530 {
1531    assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));
1532 
1533    if (strcmp(x->var_name, y->var_name) != 0)
1534       return false;
1535    if (x->is_subscripted != y->is_subscripted)
1536       return false;
1537    if (x->is_subscripted && x->array_subscript != y->array_subscript)
1538       return false;
1539    return true;
1540 }
1541 
1542 /**
1543  * The total number of varying components taken up by this variable.  Only
1544  * valid if assign_location() has been called.
1545  */
1546 static unsigned
xfb_decl_num_components(struct xfb_decl * xfb_decl)1547 xfb_decl_num_components(struct xfb_decl *xfb_decl)
1548 {
1549    if (xfb_decl->lowered_builtin_array_variable)
1550       return xfb_decl->size;
1551    else
1552       return xfb_decl->vector_elements * xfb_decl->matrix_columns *
1553          xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
1554 }
1555 
/**
 * Assign a location and stream ID for this xfb_decl object based on the
 * transform feedback candidate found by find_candidate.
 *
 * Fills in location, location_frac, size, vector_elements, matrix_columns,
 * type, stream_id, buffer and offset.  "fine location" below counts in
 * single float components (slot * 4 + component).
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                         const struct gl_constants *consts,
                         struct gl_shader_program *prog,
                         bool disable_varying_packing, bool xfb_enabled)
{
   assert(xfb_decl_is_varying(xfb_decl));

   /* Component-granular start of the matched variable, including any offset
    * of this candidate within a toplevel struct.
    */
   unsigned fine_location
      = xfb_decl->matched_candidate->toplevel_var->data.location * 4
      + xfb_decl->matched_candidate->toplevel_var->data.location_frac
      + xfb_decl->matched_candidate->struct_offset_floats;
   /* 64-bit types occupy two float components per element. */
   const unsigned dmul =
      glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

   if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
      /* Array variable */
      const struct glsl_type *element_type =
         glsl_get_array_element(xfb_decl->matched_candidate->type);
      const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
      const unsigned vector_elements = glsl_get_vector_elements(element_type);
      /* For lowered clip/cull distance, the user-visible array size comes
       * from the program info rather than the (combined, lowered) type.
       */
      unsigned actual_array_size;
      switch (xfb_decl->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.cull_distance_array_size : 0;
         break;
      case none:
      default:
         actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
         break;
      }

      if (xfb_decl->is_subscripted) {
         /* Check array bounds. */
         if (xfb_decl->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%i, but the array size is %u.",
                         xfb_decl->orig_name, xfb_decl->array_subscript,
                         actual_array_size);
            return false;
         }

         /* Packed (lowered) arrays are tightly laid out; unpacked arrays
          * keep one full vec4 slot per element.  The builtin names listed
          * here are always lowered regardless of the packing decision.
          */
         bool array_will_be_lowered =
            lower_packed_varying_needs_lowering(prog->last_vert_prog->nir,
                                                xfb_decl->matched_candidate->toplevel_var,
                                                nir_var_shader_out,
                                                disable_varying_packing,
                                                xfb_enabled) ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_ClipDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_CullDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelInner") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelOuter") == 0;

         /* Size in float components of one array element, used to advance
          * to the subscripted element.
          */
         unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
            1 : (array_will_be_lowered ? vector_elements : 4) * matrix_cols * dmul;
         fine_location += array_elem_size * xfb_decl->array_subscript;
         xfb_decl->size = 1;
      } else {
         /* Capture the whole array. */
         xfb_decl->size = actual_array_size;
      }
      xfb_decl->vector_elements = vector_elements;
      xfb_decl->matrix_columns = matrix_cols;
      if (xfb_decl->lowered_builtin_array_variable)
         xfb_decl->type = GL_FLOAT;
      else
         xfb_decl->type = glsl_get_gl_type(element_type);
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (xfb_decl->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      xfb_decl->orig_name, xfb_decl->var_name);
         return false;
      }
      xfb_decl->size = 1;
      xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
      xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
      xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
   }
   /* Convert fine (float-component) location back to slot + component. */
   xfb_decl->location = fine_location / 4;
   xfb_decl->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
    *
    *   * the total number of components to capture in any varying
    *     variable in <varyings> is greater than the constant
    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *     buffer mode is SEPARATE_ATTRIBS_EXT;
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       xfb_decl_num_components(xfb_decl) >
       consts->MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   xfb_decl->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

   /* Explicit xfb_offset bookkeeping (in bytes): base offset plus the
    * subscripted element and any struct-member offset.
    */
   unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
   unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
   xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
   xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
      array_offset + struct_offset;

   return true;
}
1680 
1681 static unsigned
xfb_decl_get_num_outputs(struct xfb_decl * xfb_decl)1682 xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
1683 {
1684    if (!xfb_decl_is_varying(xfb_decl)) {
1685       return 0;
1686    }
1687 
1688    if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
1689       unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
1690       unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
1691       return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
1692    } else {
1693       return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
1694    }
1695 }
1696 
1697 static bool
xfb_decl_is_varying_written(struct xfb_decl * xfb_decl)1698 xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
1699 {
1700    if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
1701       return false;
1702 
1703    return xfb_decl->matched_candidate->toplevel_var->data.assigned;
1704 }
1705 
/**
 * Update gl_transform_feedback_info to reflect this xfb_decl.
 *
 * Appends the varying (and, when it is actually written, its per-row
 * Outputs[] entries) to *info, advances the buffer's Stride, and validates
 * component limits and xfb_offset aliasing/overflow.
 *
 * used_components[buffer] is a lazily-allocated bitset of float components
 * already claimed in that buffer, used to detect overlapping xfb_offsets.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               struct gl_shader_program *prog,
               struct gl_transform_feedback_info *info,
               unsigned buffer, unsigned buffer_index,
               const unsigned max_outputs,
               BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
               bool *explicit_stride, unsigned *max_member_alignment,
               bool has_xfb_qualifiers, const void* mem_ctx)
{
   unsigned xfb_offset = 0;
   unsigned size = xfb_decl->size;
   /* Handle gl_SkipComponents. */
   if (xfb_decl->skip_components) {
      /* Skips only widen the stride; they produce no outputs. */
      info->Buffers[buffer].Stride += xfb_decl->skip_components;
      size = xfb_decl->skip_components;
      goto store_varying;
   }

   if (xfb_decl->next_buffer_separator) {
      /* Separators are recorded as zero-size varyings. */
      size = 0;
      goto store_varying;
   }

   /* With explicit xfb qualifiers the offset comes from the shader (bytes,
    * converted to float components); otherwise varyings are packed at the
    * current end of the buffer.
    */
   if (has_xfb_qualifiers) {
      xfb_offset = xfb_decl->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = xfb_decl->location;
      unsigned location_frac = xfb_decl->location_frac;
      unsigned num_components = xfb_decl_num_components(xfb_decl);

      /* From GL_EXT_transform_feedback:
       *
       *   " A program will fail to link if:
       *
       *       * the total number of components to capture is greater than the
       *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *   " The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          consts->MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *   " No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       */
      const unsigned max_components =
         consts->MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      /* Mark [first_component, last_component] as used, word by word,
       * failing if any bit in the range was already set by another varying.
       */
      for (unsigned word = start_word; word <= end_word; word++) {
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         xfb_decl->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      /* Components in one element (vector) of this varying's type; 64-bit
       * types count double.  Used to split explicit-location varyings at
       * element boundaries.
       */
      const unsigned type_num_components =
         xfb_decl->vector_elements *
         (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      /* Emit one Outputs[] record per (partial) row until all components of
       * the varying are covered.
       */
      while (num_components > 0) {
         unsigned output_size = 0;

         /*  From GL_ARB_enhanced_layouts:
          *
          * "When an attribute variable declared using an array type is bound to
          * generic attribute index <i>, the active array elements are assigned to
          * consecutive generic attributes beginning with generic attribute <i>.  The
          * number of attributes and components assigned to each element are
          * determined according to the data type of array elements and "component"
          * layout qualifier (if any) specified in the declaration of the array."
          *
          * "When an attribute variable declared using a matrix type is bound to a
          * generic attribute index <i>, its values are taken from consecutive generic
          * attributes beginning with generic attribute <i>.  Such matrices are
          * treated as an array of column vectors with values taken from the generic
          * attributes.
          * This means there may be gaps in the varyings we are taking values from."
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
          * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
          * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
          * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
          * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
          *
          */
         if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            /* Packed layout: fill the current row from location_frac. */
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a shader
          *    invocation, the buffer contents at the assigned offset will be
          *    undefined.  Even if there are no static writes to a variable or
          *    member that is assigned a transform feedback offset, the space is
          *    still allocated in the buffer and still affects the stride."
          */
         if (xfb_decl_is_varying_written(xfb_decl)) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = xfb_decl->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      /* Stride is in float components; a double-containing varying needs an
       * even component stride (8-byte multiple).
       */
      if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
          info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 8 as its applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      /* Implicit stride: grow to the end of this varying, keeping the
       * buffer aligned to the largest member seen so far when qualifiers
       * are in use.
       */
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].name.string =
      ralloc_strdup(prog, xfb_decl->orig_name);
   resource_name_updated(&info->Varyings[info->NumVarying].name);
   info->Varyings[info->NumVarying].Type = xfb_decl->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}
1926 
1927 static const struct tfeedback_candidate *
xfb_decl_find_candidate(struct xfb_decl * xfb_decl,struct gl_shader_program * prog,struct hash_table * tfeedback_candidates)1928 xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
1929                         struct gl_shader_program *prog,
1930                         struct hash_table *tfeedback_candidates)
1931 {
1932    const char *name = xfb_decl->var_name;
1933    switch (xfb_decl->lowered_builtin_array_variable) {
1934    case none:
1935       name = xfb_decl->var_name;
1936       break;
1937    case clip_distance:
1938    case cull_distance:
1939       name = "gl_ClipDistanceMESA";
1940       break;
1941    }
1942    struct hash_entry *entry =
1943       _mesa_hash_table_search(tfeedback_candidates, name);
1944 
1945    xfb_decl->matched_candidate = entry ?
1946          (struct tfeedback_candidate *) entry->data : NULL;
1947 
1948    if (!xfb_decl->matched_candidate) {
1949       /* From GL_EXT_transform_feedback:
1950        *   A program will fail to link if:
1951        *
1952        *   * any variable name specified in the <varyings> array is not
1953        *     declared as an output in the geometry shader (if present) or
1954        *     the vertex shader (if no geometry shader is present);
1955        */
1956       linker_error(prog, "Transform feedback varying %s undeclared.",
1957                    xfb_decl->orig_name);
1958    }
1959 
1960    return xfb_decl->matched_candidate;
1961 }
1962 
1963 /**
1964  * Force a candidate over the previously matched one. It happens when a new
1965  * varying needs to be created to match the xfb declaration, for example,
1966  * to fullfil an alignment criteria.
1967  */
1968 static void
xfb_decl_set_lowered_candidate(struct xfb_decl * xfb_decl,struct tfeedback_candidate * candidate)1969 xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
1970                                struct tfeedback_candidate *candidate)
1971 {
1972    xfb_decl->matched_candidate = candidate;
1973 
1974    /* The subscript part is no longer relevant */
1975    xfb_decl->is_subscripted = false;
1976    xfb_decl->array_subscript = 0;
1977 }
1978 
1979 /**
1980  * Parse all the transform feedback declarations that were passed to
1981  * glTransformFeedbackVaryings() and store them in xfb_decl objects.
1982  *
1983  * If an error occurs, the error is reported through linker_error() and false
1984  * is returned.
1985  */
1986 static bool
parse_xfb_decls(const struct gl_constants * consts,const struct gl_extensions * exts,struct gl_shader_program * prog,const void * mem_ctx,unsigned num_names,char ** varying_names,struct xfb_decl * decls)1987 parse_xfb_decls(const struct gl_constants *consts,
1988                 const struct gl_extensions *exts,
1989                 struct gl_shader_program *prog,
1990                 const void *mem_ctx, unsigned num_names,
1991                 char **varying_names, struct xfb_decl *decls)
1992 {
1993    for (unsigned i = 0; i < num_names; ++i) {
1994       xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i]);
1995 
1996       if (!xfb_decl_is_varying(&decls[i]))
1997          continue;
1998 
1999       /* From GL_EXT_transform_feedback:
2000        *   A program will fail to link if:
2001        *
2002        *   * any two entries in the <varyings> array specify the same varying
2003        *     variable;
2004        *
2005        * We interpret this to mean "any two entries in the <varyings> array
2006        * specify the same varying variable and array index", since transform
2007        * feedback of arrays would be useless otherwise.
2008        */
2009       for (unsigned j = 0; j < i; ++j) {
2010          if (xfb_decl_is_varying(&decls[j])) {
2011             if (xfb_decl_is_same(&decls[i], &decls[j])) {
2012                linker_error(prog, "Transform feedback varying %s specified "
2013                             "more than once.", varying_names[i]);
2014                return false;
2015             }
2016          }
2017       }
2018    }
2019    return true;
2020 }
2021 
2022 static int
cmp_xfb_offset(const void * x_generic,const void * y_generic)2023 cmp_xfb_offset(const void * x_generic, const void * y_generic)
2024 {
2025    struct xfb_decl *x = (struct xfb_decl *) x_generic;
2026    struct xfb_decl *y = (struct xfb_decl *) y_generic;
2027 
2028    if (x->buffer != y->buffer)
2029       return x->buffer - y->buffer;
2030    return x->offset - y->offset;
2031 }
2032 
/**
 * Store transform feedback location assignments into
 * prog->sh.LinkedTransformFeedback based on the data stored in
 * xfb_decls.
 *
 * \param num_xfb_decls       number of entries in \c xfb_decls
 * \param has_xfb_qualifiers  true when buffer/offset come from explicit
 *        xfb_* layout qualifiers rather than the API varyings list
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
store_tfeedback_info(const struct gl_constants *consts,
                     struct gl_shader_program *prog, unsigned num_xfb_decls,
                     struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   /* Nothing to capture if no stage before the FS exists. */
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
    * tracking the number of buffers doesn't overflow.
    */
   assert(consts->MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order
    * however some drivers expect to receive the list of transform feedback
    * declarations in order so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_xfb_decls);

   /* Pre-count the outputs so the Outputs array can be sized exactly. */
   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying_written(&xfb_decls[i]))
         num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   /* Bitmask of buffers that end up with at least one varying attached;
    * written to ActiveBuffers at the end.
    */
   unsigned buffers = 0;
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS: each declaration gets its own buffer. */
      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             num_buffers, num_buffers, num_outputs,
                             used_components, NULL, NULL, has_xfb_qualifiers,
                             mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   }
   else {
      /* GL_INTERLEAVED_ATTRIBS */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_xfb_decls ? xfb_decls[0].buffer : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != xfb_decls[i].buffer) {
            /* we have moved to the next buffer so reset stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         /* A gl_NextBuffer separator terminates the current buffer. */
         if (xfb_decls[i].next_buffer_separator) {
            if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                xfb_prog->sh.LinkedTransformFeedback,
                                buffer, num_buffers, num_outputs,
                                used_components, explicit_stride,
                                max_member_alignment, has_xfb_qualifiers,
                                mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = xfb_decls[i].buffer;
         } else {
            buffer = num_buffers;
         }

         if (xfb_decl_is_varying(&xfb_decls[i])) {
            if (buffer_stream_id == -1)  {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) xfb_decls[i].stream_id;

               /* Only mark a buffer as active when there is a varying
                * attached to it. This behaviour is based on a revised version
                * of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) xfb_decls[i].stream_id) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            xfb_decls[i].orig_name,
                            xfb_decls[i].stream_id,
                            buffer_stream_id);
               return false;
            }
         }

         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             buffer, num_buffers, num_outputs, used_components,
                             explicit_stride, max_member_alignment,
                             has_xfb_qualifiers, mem_ctx))
            return false;
      }
   }
   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}
2186 
/**
 * Enum representing the order in which varyings are packed within a
 * packing class.
 *
 * Currently we pack vec4's first, then vec2's, then scalar values, then
 * vec3's.  This order ensures that the only vectors that are at risk of
 * having to be "double parked" (split between two adjacent varying slots)
 * are the vec3's.
 */
enum packing_order_enum {
   PACKING_ORDER_VEC4,   /**< Packed first: always fills whole slots. */
   PACKING_ORDER_VEC2,   /**< Two of these pack cleanly into one slot. */
   PACKING_ORDER_SCALAR, /**< Scalars fill remaining single-component holes. */
   PACKING_ORDER_VEC3,   /**< Last: the only size at risk of slot splitting. */
};
2202 
/**
 * Structure recording the relationship between a single producer output
 * and a single consumer input.
 */
struct match {
   /**
    * Packing class for this varying, computed by compute_packing_class().
    */
   unsigned packing_class;

   /**
    * Packing order for this varying, computed by compute_packing_order().
    */
   enum packing_order_enum packing_order;

   /**
    * The output variable in the producer stage.  May be NULL if the varying
    * was optimised away after the match was recorded.
    */
   nir_variable *producer_var;

   /**
    * The input variable in the consumer stage.  May be NULL when the output
    * is only consumed by transform feedback, not by the next stage.
    */
   nir_variable *consumer_var;

   /**
    * The location which has been assigned for this varying.  This is
    * expressed in multiples of a float, with the first generic varying
    * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
    * value 0.
    */
   unsigned generic_location;
};
2236 
/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 */
struct varying_matches
{
   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled, fortunately where transform feedback
    * requires packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   bool xfb_enabled;

   /** True when ARB_enhanced_layouts is supported (see init_varying_matches). */
   bool enhanced_layouts_enabled;

   /**
    * If true, this driver prefers varyings to be aligned to power of two
    * in a slot.
    */
   bool prefer_pot_aligned_varyings;

   /** Growable array of recorded matches; see varying_matches_record(). */
   struct match *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   /* The two stages whose interface is being matched. */
   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};
2294 
2295 /**
2296  * Comparison function passed to qsort() to sort varyings by packing_class and
2297  * then by packing_order.
2298  */
2299 static int
varying_matches_match_comparator(const void * x_generic,const void * y_generic)2300 varying_matches_match_comparator(const void *x_generic, const void *y_generic)
2301 {
2302    const struct match *x = (const struct match *) x_generic;
2303    const struct match *y = (const struct match *) y_generic;
2304 
2305    if (x->packing_class != y->packing_class)
2306       return x->packing_class - y->packing_class;
2307    return x->packing_order - y->packing_order;
2308 }
2309 
2310 /**
2311  * Comparison function passed to qsort() to sort varyings used only by
2312  * transform feedback when packing of other varyings is disabled.
2313  */
2314 static int
varying_matches_xfb_comparator(const void * x_generic,const void * y_generic)2315 varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
2316 {
2317    const struct match *x = (const struct match *) x_generic;
2318 
2319    if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2320       return varying_matches_match_comparator(x_generic, y_generic);
2321 
2322    /* FIXME: When the comparator returns 0 it means the elements being
2323     * compared are equivalent. However the qsort documentation says:
2324     *
2325     *    "The order of equivalent elements is undefined."
2326     *
2327     * In practice the sort ends up reversing the order of the varyings which
2328     * means locations are also assigned in this reversed order and happens to
2329     * be what we want. This is also whats happening in
2330     * varying_matches_match_comparator().
2331     */
2332    return 0;
2333 }
2334 
2335 /**
2336  * Comparison function passed to qsort() to sort varyings NOT used by
2337  * transform feedback when packing of xfb varyings is disabled.
2338  */
2339 static int
varying_matches_not_xfb_comparator(const void * x_generic,const void * y_generic)2340 varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
2341 {
2342    const struct match *x = (const struct match *) x_generic;
2343 
2344    if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
2345       return varying_matches_match_comparator(x_generic, y_generic);
2346 
2347    /* FIXME: When the comparator returns 0 it means the elements being
2348     * compared are equivalent. However the qsort documentation says:
2349     *
2350     *    "The order of equivalent elements is undefined."
2351     *
2352     * In practice the sort ends up reversing the order of the varyings which
2353     * means locations are also assigned in this reversed order and happens to
2354     * be what we want. This is also whats happening in
2355     * varying_matches_match_comparator().
2356     */
2357    return 0;
2358 }
2359 
2360 static bool
is_unpackable_tess(gl_shader_stage producer_stage,gl_shader_stage consumer_stage)2361 is_unpackable_tess(gl_shader_stage producer_stage,
2362                    gl_shader_stage consumer_stage)
2363 {
2364    if (consumer_stage == MESA_SHADER_TESS_EVAL ||
2365        consumer_stage == MESA_SHADER_TESS_CTRL ||
2366        producer_stage == MESA_SHADER_TESS_CTRL)
2367       return true;
2368 
2369    return false;
2370 }
2371 
2372 static void
init_varying_matches(void * mem_ctx,struct varying_matches * vm,const struct gl_constants * consts,const struct gl_extensions * exts,gl_shader_stage producer_stage,gl_shader_stage consumer_stage,bool sso)2373 init_varying_matches(void *mem_ctx, struct varying_matches *vm,
2374                      const struct gl_constants *consts,
2375                      const struct gl_extensions *exts,
2376                      gl_shader_stage producer_stage,
2377                      gl_shader_stage consumer_stage,
2378                      bool sso)
2379 {
2380    /* Tessellation shaders treat inputs and outputs as shared memory and can
2381     * access inputs and outputs of other invocations.
2382     * Therefore, they can't be lowered to temps easily (and definitely not
2383     * efficiently).
2384     */
2385    bool unpackable_tess =
2386       is_unpackable_tess(producer_stage, consumer_stage);
2387 
2388    /* Transform feedback code assumes varying arrays are packed, so if the
2389     * driver has disabled varying packing, make sure to at least enable
2390     * packing required by transform feedback. See below for exception.
2391     */
2392    bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;
2393 
2394    /* Some drivers actually requires packing to be explicitly disabled
2395     * for varyings used by transform feedback.
2396     */
2397    bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;
2398 
2399    /* Disable packing on outward facing interfaces for SSO because in ES we
2400     * need to retain the unpacked varying information for draw time
2401     * validation.
2402     *
2403     * Packing is still enabled on individual arrays, structs, and matrices as
2404     * these are required by the transform feedback code and it is still safe
2405     * to do so. We also enable packing when a varying is only used for
2406     * transform feedback and its not a SSO.
2407     */
2408    bool disable_varying_packing =
2409       consts->DisableVaryingPacking || unpackable_tess;
2410    if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
2411       disable_varying_packing = true;
2412 
2413    /* Note: this initial capacity is rather arbitrarily chosen to be large
2414     * enough for many cases without wasting an unreasonable amount of space.
2415     * varying_matches_record() will resize the array if there are more than
2416     * this number of varyings.
2417     */
2418    vm->matches_capacity = 8;
2419    vm->matches = (struct match *)
2420       ralloc_array(mem_ctx, struct match, vm->matches_capacity);
2421    vm->num_matches = 0;
2422 
2423    vm->disable_varying_packing = disable_varying_packing;
2424    vm->disable_xfb_packing = disable_xfb_packing;
2425    vm->xfb_enabled = xfb_enabled;
2426    vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
2427    vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
2428    vm->producer_stage = producer_stage;
2429    vm->consumer_stage = consumer_stage;
2430 }
2431 
2432 /**
2433  * Packing is always safe on individual arrays, structures, and matrices. It
2434  * is also safe if the varying is only used for transform feedback.
2435  */
2436 static bool
is_varying_packing_safe(struct varying_matches * vm,const struct glsl_type * type,const nir_variable * var)2437 is_varying_packing_safe(struct varying_matches *vm,
2438                         const struct glsl_type *type, const nir_variable *var)
2439 {
2440    if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
2441       return false;
2442 
2443    return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
2444                               glsl_type_is_struct(type) ||
2445                               var->data.is_xfb_only);
2446 }
2447 
2448 static bool
is_packing_disabled(struct varying_matches * vm,const struct glsl_type * type,const nir_variable * var)2449 is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
2450                     const nir_variable *var)
2451 {
2452    return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
2453       (vm->disable_xfb_packing && var->data.is_xfb &&
2454        !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
2455          glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
2456 }
2457 
2458 /**
2459  * Compute the "packing class" of the given varying.  This is an unsigned
2460  * integer with the property that two variables in the same packing class can
2461  * be safely backed into the same vec4.
2462  */
2463 static unsigned
varying_matches_compute_packing_class(const nir_variable * var)2464 varying_matches_compute_packing_class(const nir_variable *var)
2465 {
2466    /* Without help from the back-end, there is no way to pack together
2467     * variables with different interpolation types, because
2468     * lower_packed_varyings must choose exactly one interpolation type for
2469     * each packed varying it creates.
2470     *
2471     * However, we can safely pack together floats, ints, and uints, because:
2472     *
2473     * - varyings of base type "int" and "uint" must use the "flat"
2474     *   interpolation type, which can only occur in GLSL 1.30 and above.
2475     *
2476     * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2477     *   can store flat floats as ints without losing any information (using
2478     *   the ir_unop_bitcast_* opcodes).
2479     *
2480     * Therefore, the packing class depends only on the interpolation type.
2481     */
2482    bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
2483       glsl_contains_integer(var->type) || glsl_contains_double(var->type);
2484 
2485    const unsigned interp = is_interpolation_flat
2486       ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;
2487 
2488    assert(interp < (1 << 3));
2489 
2490    const unsigned packing_class = (interp << 0) |
2491                                   (var->data.centroid << 3) |
2492                                   (var->data.sample << 4) |
2493                                   (var->data.patch << 5) |
2494                                   (var->data.must_be_shader_input << 6);
2495 
2496    return packing_class;
2497 }
2498 
2499 /**
2500  * Compute the "packing order" of the given varying.  This is a sort key we
2501  * use to determine when to attempt to pack the given varying relative to
2502  * other varyings in the same packing class.
2503  */
2504 static enum packing_order_enum
varying_matches_compute_packing_order(const nir_variable * var)2505 varying_matches_compute_packing_order(const nir_variable *var)
2506 {
2507    const struct glsl_type *element_type = glsl_without_array(var->type);
2508 
2509    switch (glsl_get_component_slots(element_type) % 4) {
2510    case 1: return PACKING_ORDER_SCALAR;
2511    case 2: return PACKING_ORDER_VEC2;
2512    case 3: return PACKING_ORDER_VEC3;
2513    case 0: return PACKING_ORDER_VEC4;
2514    default:
2515       assert(!"Unexpected value of vector_elements");
2516       return PACKING_ORDER_VEC4;
2517    }
2518 }
2519 
/**
 * Record the given producer/consumer variable pair in the list of variables
 * that should later be assigned locations.
 *
 * It is permissible for \c consumer_var to be NULL (this happens if a
 * variable is output by the producer and consumed by transform feedback, but
 * not consumed by the consumer).
 *
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
 *
 * Note: as a side effect this function may change the interpolation type of
 * \c producer_var, but only when the change couldn't possibly affect
 * rendering.
 */
static void
varying_matches_record(void *mem_ctx, struct varying_matches *vm,
                       nir_variable *producer_var, nir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var &&
       (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
       (consumer_var &&
        (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been assigned explicitly.
       */
      return;
   }

   /* The varyings should not have been matched and assigned previously */
   assert((producer_var == NULL || producer_var->data.location == -1) &&
          (consumer_var == NULL || consumer_var->data.location == -1));

   /* consumer_var == NULL implies producer_var != NULL (see assert above),
    * so dereferencing producer_var here is safe.
    */
   bool needs_flat_qualifier = consumer_var == NULL &&
      (glsl_contains_integer(producer_var->type) ||
       glsl_contains_double(producer_var->type));

   if (!vm->disable_varying_packing &&
       (!vm->disable_xfb_packing || producer_var  == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type varying cannot possibly affect rendering.
       * Also, this variable is non-flat and is (or contains) an integer
       * or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   /* Grow the matches array geometrically when full. */
   if (vm->num_matches == vm->matches_capacity) {
      vm->matches_capacity *= 2;
      vm->matches = (struct match *)
         reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
   }

   /* We must use the consumer to compute the packing class because in GL4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *    the same name declared in all linked shaders for the same cross-stage
    *    interface must match, otherwise the link command will fail.
    *
    *    When comparing an output from one stage to an input of a subsequent
    *    stage, the input and output don't match if their interpolation
    *    qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revison 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const nir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   /* Propagate must_be_shader_input from the consumer to the producer so
    * both sides end up in the same packing class.
    */
   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   vm->matches[vm->num_matches].packing_class
      = varying_matches_compute_packing_class(var);
   vm->matches[vm->num_matches].packing_order
      = varying_matches_compute_packing_order(var);

   vm->matches[vm->num_matches].producer_var = producer_var;
   vm->matches[vm->num_matches].consumer_var = consumer_var;
   vm->num_matches++;
}
2627 
2628 /**
2629  * Choose locations for all of the variable matches that were previously
2630  * passed to varying_matches_record().
2631  * \param components  returns array[slot] of number of components used
2632  *                    per slot (1, 2, 3 or 4)
2633  * \param reserved_slots  bitmask indicating which varying slots are already
2634  *                        allocated
2635  * \return number of slots (4-element vectors) allocated
2636  */
2637 static unsigned
varying_matches_assign_locations(struct varying_matches * vm,struct gl_shader_program * prog,uint8_t components[],uint64_t reserved_slots)2638 varying_matches_assign_locations(struct varying_matches *vm,
2639                                  struct gl_shader_program *prog,
2640                                  uint8_t components[], uint64_t reserved_slots)
2641 {
2642    /* If packing has been disabled then we cannot safely sort the varyings by
2643     * class as it may mean we are using a version of OpenGL where
2644     * interpolation qualifiers are not guaranteed to be matching across
2645     * shaders, sorting in this case could result in mismatching shader
2646     * interfaces.
2647     * When packing is disabled the sort orders varyings used by transform
2648     * feedback first, but also depends on *undefined behaviour* of qsort to
2649     * reverse the order of the varyings. See: xfb_comparator().
2650     *
2651     * If packing is only disabled for xfb varyings (mutually exclusive with
2652     * disable_varying_packing), we then group varyings depending on if they
2653     * are captured for transform feedback. The same *undefined behaviour* is
2654     * taken advantage of.
2655     */
2656    if (vm->disable_varying_packing) {
2657       /* Only sort varyings that are only used by transform feedback. */
2658       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2659             &varying_matches_xfb_comparator);
2660    } else if (vm->disable_xfb_packing) {
2661       /* Only sort varyings that are NOT used by transform feedback. */
2662       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2663             &varying_matches_not_xfb_comparator);
2664    } else {
2665       /* Sort varying matches into an order that makes them easy to pack. */
2666       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2667             &varying_matches_match_comparator);
2668    }
2669 
2670    unsigned generic_location = 0;
2671    unsigned generic_patch_location = MAX_VARYING*4;
2672    bool previous_var_xfb = false;
2673    bool previous_var_xfb_only = false;
2674    unsigned previous_packing_class = ~0u;
2675 
   /* For transform feedback separate mode, we know the number of attributes
2677     * is <= the number of buffers.  So packing isn't critical.  In fact,
2678     * packing vec3 attributes can cause trouble because splitting a vec3
2679     * effectively creates an additional transform feedback output.  The
2680     * extra TFB output may exceed device driver limits.
2681     *
2682     * Also don't pack vec3 if the driver prefers power of two aligned
2683     * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
2684     * pot-aligned, we only need to take care of vec3s
2685     */
2686    const bool dont_pack_vec3 =
2687       (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
2688        prog->TransformFeedback.NumVarying > 0) ||
2689       vm->prefer_pot_aligned_varyings;
2690 
2691    for (unsigned i = 0; i < vm->num_matches; i++) {
2692       unsigned *location = &generic_location;
2693       const nir_variable *var;
2694       const struct glsl_type *type;
2695       bool is_vertex_input = false;
2696 
2697       if (vm->matches[i].consumer_var) {
2698          var = vm->matches[i].consumer_var;
2699          type = get_varying_type(var, vm->consumer_stage);
2700          if (vm->consumer_stage == MESA_SHADER_VERTEX)
2701             is_vertex_input = true;
2702       } else {
2703          if (!vm->matches[i].producer_var)
2704             continue; /* The varying was optimised away */
2705 
2706          var = vm->matches[i].producer_var;
2707          type = get_varying_type(var, vm->producer_stage);
2708       }
2709 
2710       if (var->data.patch)
2711          location = &generic_patch_location;
2712 
2713       /* Advance to the next slot if this varying has a different packing
2714        * class than the previous one, and we're not already on a slot
2715        * boundary.
2716        *
2717        * Also advance if varying packing is disabled for transform feedback,
2718        * and previous or current varying is used for transform feedback.
2719        *
2720        * Also advance to the next slot if packing is disabled. This makes sure
2721        * we don't assign varyings the same locations which is possible
2722        * because we still pack individual arrays, records and matrices even
2723        * when packing is disabled. Note we don't advance to the next slot if
2724        * we can pack varyings together that are only used for transform
2725        * feedback.
2726        */
2727       if (var->data.must_be_shader_input ||
2728           (vm->disable_xfb_packing &&
2729            (previous_var_xfb || var->data.is_xfb)) ||
2730           (vm->disable_varying_packing &&
2731            !(previous_var_xfb_only && var->data.is_xfb_only)) ||
2732           (previous_packing_class != vm->matches[i].packing_class) ||
2733           (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
2734            dont_pack_vec3)) {
2735          *location = ALIGN(*location, 4);
2736       }
2737 
2738       previous_var_xfb = var->data.is_xfb;
2739       previous_var_xfb_only = var->data.is_xfb_only;
2740       previous_packing_class = vm->matches[i].packing_class;
2741 
2742       /* The number of components taken up by this variable. For vertex shader
2743        * inputs, we use the number of slots * 4, as they have different
2744        * counting rules.
2745        */
2746       unsigned num_components = 0;
2747       if (is_vertex_input) {
2748          num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
2749       } else {
2750          if (is_packing_disabled(vm, type, var)) {
2751             num_components = glsl_count_attribute_slots(type, false) * 4;
2752          } else {
2753             num_components = glsl_get_component_slots_aligned(type, *location);
2754          }
2755       }
2756 
2757       /* The last slot for this variable, inclusive. */
2758       unsigned slot_end = *location + num_components - 1;
2759 
2760       /* FIXME: We could be smarter in the below code and loop back over
2761        * trying to fill any locations that we skipped because we couldn't pack
2762        * the varying between an explicit location. For now just let the user
2763        * hit the linking error if we run out of room and suggest they use
2764        * explicit locations.
2765        */
2766       while (slot_end < MAX_VARYING * 4u) {
2767          const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
2768          const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
2769 
2770          assert(slots > 0);
2771 
2772          if ((reserved_slots & slot_mask) == 0) {
2773             break;
2774          }
2775 
2776          *location = ALIGN(*location + 1, 4);
2777          slot_end = *location + num_components - 1;
2778       }
2779 
2780       if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
2781          linker_error(prog, "insufficient contiguous locations available for "
2782                       "%s it is possible an array or struct could not be "
2783                       "packed between varyings with explicit locations. Try "
2784                       "using an explicit location for arrays and structs.",
2785                       var->name);
2786       }
2787 
2788       if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
2789          for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
2790             components[j] = 4;
2791          components[slot_end / 4u] = (slot_end & 3) + 1;
2792       }
2793 
2794       vm->matches[i].generic_location = *location;
2795 
2796       *location = slot_end + 1;
2797    }
2798 
2799    return (generic_location + 3) / 4;
2800 }
2801 
2802 static void
varying_matches_assign_temp_locations(struct varying_matches * vm,struct gl_shader_program * prog,uint64_t reserved_slots)2803 varying_matches_assign_temp_locations(struct varying_matches *vm,
2804                                       struct gl_shader_program *prog,
2805                                       uint64_t reserved_slots)
2806 {
2807    unsigned tmp_loc = 0;
2808    for (unsigned i = 0; i < vm->num_matches; i++) {
2809       nir_variable *producer_var = vm->matches[i].producer_var;
2810       nir_variable *consumer_var = vm->matches[i].consumer_var;
2811 
2812       while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
2813          if (reserved_slots & (UINT64_C(1) << tmp_loc))
2814             tmp_loc++;
2815          else
2816             break;
2817       }
2818 
2819       if (producer_var) {
2820          assert(producer_var->data.location == -1);
2821          producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2822       }
2823 
2824       if (consumer_var) {
2825          assert(consumer_var->data.location == -1);
2826          consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2827       }
2828 
2829       tmp_loc++;
2830    }
2831 }
2832 
/**
 * Update the producer and consumer shaders to reflect the locations
 * assignments that were made by varying_matches_assign_locations().
 */
static void
varying_matches_store_locations(struct varying_matches *vm)
{
   /* Check whether each location needs to be packed with
    * lower_packed_varyings() or if we can just use ARB_enhanced_layouts
    * packing.
    */
   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
   const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };

   for (unsigned i = 0; i < vm->num_matches; i++) {
      nir_variable *producer_var = vm->matches[i].producer_var;
      nir_variable *consumer_var = vm->matches[i].consumer_var;
      unsigned generic_location = vm->matches[i].generic_location;
      /* generic_location counts in float components; split into a vec4
       * slot index and a component offset within that slot.
       */
      unsigned slot = generic_location / 4;
      unsigned offset = generic_location % 4;

      if (producer_var) {
         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
         producer_var->data.location_frac = offset;
      }

      if (consumer_var) {
         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
         consumer_var->data.location_frac = offset;
      }

      /* Find locations suitable for native packing via
       * ARB_enhanced_layouts.
       */
      if (vm->enhanced_layouts_enabled) {
         nir_variable *var = producer_var ? producer_var : consumer_var;
         unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
         const struct glsl_type *type =
            get_varying_type(var, stage);
         /* Number of whole vec4 slots this varying covers, starting from
          * its component offset (rounded up).
          */
         unsigned comp_slots = glsl_get_component_slots(type) + offset;
         unsigned slots = comp_slots / 4;
         if (comp_slots % 4)
            slots += 1;

         if (producer_var && consumer_var) {
            if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
                glsl_type_is_64bit(type)) {
               /* Aggregates and 64-bit types must go through the
                * lower_packed_varyings() path.
                */
               for (unsigned j = 0; j < slots; j++) {
                  pack_loc[slot + j] = true;
               }
            } else if (offset + glsl_get_vector_elements(type) > 4) {
               /* The vector straddles a slot boundary, so both affected
                * slots must use the packing pass.
                */
               pack_loc[slot] = true;
               pack_loc[slot + 1] = true;
            } else {
               /* Record the type so the second loop below can verify all
                * components sharing this slot have matching base types.
                */
               loc_type[slot][offset] = type;
            }
         } else {
            /* One-sided matches (e.g. xfb-only outputs) always use the
             * packing pass.
             */
            for (unsigned j = 0; j < slots; j++) {
               pack_loc[slot + j] = true;
            }
         }
      }
   }

   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
    * suitable.
    */
   if (vm->enhanced_layouts_enabled) {
      for (unsigned i = 0; i < vm->num_matches; i++) {
         nir_variable *producer_var = vm->matches[i].producer_var;
         nir_variable *consumer_var = vm->matches[i].consumer_var;
         if (!producer_var || !consumer_var)
            continue;

         unsigned generic_location = vm->matches[i].generic_location;
         unsigned slot = generic_location / 4;
         if (pack_loc[slot])
            continue;

         const struct glsl_type *type =
            get_varying_type(producer_var, vm->producer_stage);
         bool type_match = true;
         for (unsigned j = 0; j < 4; j++) {
            if (loc_type[slot][j]) {
               /* All varyings sharing a slot must have the same base type
                * for native component packing to be legal.
                */
               if (glsl_get_base_type(type) !=
                   glsl_get_base_type(loc_type[slot][j]))
                  type_match = false;
            }
         }

         if (type_match) {
            /* Marking the location explicit makes later passes treat the
             * pair as packed via ARB_enhanced_layouts.
             */
            producer_var->data.explicit_location = 1;
            consumer_var->data.explicit_location = 1;
         }
      }
   }
}
2929 
2930 /**
2931  * Is the given variable a varying variable to be counted against the
2932  * limit in ctx->Const.MaxVarying?
2933  * This includes variables such as texcoords, colors and generic
2934  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2935  */
2936 static bool
var_counts_against_varying_limit(gl_shader_stage stage,const nir_variable * var)2937 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
2938 {
2939    /* Only fragment shaders will take a varying variable as an input */
2940    if (stage == MESA_SHADER_FRAGMENT &&
2941        var->data.mode == nir_var_shader_in) {
2942       switch (var->data.location) {
2943       case VARYING_SLOT_POS:
2944       case VARYING_SLOT_FACE:
2945       case VARYING_SLOT_PNTC:
2946          return false;
2947       default:
2948          return true;
2949       }
2950    }
2951    return false;
2952 }
2953 
/* Traversal state used by tfeedback_candidate_generator() while walking a
 * single toplevel output variable.
 */
struct tfeedback_candidate_generator_state {
   /**
    * Memory context used to allocate hash table keys and values.
    */
   void *mem_ctx;

   /**
    * Hash table in which tfeedback_candidate objects should be stored.
    */
   struct hash_table *tfeedback_candidates;

   /* Shader stage of the variable being traversed. */
   gl_shader_stage stage;

   /**
    * Pointer to the toplevel variable that is being traversed.
    */
   nir_variable *toplevel_var;

   /**
    * Total number of varying floats that have been visited so far.  This is
    * used to determine the offset to each varying within the toplevel
    * variable.
    */
   unsigned varying_floats;

   /**
    * Offset within the xfb. Counted in floats.
    */
   unsigned xfb_offset_floats;
};
2984 
/**
 * Generates tfeedback_candidate structs describing all possible targets of
 * transform feedback.
 *
 * tfeedback_candidate structs are stored in the hash table
 * tfeedback_candidates.  This hash table maps varying names to instances of the
 * tfeedback_candidate struct.
 */
static void
tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
                              char **name, size_t name_length,
                              const struct glsl_type *type,
                              const struct glsl_struct_field *named_ifc_member)
{
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_INTERFACE:
      if (named_ifc_member) {
         /* Only the named member of the interface block is traversed. */
         ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
                                      named_ifc_member->name);
         tfeedback_candidate_generator(state, name, name_length,
                                       named_ifc_member->type, NULL);
         return;
      }
      FALLTHROUGH;
   case GLSL_TYPE_STRUCT:
      /* Recurse into each field, building "outer.field" names. */
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         size_t new_length = name_length;

         /* Append '.field' to the current variable name. */
         if (name) {
            ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
                                         glsl_get_struct_elem_name(type, i));
         }

         tfeedback_candidate_generator(state, name, new_length,
                                       glsl_get_struct_field(type, i), NULL);
      }

      return;
   case GLSL_TYPE_ARRAY:
      if (glsl_type_is_struct(glsl_without_array(type)) ||
          glsl_type_is_interface(glsl_without_array(type)) ||
          glsl_type_is_array(glsl_get_array_element(type))) {

         /* Arrays of aggregates (and arrays of arrays) are decomposed
          * per element; each element produces its own candidate(s).
          */
         for (unsigned i = 0; i < glsl_get_length(type); i++) {
            size_t new_length = name_length;

            /* Append the subscript to the current variable name */
            ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

            tfeedback_candidate_generator(state, name, new_length,
                                          glsl_get_array_element(type),
                                          named_ifc_member);
         }

         return;
      }
      FALLTHROUGH;
   default:
      /* Leaf case: a scalar/vector/matrix (possibly an array of them)
       * becomes a single candidate entry.
       */
      assert(!glsl_type_is_struct(glsl_without_array(type)));
      assert(!glsl_type_is_interface(glsl_without_array(type)));

      struct tfeedback_candidate *candidate
         = rzalloc(state->mem_ctx, struct tfeedback_candidate);
      candidate->toplevel_var = state->toplevel_var;
      candidate->type = type;

      if (glsl_type_is_64bit(glsl_without_array(type))) {
         /*  From ARB_gpu_shader_fp64:
          *
          * If any variable captured in transform feedback has double-precision
          * components, the practical requirements for defined behavior are:
          *     ...
          * (c) each double-precision variable captured must be aligned to a
          *     multiple of eight bytes relative to the beginning of a vertex.
          */
         state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
         /* 64-bit members of structs are also aligned. */
         state->varying_floats = ALIGN(state->varying_floats, 2);
      }

      candidate->xfb_offset_floats = state->xfb_offset_floats;
      candidate->struct_offset_floats = state->varying_floats;

      /* Key the candidate by its fully expanded name ("block.field[2]"). */
      _mesa_hash_table_insert(state->tfeedback_candidates,
                              ralloc_strdup(state->mem_ctx, *name),
                              candidate);

      const unsigned component_slots = glsl_get_component_slots(type);

      if (varying_has_user_specified_location(state->toplevel_var)) {
         /* Explicit locations consume whole vec4 slots, so count slots*4
          * floats rather than just the components actually used.
          */
         state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
      } else {
         state->varying_floats += component_slots;
      }

      state->xfb_offset_floats += component_slots;
   }
}
3084 
3085 static void
populate_consumer_input_sets(void * mem_ctx,nir_shader * nir,struct hash_table * consumer_inputs,struct hash_table * consumer_interface_inputs,nir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])3086 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
3087                              struct hash_table *consumer_inputs,
3088                              struct hash_table *consumer_interface_inputs,
3089                              nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3090 {
3091    memset(consumer_inputs_with_locations, 0,
3092           sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
3093 
3094    nir_foreach_shader_in_variable(input_var, nir) {
3095       /* All interface blocks should have been lowered by this point */
3096       assert(!glsl_type_is_interface(input_var->type));
3097 
3098       if (input_var->data.explicit_location) {
3099          /* assign_varying_locations only cares about finding the
3100           * nir_variable at the start of a contiguous location block.
3101           *
3102           *     - For !producer, consumer_inputs_with_locations isn't used.
3103           *
3104           *     - For !consumer, consumer_inputs_with_locations is empty.
3105           *
3106           * For consumer && producer, if you were trying to set some
3107           * nir_variable to the middle of a location block on the other side
3108           * of producer/consumer, cross_validate_outputs_to_inputs() should
3109           * be link-erroring due to either type mismatch or location
3110           * overlaps.  If the variables do match up, then they've got a
3111           * matching data.location and you only looked at
3112           * consumer_inputs_with_locations[var->data.location], not any
3113           * following entries for the array/structure.
3114           */
3115          consumer_inputs_with_locations[input_var->data.location] =
3116             input_var;
3117       } else if (input_var->interface_type != NULL) {
3118          char *const iface_field_name =
3119             ralloc_asprintf(mem_ctx, "%s.%s",
3120                glsl_get_type_name(glsl_without_array(input_var->interface_type)),
3121                input_var->name);
3122          _mesa_hash_table_insert(consumer_interface_inputs,
3123                                  iface_field_name, input_var);
3124       } else {
3125          _mesa_hash_table_insert(consumer_inputs,
3126                                  ralloc_strdup(mem_ctx, input_var->name),
3127                                  input_var);
3128       }
3129    }
3130 }
3131 
3132 /**
3133  * Find a variable from the consumer that "matches" the specified variable
3134  *
3135  * This function only finds inputs with names that match.  There is no
3136  * validation (here) that the types, etc. are compatible.
3137  */
3138 static nir_variable *
get_matching_input(void * mem_ctx,const nir_variable * output_var,struct hash_table * consumer_inputs,struct hash_table * consumer_interface_inputs,nir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])3139 get_matching_input(void *mem_ctx,
3140                    const nir_variable *output_var,
3141                    struct hash_table *consumer_inputs,
3142                    struct hash_table *consumer_interface_inputs,
3143                    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3144 {
3145    nir_variable *input_var;
3146 
3147    if (output_var->data.explicit_location) {
3148       input_var = consumer_inputs_with_locations[output_var->data.location];
3149    } else if (output_var->interface_type != NULL) {
3150       char *const iface_field_name =
3151          ralloc_asprintf(mem_ctx, "%s.%s",
3152             glsl_get_type_name(glsl_without_array(output_var->interface_type)),
3153             output_var->name);
3154       struct hash_entry *entry =
3155          _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
3156       input_var = entry ? (nir_variable *) entry->data : NULL;
3157    } else {
3158       struct hash_entry *entry =
3159          _mesa_hash_table_search(consumer_inputs, output_var->name);
3160       input_var = entry ? (nir_variable *) entry->data : NULL;
3161    }
3162 
3163    return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
3164       ? NULL : input_var;
3165 }
3166 
3167 static int
io_variable_cmp(const void * _a,const void * _b)3168 io_variable_cmp(const void *_a, const void *_b)
3169 {
3170    const nir_variable *const a = *(const nir_variable **) _a;
3171    const nir_variable *const b = *(const nir_variable **) _b;
3172 
3173    if (a->data.explicit_location && b->data.explicit_location)
3174       return b->data.location - a->data.location;
3175 
3176    if (a->data.explicit_location && !b->data.explicit_location)
3177       return 1;
3178 
3179    if (!a->data.explicit_location && b->data.explicit_location)
3180       return -1;
3181 
3182    return -strcmp(a->name, b->name);
3183 }
3184 
3185 /**
3186  * Sort the shader IO variables into canonical order
3187  */
3188 static void
canonicalize_shader_io(nir_shader * nir,nir_variable_mode io_mode)3189 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
3190 {
3191    nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
3192    unsigned num_variables = 0;
3193 
3194    nir_foreach_variable_with_modes(var, nir, io_mode) {
3195       /* If we have already encountered more I/O variables that could
3196        * successfully link, bail.
3197        */
3198       if (num_variables == ARRAY_SIZE(var_table))
3199          return;
3200 
3201       var_table[num_variables++] = var;
3202    }
3203 
3204    if (num_variables == 0)
3205       return;
3206 
3207    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
3208     * we're going to push the variables on to the IR list as a stack, so we
3209     * want the last variable (in canonical order) to be first in the list.
3210     */
3211    qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
3212 
3213    /* Remove the variable from it's current location in the varible list, and
3214     * put it at the front.
3215     */
3216    for (unsigned i = 0; i < num_variables; i++) {
3217       exec_node_remove(&var_table[i]->node);
3218       exec_list_push_head(&nir->variables, &var_table[i]->node);
3219    }
3220 }
3221 
3222 /**
3223  * Generate a bitfield map of the explicit locations for shader varyings.
3224  *
3225  * Note: For Tessellation shaders we are sitting right on the limits of the
3226  * 64 bit map. Per-vertex and per-patch both have separate location domains
3227  * with a max of MAX_VARYING.
3228  */
3229 static uint64_t
reserved_varying_slot(struct gl_linked_shader * sh,nir_variable_mode io_mode)3230 reserved_varying_slot(struct gl_linked_shader *sh,
3231                       nir_variable_mode io_mode)
3232 {
3233    assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
3234    /* Avoid an overflow of the returned value */
3235    assert(MAX_VARYINGS_INCL_PATCH <= 64);
3236 
3237    uint64_t slots = 0;
3238    int var_slot;
3239 
3240    if (!sh)
3241       return slots;
3242 
3243    nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
3244       if (!var->data.explicit_location ||
3245           var->data.location < VARYING_SLOT_VAR0)
3246          continue;
3247 
3248       var_slot = var->data.location - VARYING_SLOT_VAR0;
3249 
3250       bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
3251                                 sh->Stage == MESA_SHADER_VERTEX;
3252       unsigned num_elements =
3253          glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
3254                                     is_gl_vertex_input);
3255       for (unsigned i = 0; i < num_elements; i++) {
3256          if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
3257             slots |= UINT64_C(1) << var_slot;
3258          var_slot += 1;
3259       }
3260    }
3261 
3262    return slots;
3263 }
3264 
3265 /**
3266  * Sets the bits in the inputs_read, or outputs_written
3267  * bitfield corresponding to this variable.
3268  */
3269 static void
set_variable_io_mask(BITSET_WORD * bits,nir_variable * var,gl_shader_stage stage)3270 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
3271 {
3272    assert(var->data.mode == nir_var_shader_in ||
3273           var->data.mode == nir_var_shader_out);
3274    assert(var->data.location >= VARYING_SLOT_VAR0);
3275 
3276    const struct glsl_type *type = var->type;
3277    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
3278       assert(glsl_type_is_array(type));
3279       type = glsl_get_array_element(type);
3280    }
3281 
3282    unsigned location = var->data.location - VARYING_SLOT_VAR0;
3283    unsigned slots = glsl_count_attribute_slots(type, false);
3284    for (unsigned i = 0; i < slots; i++) {
3285       BITSET_SET(bits, location + i);
3286    }
3287 }
3288 
3289 static uint8_t
get_num_components(nir_variable * var)3290 get_num_components(nir_variable *var)
3291 {
3292    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
3293       return 4;
3294 
3295    return glsl_get_vector_elements(glsl_without_array(var->type));
3296 }
3297 
3298 static void
tcs_add_output_reads(nir_shader * shader,BITSET_WORD ** read)3299 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
3300 {
3301    nir_foreach_function_impl(impl, shader) {
3302       nir_foreach_block(block, impl) {
3303          nir_foreach_instr(instr, block) {
3304             if (instr->type != nir_instr_type_intrinsic)
3305                continue;
3306 
3307             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3308             if (intrin->intrinsic != nir_intrinsic_load_deref)
3309                continue;
3310 
3311             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3312             if (!nir_deref_mode_is(deref, nir_var_shader_out))
3313                continue;
3314 
3315             nir_variable *var = nir_deref_instr_get_variable(deref);
3316             for (unsigned i = 0; i < get_num_components(var); i++) {
3317                if (var->data.location < VARYING_SLOT_VAR0)
3318                   continue;
3319 
3320                unsigned comp = var->data.location_frac;
3321                set_variable_io_mask(read[comp + i], var, shader->info.stage);
3322             }
3323          }
3324       }
3325    }
3326 }
3327 
3328 /* We need to replace any interp intrinsics with undefined (shader_temp) inputs
3329  * as no further NIR pass expects to see this.
3330  */
3331 static bool
replace_unused_interpolate_at_with_undef(nir_builder * b,nir_instr * instr,void * data)3332 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
3333                                          void *data)
3334 {
3335    if (instr->type == nir_instr_type_intrinsic) {
3336       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3337 
3338       if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
3339           intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
3340           intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
3341          nir_variable *var = nir_intrinsic_get_var(intrin, 0);
3342          if (var->data.mode == nir_var_shader_temp) {
3343             /* Create undef and rewrite the interp uses */
3344             nir_def *undef =
3345                nir_undef(b, intrin->def.num_components,
3346                              intrin->def.bit_size);
3347             nir_def_rewrite_uses(&intrin->def, undef);
3348 
3349             nir_instr_remove(&intrin->instr);
3350             return true;
3351          }
3352       }
3353    }
3354 
3355    return false;
3356 }
3357 
3358 static void
fixup_vars_lowered_to_temp(nir_shader * shader,nir_variable_mode mode)3359 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
3360 {
3361    /* Remove all interpolate uses of the unset varying and replace with undef. */
3362    if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
3363       (void) nir_shader_instructions_pass(shader,
3364                                           replace_unused_interpolate_at_with_undef,
3365                                           nir_metadata_block_index |
3366                                           nir_metadata_dominance,
3367                                           NULL);
3368    }
3369 
3370    nir_lower_global_vars_to_local(shader);
3371    nir_fixup_deref_modes(shader);
3372 }
3373 
/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, consumer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" should be an array of 4 BITSET_WORDs representing each
 * .location_frac used.  Note that for vector variables, only the first channel
 * (.location_frac) is examined for deciding if the variable is used!
 */
static bool
remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
                      struct gl_shader_program *prog,
                      nir_variable_mode mode,
                      BITSET_WORD **used_by_other_stage)
{
   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   bool progress = false;
   /* Outputs are removed from the producer; inputs from the consumer. */
   nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;

   BITSET_WORD **used;
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      used = used_by_other_stage;

      /* Skip builtins; dead builtins are removed elsewhere. */
      if (is_gl_identifier(var->name))
         continue;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      /* Skip xfb varyings and any other type we cannot remove */
      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      /* Bitset of slots the other stage uses at this component offset. */
      BITSET_WORD *other_stage = used[var->data.location_frac];

      /* if location == -1 lower varying to global as it has no match and is not
       * a xfb varying, this must be done after skipping builtins as builtins
       * could be assigned a location of -1.
       * We also lower unused varyings with explicit locations.
       */
      bool use_found = false;
      if (var->data.location >= 0) {
         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         /* Peel the extra array dimension off arrayed/per-view I/O. */
         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* A variable is "used" if the other stage touches any of the
          * slots it occupies.
          */
         unsigned slots = glsl_count_attribute_slots(type, false);
         for (unsigned i = 0; i < slots; i++) {
            if (BITSET_TEST(other_stage, location + i)) {
               use_found = true;
               break;
            }
         }
      }

      if (!use_found) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         progress = true;

         if (mode == nir_var_shader_in) {
            if (!prog->IsES && prog->GLSL_Version <= 120) {
               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
                *
                *     Only those varying variables used (i.e. read) in
                *     the fragment shader executable must be written to
                *     by the vertex shader executable; declaring
                *     superfluous varying variables in a vertex shader is
                *     permissible.
                *
                * We interpret this text as meaning that the VS must
                * write the variable for the FS to read it.  See
                * "glsl1-varying read but not written" in piglit.
                */
               linker_error(prog, "%s shader varying %s not written "
                            "by %s shader\n.",
                            _mesa_shader_stage_to_string(consumer->info.stage),
                            var->name,
                            _mesa_shader_stage_to_string(producer->info.stage));
            } else {
               linker_warning(prog, "%s shader varying %s not written "
                              "by %s shader\n.",
                              _mesa_shader_stage_to_string(consumer->info.stage),
                              var->name,
                              _mesa_shader_stage_to_string(producer->info.stage));
            }
         }
      }
   }

   /* Demoted variables must be lowered to locals and deref modes fixed. */
   if (progress)
      fixup_vars_lowered_to_temp(shader, mode);

   return progress;
}
3485 
3486 static bool
remove_unused_varyings(nir_shader * producer,nir_shader * consumer,struct gl_shader_program * prog,void * mem_ctx)3487 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
3488                        struct gl_shader_program *prog, void *mem_ctx)
3489 {
3490    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
3491    assert(consumer->info.stage != MESA_SHADER_VERTEX);
3492 
3493    int max_loc_out = 0;
3494    nir_foreach_shader_out_variable(var, producer) {
3495       if (var->data.location < VARYING_SLOT_VAR0)
3496          continue;
3497 
3498       const struct glsl_type *type = var->type;
3499       if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
3500          assert(glsl_type_is_array(type));
3501          type = glsl_get_array_element(type);
3502       }
3503       unsigned slots = glsl_count_attribute_slots(type, false);
3504 
3505       max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3506          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out;
3507    }
3508 
3509    int max_loc_in = 0;
3510    nir_foreach_shader_in_variable(var, consumer) {
3511       if (var->data.location < VARYING_SLOT_VAR0)
3512          continue;
3513 
3514       const struct glsl_type *type = var->type;
3515       if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
3516          assert(glsl_type_is_array(type));
3517          type = glsl_get_array_element(type);
3518       }
3519       unsigned slots = glsl_count_attribute_slots(type, false);
3520 
3521       max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3522          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in;
3523    }
3524 
3525    /* Old glsl shaders that don't use explicit locations can contain greater
3526     * than 64 varyings before unused varyings are removed so we must count them
3527     * and make use of the BITSET macros to keep track of used slots. Once we
3528     * have removed these excess varyings we can make use of further nir varying
3529     * linking optimimisation passes.
3530     */
3531    BITSET_WORD *read[4];
3532    BITSET_WORD *written[4];
3533    int max_loc = MAX2(max_loc_in, max_loc_out);
3534    for (unsigned i = 0; i < 4; i++) {
3535       read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3536       written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3537    }
3538 
3539    nir_foreach_shader_out_variable(var, producer) {
3540       if (var->data.location < VARYING_SLOT_VAR0)
3541          continue;
3542 
3543       for (unsigned i = 0; i < get_num_components(var); i++) {
3544          unsigned comp = var->data.location_frac;
3545          set_variable_io_mask(written[comp + i], var, producer->info.stage);
3546       }
3547    }
3548 
3549    nir_foreach_shader_in_variable(var, consumer) {
3550       if (var->data.location < VARYING_SLOT_VAR0)
3551          continue;
3552 
3553       for (unsigned i = 0; i < get_num_components(var); i++) {
3554          unsigned comp = var->data.location_frac;
3555          set_variable_io_mask(read[comp + i], var, consumer->info.stage);
3556       }
3557    }
3558 
3559    /* Each TCS invocation can read data written by other TCS invocations,
3560     * so even if the outputs are not used by the TES we must also make
3561     * sure they are not read by the TCS before demoting them to globals.
3562     */
3563    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
3564       tcs_add_output_reads(producer, read);
3565 
3566    bool progress = false;
3567    progress =
3568       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
3569    progress =
3570       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;
3571 
3572    return progress;
3573 }
3574 
3575 static bool
should_add_varying_match_record(nir_variable * const input_var,struct gl_shader_program * prog,struct gl_linked_shader * producer,struct gl_linked_shader * consumer)3576 should_add_varying_match_record(nir_variable *const input_var,
3577                                 struct gl_shader_program *prog,
3578                                 struct gl_linked_shader *producer,
3579                                 struct gl_linked_shader *consumer) {
3580 
3581    /* If a matching input variable was found, add this output (and the input) to
3582     * the set.  If this is a separable program and there is no consumer stage,
3583     * add the output.
3584     *
3585     * Always add TCS outputs. They are shared by all invocations
3586     * within a patch and can be used as shared memory.
3587     */
3588    return input_var || (prog->SeparateShader && consumer == NULL) ||
3589              producer->Stage == MESA_SHADER_TESS_CTRL;
3590 }
3591 
/* This assigns some initial unoptimised varying locations so that our nir
 * optimisations can perform some initial optimisations and also does initial
 * processing of the transform feedback declarations: candidate outputs are
 * gathered, matched against the xfb declarations and, where needed, lowered
 * to new output variables.
 *
 * Returns false (after calling linker_error()) on a link failure, true
 * otherwise.
 */
static bool
assign_initial_varying_locations(const struct gl_constants *consts,
                                 const struct gl_extensions *exts,
                                 void *mem_ctx,
                                 struct gl_shader_program *prog,
                                 struct gl_linked_shader *producer,
                                 struct gl_linked_shader *consumer,
                                 unsigned num_xfb_decls,
                                 struct xfb_decl *xfb_decls,
                                 struct varying_matches *vm)
{
   init_varying_matches(mem_ctx, vm, consts, exts,
                        producer ? producer->Stage : MESA_SHADER_NONE,
                        consumer ? consumer->Stage : MESA_SHADER_NONE,
                        prog->SeparateShader);

   /* Maps candidate names to struct tfeedback_candidate entries built below. */
   struct hash_table *tfeedback_candidates =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   /* Lookup tables used by get_matching_input() to pair producer outputs
    * with consumer inputs by name, interface block or explicit location.
    */
   struct hash_table *consumer_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   struct hash_table *consumer_interface_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
      NULL,
   };

   if (consumer)
      populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
                                   consumer_inputs, consumer_interface_inputs,
                                   consumer_inputs_with_locations);

   if (producer) {
      nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
         /* Only geometry shaders can use non-zero streams */
         assert(output_var->data.stream == 0 ||
                (output_var->data.stream < MAX_VERTEX_STREAMS &&
                 producer->Stage == MESA_SHADER_GEOMETRY));

         if (num_xfb_decls > 0) {
            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
             * ("Vertex Shader Variables / Output Variables")
             *
             * "Each program object can specify a set of output variables from
             * one shader to be recorded in transform feedback mode (see
             * section 13.3). The variables that can be recorded are those
             * emitted by the first active shader, in order, from the
             * following list:
             *
             *  * geometry shader
             *  * tessellation evaluation shader
             *  * tessellation control shader
             *  * vertex shader"
             *
             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
             * Variables / Output Variables") tessellation control shader is
             * not included in the stages list.
             */
            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {

               const struct glsl_type *type = output_var->data.from_named_ifc_block ?
                  output_var->interface_type : output_var->type;
               /* Non-patch TCS outputs are per-vertex arrays; strip the
                * outer array dimension before generating candidates.
                */
               if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               const struct glsl_struct_field *ifc_member = NULL;
               if (output_var->data.from_named_ifc_block) {
                  ifc_member =
                     glsl_get_struct_field_data(glsl_without_array(type),
                        glsl_get_field_index(glsl_without_array(type), output_var->name));
               }

               /* Pick the name the candidate generator will recurse from:
                * plain variable name for structs/AoA, the interface block's
                * type name for named interface blocks.
                */
               char *name;
               if (glsl_type_is_struct(glsl_without_array(type)) ||
                   (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
                  type = output_var->type;
                  name = ralloc_strdup(NULL, output_var->name);
               } else if (glsl_type_is_interface(glsl_without_array(type))) {
                  name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
               } else  {
                  name = ralloc_strdup(NULL, output_var->name);
               }

               struct tfeedback_candidate_generator_state state;
               state.mem_ctx = mem_ctx;
               state.tfeedback_candidates = tfeedback_candidates;
               state.stage = producer->Stage;
               state.toplevel_var = output_var;
               state.varying_floats = 0;
               state.xfb_offset_floats = 0;

               tfeedback_candidate_generator(&state, &name, strlen(name), type,
                                             ifc_member);
               ralloc_free(name);
            }
         }

         nir_variable *const input_var =
            get_matching_input(mem_ctx, output_var, consumer_inputs,
                               consumer_interface_inputs,
                               consumer_inputs_with_locations);

         if (should_add_varying_match_record(input_var, prog, producer,
                                             consumer)) {
            varying_matches_record(mem_ctx, vm, output_var, input_var);
         }

         /* Only stream 0 outputs can be consumed in the next stage */
         if (input_var && output_var->data.stream != 0) {
            linker_error(prog, "output %s is assigned to stream=%d but "
                         "is linked to an input, which requires stream=0",
                         output_var->name, output_var->data.stream);
            return false;
         }
      }
   } else {
      /* If there's no producer stage, then this must be a separable program.
       * For example, we may have a program that has just a fragment shader.
       * Later this program will be used with some arbitrary vertex (or
       * geometry) shader program.  This means that locations must be assigned
       * for all the inputs.
       */
      nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
         varying_matches_record(mem_ctx, vm, NULL, input_var);
      }
   }

   /* Resolve each xfb declaration against the candidates gathered above. */
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      const struct tfeedback_candidate *matched_candidate
         = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);

      /* xfb_decl_find_candidate() reports the link error itself. */
      if (matched_candidate == NULL)
         return false;

      /* There are two situations where a new output varying is needed:
       *
       *  - If varying packing is disabled for xfb and the current declaration
       *    is subscripting an array, whether the subscript is aligned or not.
       *    to preserve the rest of the array for the consumer.
       *
       *  - If a builtin variable needs to be copied to a new variable
       *    before its content is modified by another lowering pass (e.g.
       *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
       */
      const bool lowered =
         (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
         (matched_candidate->toplevel_var->data.explicit_location &&
          matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
          (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
          (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
              BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));

      if (lowered) {
         nir_variable *new_var;
         struct tfeedback_candidate *new_candidate = NULL;

         new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
                                            xfb_decls[i].orig_name,
                                            matched_candidate->toplevel_var);
         if (new_var == NULL)
            return false;

         /* Create new candidate and replace matched_candidate */
         new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
         new_candidate->toplevel_var = new_var;
         new_candidate->type = new_var->type;
         new_candidate->struct_offset_floats = 0;
         new_candidate->xfb_offset_floats = 0;
         _mesa_hash_table_insert(tfeedback_candidates,
                                 ralloc_strdup(mem_ctx, new_var->name),
                                 new_candidate);

         xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
         matched_candidate = new_candidate;
      }

      /* Mark as xfb varying */
      matched_candidate->toplevel_var->data.is_xfb = 1;

      /* Mark xfb varyings as always active */
      matched_candidate->toplevel_var->data.always_active_io = 1;

      /* Mark any corresponding inputs as always active also. We must do this
       * because we have a NIR pass that lowers vectors to scalars and another
       * that removes unused varyings.
       * We don't split varyings marked as always active because there is no
       * point in doing so. This means we need to mark both sides of the
       * interface as always active otherwise we will have a mismatch and
       * start removing things we shouldn't.
       */
      nir_variable *const input_var =
         get_matching_input(mem_ctx, matched_candidate->toplevel_var,
                            consumer_inputs, consumer_interface_inputs,
                            consumer_inputs_with_locations);
      if (input_var) {
         input_var->data.is_xfb = 1;
         input_var->data.always_active_io = 1;
      }

      /* Add the xfb varying to varying matches if it wasn't already added */
      if ((!should_add_varying_match_record(input_var, prog, producer,
                                            consumer) &&
           !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
         matched_candidate->toplevel_var->data.is_xfb_only = 1;
         varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
                                NULL);
      }
   }

   uint64_t reserved_out_slots = 0;
   if (producer)
      reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);

   uint64_t reserved_in_slots = 0;
   if (consumer)
      reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);

   /* Assign temporary user varying locations. This is required for our NIR
    * varying optimisations to do their matching.
    */
   const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
   varying_matches_assign_temp_locations(vm, prog, reserved_slots);

   /* Remember the temporary location of each xfb varying so the final
    * assignment pass can find the (possibly optimised) variable again.
    */
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      xfb_decls[i].matched_candidate->initial_location =
         xfb_decls[i].matched_candidate->toplevel_var->data.location;
      xfb_decls[i].matched_candidate->initial_location_frac =
         xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
   }

   return true;
}
3838 
/* Run cross-stage linking optimisations on one producer/consumer shader
 * pair: optionally scalarize the interface, run the general NIR opt loops,
 * remove varyings that turn out to be unused and propagate precision
 * between the stages.
 */
static void
link_shader_opts(struct varying_matches *vm,
                 nir_shader *producer, nir_shader *consumer,
                 struct gl_shader_program *prog, void *mem_ctx)
{
   /* If we can't pack the stage using this pass then we can't lower io to
    * scalar just yet. Instead we leave it to a later NIR linking pass that uses
    * ARB_enhanced_layout style packing to pack things further.
    *
    * Otherwise we might end up causing linking errors and perf regressions
    * because the new scalars will be assigned individual slots and can overflow
    * the available slots.
    */
   if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
      !vm->disable_xfb_packing) {
      NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
   }

   gl_nir_opts(producer);
   gl_nir_opts(consumer);

   /* Re-run the consumer's opt loop if the cross-stage varying optimisation
    * made progress, as it may have exposed further opportunities there.
    */
   if (nir_link_opt_varyings(producer, consumer))
      gl_nir_opts(consumer);

   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

   if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
      /* Varyings demoted to shader temporaries above are lowered to locals
       * here so the optimisation passes can eliminate them entirely.
       */
      NIR_PASS(_, producer, nir_lower_global_vars_to_local);
      NIR_PASS(_, consumer, nir_lower_global_vars_to_local);

      gl_nir_opts(producer);
      gl_nir_opts(consumer);

      /* Optimizations can cause varyings to become unused.
       * nir_compact_varyings() depends on all dead varyings being removed so
       * we need to call nir_remove_dead_variables() again here.
       */
      NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
                 NULL);
      NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
                 NULL);
   }

   nir_link_varying_precision(producer, consumer);
}
3886 
3887 /**
3888  * Assign locations for all variables that are produced in one pipeline stage
3889  * (the "producer") and consumed in the next stage (the "consumer").
3890  *
3891  * Variables produced by the producer may also be consumed by transform
3892  * feedback.
3893  *
3894  * \param num_xfb_decls is the number of declarations indicating
3895  *        variables that may be consumed by transform feedback.
3896  *
3897  * \param xfb_decls is a pointer to an array of xfb_decl objects
3898  *        representing the result of parsing the strings passed to
3899  *        glTransformFeedbackVaryings().  assign_location() will be called for
3900  *        each of these objects that matches one of the outputs of the
3901  *        producer.
3902  *
3903  * When num_xfb_decls is nonzero, it is permissible for the consumer to
3904  * be NULL.  In this case, varying locations are assigned solely based on the
3905  * requirements of transform feedback.
3906  */
static bool
assign_final_varying_locations(const struct gl_constants *consts,
                               const struct gl_extensions *exts,
                               void *mem_ctx,
                               struct gl_shader_program *prog,
                               struct gl_linked_shader *producer,
                               struct gl_linked_shader *consumer,
                               unsigned num_xfb_decls,
                               struct xfb_decl *xfb_decls,
                               const uint64_t reserved_slots,
                               struct varying_matches *vm)
{
   init_varying_matches(mem_ctx, vm, consts, exts,
                        producer ? producer->Stage : MESA_SHADER_NONE,
                        consumer ? consumer->Stage : MESA_SHADER_NONE,
                        prog->SeparateShader);

   /* Regather varying matches as we ran optimisations and the previous pointers
    * are no longer valid.
    */
   if (producer) {
      nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
         /* Only generic varyings without explicit locations are (re)packed. */
         if (var_out->data.location < VARYING_SLOT_VAR0 ||
             var_out->data.explicit_location)
            continue;

         /* Grow the match array on demand (doubling strategy). */
         if (vm->num_matches == vm->matches_capacity) {
            vm->matches_capacity *= 2;
            vm->matches = (struct match *)
               reralloc(mem_ctx, vm->matches, struct match,
                        vm->matches_capacity);
         }

         vm->matches[vm->num_matches].packing_class
            = varying_matches_compute_packing_class(var_out);
         vm->matches[vm->num_matches].packing_order
            = varying_matches_compute_packing_order(var_out);

         vm->matches[vm->num_matches].producer_var = var_out;
         vm->matches[vm->num_matches].consumer_var = NULL;
         vm->num_matches++;
      }

      /* Regather xfb varyings too: re-resolve each candidate's variable
       * pointer by looking up the temporary location recorded by
       * assign_initial_varying_locations().
       */
      for (unsigned i = 0; i < num_xfb_decls; i++) {
         if (!xfb_decl_is_varying(&xfb_decls[i]))
            continue;

         /* Varying pointer was already reset */
         if (xfb_decls[i].matched_candidate->initial_location == -1)
            continue;

         bool UNUSED is_reset = false;
         bool UNUSED no_outputs = true;
         nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
            no_outputs = false;
            assert(var_out->data.location != -1);
            if (var_out->data.location ==
                xfb_decls[i].matched_candidate->initial_location &&
                var_out->data.location_frac ==
                xfb_decls[i].matched_candidate->initial_location_frac) {
               xfb_decls[i].matched_candidate->toplevel_var = var_out;
               /* Mark the candidate resolved so we don't re-match it. */
               xfb_decls[i].matched_candidate->initial_location = -1;
               is_reset = true;
               break;
            }
         }
         /* Every unresolved candidate must be found unless the producer has
          * no outputs at all.
          */
         assert(is_reset || no_outputs);
      }
   }

   bool found_match = false;
   if (consumer) {
      nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
         if (var_in->data.location < VARYING_SLOT_VAR0 ||
             var_in->data.explicit_location)
            continue;

         /* Pair the input with a producer output at the same temporary
          * location/component, if one exists.
          */
         found_match = false;
         for (unsigned i = 0; i < vm->num_matches; i++) {
            if (vm->matches[i].producer_var &&
                (vm->matches[i].producer_var->data.location == var_in->data.location &&
                 vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {

               vm->matches[i].consumer_var = var_in;
               found_match = true;
               break;
            }
         }
         if (!found_match) {
            /* Consumer-only varying: record it as a match with no producer. */
            if (vm->num_matches == vm->matches_capacity) {
               vm->matches_capacity *= 2;
               vm->matches = (struct match *)
                  reralloc(mem_ctx, vm->matches, struct match,
                           vm->matches_capacity);
            }

            vm->matches[vm->num_matches].packing_class
               = varying_matches_compute_packing_class(var_in);
            vm->matches[vm->num_matches].packing_order
               = varying_matches_compute_packing_order(var_in);

            vm->matches[vm->num_matches].producer_var = NULL;
            vm->matches[vm->num_matches].consumer_var = var_in;
            vm->num_matches++;
         }
      }
   }

   /* Assign the final packed locations and write them back to the vars. */
   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
   const unsigned slots_used =
      varying_matches_assign_locations(vm, prog, components, reserved_slots);
   varying_matches_store_locations(vm);

   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying(&xfb_decls[i])) {
         /* xfb_decl_assign_location() reports the link error on failure. */
         if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog,
             vm->disable_varying_packing, vm->xfb_enabled))
            return false;
      }
   }

   if (producer) {
      gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                   nir_var_shader_out, 0, producer,
                                   vm->disable_varying_packing,
                                   vm->disable_xfb_packing, vm->xfb_enabled);
      nir_lower_pack(producer->Program->nir);
   }

   if (consumer) {
      /* Geometry shader inputs are per-vertex arrays sized by the input
       * primitive's vertex count.
       */
      unsigned consumer_vertices = 0;
      if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
         consumer_vertices = prog->Geom.VerticesIn;

      gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                   nir_var_shader_in, consumer_vertices,
                                   consumer, vm->disable_varying_packing,
                                   vm->disable_xfb_packing, vm->xfb_enabled);
      nir_lower_pack(consumer->Program->nir);
   }

   return true;
}
4051 
4052 static bool
check_against_output_limit(const struct gl_constants * consts,gl_api api,struct gl_shader_program * prog,struct gl_linked_shader * producer,unsigned num_explicit_locations)4053 check_against_output_limit(const struct gl_constants *consts, gl_api api,
4054                            struct gl_shader_program *prog,
4055                            struct gl_linked_shader *producer,
4056                            unsigned num_explicit_locations)
4057 {
4058    unsigned output_vectors = num_explicit_locations;
4059    nir_foreach_shader_out_variable(var, producer->Program->nir) {
4060       if (!var->data.explicit_location &&
4061           var_counts_against_varying_limit(producer->Stage, var)) {
4062          /* outputs for fragment shader can't be doubles */
4063          output_vectors += glsl_count_attribute_slots(var->type, false);
4064       }
4065    }
4066 
4067    assert(producer->Stage != MESA_SHADER_FRAGMENT);
4068    unsigned max_output_components =
4069       consts->Program[producer->Stage].MaxOutputComponents;
4070 
4071    const unsigned output_components = output_vectors * 4;
4072    if (output_components > max_output_components) {
4073       if (api == API_OPENGLES2 || prog->IsES)
4074          linker_error(prog, "%s shader uses too many output vectors "
4075                       "(%u > %u)\n",
4076                       _mesa_shader_stage_to_string(producer->Stage),
4077                       output_vectors,
4078                       max_output_components / 4);
4079       else
4080          linker_error(prog, "%s shader uses too many output components "
4081                       "(%u > %u)\n",
4082                       _mesa_shader_stage_to_string(producer->Stage),
4083                       output_components,
4084                       max_output_components);
4085 
4086       return false;
4087    }
4088 
4089    return true;
4090 }
4091 
4092 static bool
check_against_input_limit(const struct gl_constants * consts,gl_api api,struct gl_shader_program * prog,struct gl_linked_shader * consumer,unsigned num_explicit_locations)4093 check_against_input_limit(const struct gl_constants *consts, gl_api api,
4094                           struct gl_shader_program *prog,
4095                           struct gl_linked_shader *consumer,
4096                           unsigned num_explicit_locations)
4097 {
4098    unsigned input_vectors = num_explicit_locations;
4099 
4100    nir_foreach_shader_in_variable(var, consumer->Program->nir) {
4101       if (!var->data.explicit_location &&
4102           var_counts_against_varying_limit(consumer->Stage, var)) {
4103          /* vertex inputs aren't varying counted */
4104          input_vectors += glsl_count_attribute_slots(var->type, false);
4105       }
4106    }
4107 
4108    assert(consumer->Stage != MESA_SHADER_VERTEX);
4109    unsigned max_input_components =
4110       consts->Program[consumer->Stage].MaxInputComponents;
4111 
4112    const unsigned input_components = input_vectors * 4;
4113    if (input_components > max_input_components) {
4114       if (api == API_OPENGLES2 || prog->IsES)
4115          linker_error(prog, "%s shader uses too many input vectors "
4116                       "(%u > %u)\n",
4117                       _mesa_shader_stage_to_string(consumer->Stage),
4118                       input_vectors,
4119                       max_input_components / 4);
4120       else
4121          linker_error(prog, "%s shader uses too many input components "
4122                       "(%u > %u)\n",
4123                       _mesa_shader_stage_to_string(consumer->Stage),
4124                       input_components,
4125                       max_input_components);
4126 
4127       return false;
4128    }
4129 
4130    return true;
4131 }
4132 
4133 /* Lower unset/unused inputs/outputs */
4134 static void
remove_unused_shader_inputs_and_outputs(struct gl_shader_program * prog,unsigned stage,nir_variable_mode mode)4135 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
4136                                         unsigned stage, nir_variable_mode mode)
4137 {
4138    bool progress = false;
4139    nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;
4140 
4141    nir_foreach_variable_with_modes_safe(var, shader, mode) {
4142       if (!var->data.is_xfb_only && var->data.location == -1) {
4143          var->data.location = 0;
4144          var->data.mode = nir_var_shader_temp;
4145          progress = true;
4146       }
4147    }
4148 
4149    if (progress)
4150       fixup_vars_lowered_to_temp(shader, mode);
4151 }
4152 
/**
 * Link varyings between the stages of \p prog (stage indices \p first
 * through \p last): gather transform feedback declarations, assign
 * initial and final varying locations, remove unused inputs/outputs,
 * validate against per-stage input/output limits and finally store the
 * transform feedback info on the program.
 *
 * Returns false (with a linker error raised) on failure.
 */
static bool
link_varyings(struct gl_shader_program *prog, unsigned first,
              unsigned last, const struct gl_constants *consts,
              const struct gl_extensions *exts, gl_api api, void *mem_ctx)
{
   bool has_xfb_qualifiers = false;
   unsigned num_xfb_decls = 0;
   char **varying_names = NULL;
   struct xfb_decl *xfb_decls = NULL;

   /* No varyings to link when every stage comes after the fragment
    * shader (e.g. a compute-only program).
    */
   if (last > MESA_SHADER_FRAGMENT)
      return true;

   /* From the ARB_enhanced_layouts spec:
    *
    *    "If the shader used to record output variables for transform feedback
    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
    *    qualifiers, the values specified by TransformFeedbackVaryings are
    *    ignored, and the set of variables captured for transform feedback is
    *    instead derived from the specified layout qualifiers."
    */
   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
      /* Find last stage before fragment shader */
      if (prog->_LinkedShaders[i]) {
         has_xfb_qualifiers =
            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                          prog, &num_xfb_decls,
                                          &varying_names);
         break;
      }
   }

   /* No xfb layout qualifiers: fall back to the names supplied via the
    * glTransformFeedbackVaryings() API.
    */
   if (!has_xfb_qualifiers) {
      num_xfb_decls = prog->TransformFeedback.NumVarying;
      varying_names = prog->TransformFeedback.VaryingNames;
   }

   if (num_xfb_decls != 0) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * the <count> specified by TransformFeedbackVaryingsEXT is
       *     non-zero, but the program object has no vertex or geometry
       *     shader;
       */
      if (first >= MESA_SHADER_FRAGMENT) {
         linker_error(prog, "Transform feedback varyings specified, but "
                      "no vertex, tessellation, or geometry shader is "
                      "present.\n");
         return false;
      }

      xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
                                      num_xfb_decls);
      if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
                           varying_names, xfb_decls))
         return false;
   }

   /* Build a compact array of the linked stages, in pipeline order. */
   struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shader[num_shaders++] = prog->_LinkedShaders[i];
   }

   struct varying_matches vm;
   /* When the last stage feeds transform feedback or an SSO interface
    * rather than a fragment shader, its outputs still need initial
    * locations assigned.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
         struct gl_linked_shader *producer = prog->_LinkedShaders[last];
         if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                               producer, NULL, num_xfb_decls,
                                               xfb_decls, &vm))
            return false;
   }

   if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
      remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
      remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
   }

   /* For SSO the first stage's inputs form an external interface and need
    * initial locations too.
    */
   if (prog->SeparateShader) {
      struct gl_linked_shader *consumer = linked_shader[0];
      if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
                                            consumer, 0, NULL, &vm))
         return false;
   }

   if (num_shaders == 1) {
      /* Linking shaders also optimizes them. Separate shaders, compute shaders
       * and shaders with a fixed-func VS or FS that don't need linking are
       * optimized here.
       */
      gl_nir_opts(linked_shader[0]->Program->nir);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      for (int i = num_shaders - 2; i >= 0; i--) {
         /* Only the producer feeding the fragment shader records xfb. */
         unsigned stage_num_xfb_decls =
            linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
            num_xfb_decls : 0;

         if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                               linked_shader[i],
                                               linked_shader[i + 1],
                                               stage_num_xfb_decls, xfb_decls,
                                               &vm))
            return false;

         /* Now that validation is done it's safe to remove unused varyings.
          * As we have both a producer and consumer it's safe to remove unused
          * varyings even if the program is a SSO because the stages are being
          * linked together i.e. we have a multi-stage SSO.
          */
         link_shader_opts(&vm, linked_shader[i]->Program->nir,
                          linked_shader[i + 1]->Program->nir,
                          prog, mem_ctx);

         remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
                                                 nir_var_shader_out);
         remove_unused_shader_inputs_and_outputs(prog,
                                                 linked_shader[i + 1]->Stage,
                                                 nir_var_shader_in);
      }
   }

   if (!prog->SeparateShader) {
      /* If not SSO remove unused varyings from the first/last stage */
      NIR_PASS(_, prog->_LinkedShaders[first]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_in, NULL);
      NIR_PASS(_, prog->_LinkedShaders[last]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_out, NULL);
   } else {
      /* Sort inputs / outputs into a canonical order.  This is necessary so
       * that inputs / outputs of separable shaders will be assigned
       * predictable locations regardless of the order in which declarations
       * appeared in the shader source.
       */
      if (first != MESA_SHADER_VERTEX) {
         canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
                                nir_var_shader_in);
      }

      if (last != MESA_SHADER_FRAGMENT) {
         canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
                                nir_var_shader_out);
      }
   }

   /* If there is no fragment shader we need to set transform feedback.
    *
    * For SSO we also need to assign output locations.  We assign them here
    * because we need to do it for both single stage programs and multi stage
    * programs.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
      const uint64_t reserved_out_slots =
         reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          prog->_LinkedShaders[last], NULL,
                                          num_xfb_decls, xfb_decls,
                                          reserved_out_slots, &vm))
         return false;
   }

   if (prog->SeparateShader) {
      struct gl_linked_shader *const sh = prog->_LinkedShaders[first];

      const uint64_t reserved_slots =
         reserved_varying_slot(sh, nir_var_shader_in);

      /* Assign input locations for SSO, output locations are already
       * assigned.
       */
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          NULL /* producer */,
                                          sh /* consumer */,
                                          0 /* num_xfb_decls */,
                                          NULL /* xfb_decls */,
                                          reserved_slots, &vm))
         return false;
   }

   if (num_shaders == 1) {
      /* Single stage: run the dead-builtin-varyings pass once with the
       * stage as consumer and once as producer, since there is no
       * adjacent stage on either side.
       */
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
                                       0, NULL);
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
                                       num_xfb_decls, xfb_decls);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      int next = last;
      for (int i = next - 1; i >= 0; i--) {
         if (prog->_LinkedShaders[i] == NULL && i != 0)
            continue;

         struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
         struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

         gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
                                          next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                                          xfb_decls);

         const uint64_t reserved_out_slots =
            reserved_varying_slot(sh_i, nir_var_shader_out);
         const uint64_t reserved_in_slots =
            reserved_varying_slot(sh_next, nir_var_shader_in);

         if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
                   sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                   xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
            return false;

         /* This must be done after all dead varyings are eliminated. */
         if (sh_i != NULL) {
            unsigned slots_used = util_bitcount64(reserved_out_slots);
            if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
               return false;
         }

         unsigned slots_used = util_bitcount64(reserved_in_slots);
         if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
            return false;

         next = i;
      }
   }

   if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
                             has_xfb_qualifiers, mem_ctx))
      return false;

   return prog->data->LinkStatus != LINKING_FAILURE;
}
4395 
4396 /**
4397  * Store the gl_FragDepth layout in the gl_shader_program struct.
4398  */
4399 static void
store_fragdepth_layout(struct gl_shader_program * prog)4400 store_fragdepth_layout(struct gl_shader_program *prog)
4401 {
4402    if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
4403       return;
4404    }
4405 
4406    nir_shader *nir = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->nir;
4407    nir_foreach_shader_out_variable(var, nir) {
4408       if (strcmp(var->name, "gl_FragDepth") == 0) {
4409          switch (var->data.depth_layout) {
4410          case nir_depth_layout_none:
4411             prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
4412             return;
4413          case nir_depth_layout_any:
4414             prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
4415             return;
4416          case nir_depth_layout_greater:
4417             prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
4418             return;
4419          case nir_depth_layout_less:
4420             prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
4421             return;
4422          case nir_depth_layout_unchanged:
4423             prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
4424             return;
4425          default:
4426             assert(0);
4427             return;
4428          }
4429       }
4430    }
4431 }
4432 
4433 bool
gl_assign_attribute_or_color_locations(const struct gl_constants * consts,struct gl_shader_program * prog)4434 gl_assign_attribute_or_color_locations(const struct gl_constants *consts,
4435                                        struct gl_shader_program *prog)
4436 {
4437    void *mem_ctx = ralloc_context(NULL);
4438 
4439    if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4440                                             MESA_SHADER_VERTEX, true)) {
4441       ralloc_free(mem_ctx);
4442       return false;
4443    }
4444 
4445    if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4446                                             MESA_SHADER_FRAGMENT, true)) {
4447       ralloc_free(mem_ctx);
4448       return false;
4449    }
4450 
4451    ralloc_free(mem_ctx);
4452    return true;
4453 }
4454 
/**
 * Top-level entry point for linking varyings between the NIR shaders of
 * \p prog.  Determines the first/last linked stages, runs link_varyings(),
 * and on success propagates transform-feedback usage flags into each
 * stage's NIR info and attaches NIR xfb info to the last pre-fragment
 * stage.  Returns false on link failure.
 */
bool
gl_nir_link_varyings(const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     gl_api api, struct gl_shader_program *prog)
{
   void *mem_ctx = ralloc_context(NULL);

   unsigned first, last;

   MESA_TRACE_FUNC();

   store_fragdepth_layout(prog);

   first = MESA_SHADER_STAGES;
   last = 0;

   /* We need to initialise the program resource list because the varying
    * packing pass may start inserting varyings onto the list.
    */
   init_program_resource_list(prog);

   /* Determine first and last stage. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!prog->_LinkedShaders[i])
         continue;
      if (first == MESA_SHADER_STAGES)
         first = i;
      last = i;
   }

   bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
   if (r) {
      for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
         if (!prog->_LinkedShaders[i])
            continue;

         /* Check for transform feedback varyings specified via the API */
         prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
            prog->TransformFeedback.NumVarying > 0;

         /* Check for transform feedback varyings specified in the Shader */
         if (prog->last_vert_prog) {
            prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
               prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
         }
      }

      /* Assign NIR XFB info to the last stage before the fragment shader */
      for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
         struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
         /* Tess control outputs feed tess eval, not xfb, so skip it. */
         if (sh && stage != MESA_SHADER_TESS_CTRL) {
            sh->Program->nir->xfb_info =
               gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
                                  sh->Program->nir);
            break;
         }
      }
   }

   ralloc_free(mem_ctx);
   return r;
}
4517