/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * Linker functions related specifically to linking varyings between shader
 * stages.
 */

#include "main/errors.h"
#include "main/macros.h"
#include "main/menums.h"
#include "main/mtypes.h"
#include "util/hash_table.h"
#include "util/u_math.h"

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_link_varyings.h"
#include "gl_nir_linker.h"
#include "linker_util.h"
#include "nir_gl_types.h"


/**
 * Get the varying type stripped of the outermost array if we're processing
 * a stage whose varyings are arrays indexed by a vertex number (such as
 * geometry shader inputs).
 */
static const struct glsl_type *
get_varying_type(const nir_variable *var, gl_shader_stage stage)
{
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return type;
}
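
/* Example (illustrative, hypothetical GLSL): for a geometry shader input
 * declared as
 *
 *    in vec4 color[3];   // one array element per vertex
 *
 * get_varying_type() returns vec4, the per-vertex type, while for a
 * non-arrayed input or output the declared type is returned unchanged.
 */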

static bool
varying_has_user_specified_location(const nir_variable *var)
{
   return var->data.explicit_location &&
      var->data.location >= VARYING_SLOT_VAR0;
}

static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}
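
/* Illustrative example of the expansion above: given
 *
 *    struct S { float a; vec2 b; };
 *    out S s[2];
 *
 * the base name "s" is rewritten into the flattened names "s[0].a",
 * "s[0].b", "s[1].a" and "s[1].b".  A plain array such as "out vec2 v[2]"
 * stays a single name, since transform feedback captures it as a whole.
 */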

static bool
process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_xfb_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also we don't bother to propagate
    * xfb_stride to interface block members so this will catch that case also.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_offset) {
         *num_xfb_decls += glsl_varying_count(var->type);
         has_xfb_qualifiers = true;
      }
   }

   if (*num_xfb_decls == 0)
      return has_xfb_qualifiers;

   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      if (var->data.explicit_offset) {
         char *name;
         const struct glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            type = var->interface_type;

            /* Find the member type before it was altered by lowering */
            const struct glsl_type *type_wa = glsl_without_array(type);
            member_type =
               glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
            name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         ralloc_free(name);
      }
   }

   assert(i == *num_xfb_decls);
   return has_xfb_qualifiers;
}
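
/* Illustrative GLSL (hypothetical declarations) for the qualifiers handled
 * above, as introduced by ARB_enhanced_layouts:
 *
 *    layout(xfb_buffer = 1, xfb_stride = 32) out;   // global stride
 *    layout(xfb_offset = 0) out vec4 pos;           // captured output
 *
 * Either line alone puts the program in transform feedback capturing mode;
 * only the xfb_offset declaration adds entries to varying_names.
 */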

/**
 * Initialize this struct based on a string that was passed to
 * glTransformFeedbackVaryings.
 *
 * If the input is mal-formed, this call still succeeds, but it sets
 * this->var_name to a mal-formed input, so xfb_decl_find_output_var()
 * will fail to find any matching variable.
 */
static void
xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
              const struct gl_extensions *exts, const void *mem_ctx,
              const char *input)
{
   /* We don't have to be pedantic about what is a valid GLSL variable name,
    * because any variable with an invalid name can't exist in the IR anyway.
    */
   xfb_decl->location = -1;
   xfb_decl->orig_name = input;
   xfb_decl->lowered_builtin_array_variable = none;
   xfb_decl->skip_components = 0;
   xfb_decl->next_buffer_separator = false;
   xfb_decl->matched_candidate = NULL;
   xfb_decl->stream_id = 0;
   xfb_decl->buffer = 0;
   xfb_decl->offset = 0;

   if (exts->ARB_transform_feedback3) {
      /* Parse gl_NextBuffer. */
      if (strcmp(input, "gl_NextBuffer") == 0) {
         xfb_decl->next_buffer_separator = true;
         return;
      }

      /* Parse gl_SkipComponents. */
      if (strcmp(input, "gl_SkipComponents1") == 0)
         xfb_decl->skip_components = 1;
      else if (strcmp(input, "gl_SkipComponents2") == 0)
         xfb_decl->skip_components = 2;
      else if (strcmp(input, "gl_SkipComponents3") == 0)
         xfb_decl->skip_components = 3;
      else if (strcmp(input, "gl_SkipComponents4") == 0)
         xfb_decl->skip_components = 4;

      if (xfb_decl->skip_components)
         return;
   }

   /* Parse a declaration. */
   const char *base_name_end;
   long subscript = link_util_parse_program_resource_name(input, strlen(input),
                                                          &base_name_end);
   xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
   if (xfb_decl->var_name == NULL) {
      _mesa_error_no_memory(__func__);
      return;
   }

   if (subscript >= 0) {
      xfb_decl->array_subscript = subscript;
      xfb_decl->is_subscripted = true;
   } else {
      xfb_decl->is_subscripted = false;
   }

   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
    * class must behave specially to account for the fact that gl_ClipDistance
    * is converted from a float[8] to a vec4[2].
    */
   if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
       strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = clip_distance;
   }
   if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
       strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = cull_distance;
   }

   if (consts->LowerTessLevel &&
       (strcmp(xfb_decl->var_name, "gl_TessLevelOuter") == 0))
      xfb_decl->lowered_builtin_array_variable = tess_level_outer;
   if (consts->LowerTessLevel &&
       (strcmp(xfb_decl->var_name, "gl_TessLevelInner") == 0))
      xfb_decl->lowered_builtin_array_variable = tess_level_inner;
}
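
/* Examples (illustrative) of what xfb_decl_init() produces for typical
 * glTransformFeedbackVaryings() strings:
 *
 *    "gl_NextBuffer"      -> next_buffer_separator = true
 *    "gl_SkipComponents2" -> skip_components = 2
 *    "foo"                -> var_name = "foo", is_subscripted = false
 *    "foo[2]"             -> var_name = "foo", array_subscript = 2,
 *                            is_subscripted = true
 */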

/**
 * Determine whether two xfb_decl structs refer to the same variable and
 * array index (if applicable).
 */
static bool
xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
{
   assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));

   if (strcmp(x->var_name, y->var_name) != 0)
      return false;
   if (x->is_subscripted != y->is_subscripted)
      return false;
   if (x->is_subscripted && x->array_subscript != y->array_subscript)
      return false;
   return true;
}

/**
 * The total number of varying components taken up by this variable.  Only
 * valid if assign_location() has been called.
 */
static unsigned
xfb_decl_num_components(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->lowered_builtin_array_variable)
      return xfb_decl->size;
   else
      return xfb_decl->vector_elements * xfb_decl->matrix_columns *
         xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
}
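
/* Worked example (illustrative): capturing a whole "dvec3 v[4]" gives
 * vector_elements = 3, matrix_columns = 1, size = 4 and a 64-bit type,
 * so xfb_decl_num_components() returns 3 * 1 * 4 * 2 = 24 float
 * components.  For the lowered built-in arrays, size is already a float
 * component count, e.g. gl_ClipDistance with 8 enabled planes yields 8.
 */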

/**
 * Assign a location and stream ID for this xfb_decl object based on the
 * transform feedback candidate found by find_candidate.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                         const struct gl_constants *consts,
                         struct gl_shader_program *prog)
{
   assert(xfb_decl_is_varying(xfb_decl));

   unsigned fine_location
      = xfb_decl->matched_candidate->toplevel_var->data.location * 4
      + xfb_decl->matched_candidate->toplevel_var->data.location_frac
      + xfb_decl->matched_candidate->struct_offset_floats;
   const unsigned dmul =
      glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

   if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
      /* Array variable */
      const struct glsl_type *element_type =
         glsl_get_array_element(xfb_decl->matched_candidate->type);
      const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
      const unsigned vector_elements = glsl_get_vector_elements(element_type);
      unsigned actual_array_size;
      switch (xfb_decl->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.cull_distance_array_size : 0;
         break;
      case tess_level_outer:
         actual_array_size = 4;
         break;
      case tess_level_inner:
         actual_array_size = 2;
         break;
      case none:
      default:
         actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
         break;
      }

      if (xfb_decl->is_subscripted) {
         /* Check array bounds. */
         if (xfb_decl->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%i, but the array size is %u.",
                         xfb_decl->orig_name, xfb_decl->array_subscript,
                         actual_array_size);
            return false;
         }
         unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
            1 : vector_elements * matrix_cols * dmul;
         fine_location += array_elem_size * xfb_decl->array_subscript;
         xfb_decl->size = 1;
      } else {
         xfb_decl->size = actual_array_size;
      }
      xfb_decl->vector_elements = vector_elements;
      xfb_decl->matrix_columns = matrix_cols;
      if (xfb_decl->lowered_builtin_array_variable)
         xfb_decl->type = GL_FLOAT;
      else
         xfb_decl->type = glsl_get_gl_type(element_type);
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (xfb_decl->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      xfb_decl->orig_name, xfb_decl->var_name);
         return false;
      }
      xfb_decl->size = 1;
      xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
      xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
      xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
   }
   xfb_decl->location = fine_location / 4;
   xfb_decl->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
    *
    *   * the total number of components to capture in any varying
    *     variable in <varyings> is greater than the constant
    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *     buffer mode is SEPARATE_ATTRIBS_EXT;
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       xfb_decl_num_components(xfb_decl) >
       consts->MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   xfb_decl->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

   unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
   unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
   xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
   xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
      array_offset + struct_offset;

   return true;
}
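
/* Worked example (illustrative): for a candidate whose toplevel variable
 * sits at slot VARYING_SLOT_VAR0 + 2, component 1, with
 * struct_offset_floats = 5, the code above computes
 *
 *    fine_location = (VARYING_SLOT_VAR0 + 2) * 4 + 1 + 5
 *
 * i.e. six float components past the start of that slot, and then stores
 * it back as location = fine_location / 4 and
 * location_frac = fine_location % 4.
 */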

static unsigned
xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
{
   if (!xfb_decl_is_varying(xfb_decl)) {
      return 0;
   }

   if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
      unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
      unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
      return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
   } else {
      return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
   }
}
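
/* Worked example (illustrative): for an explicitly located dvec3 decl,
 * dmul = 2 and rows_per_element = DIV_ROUND_UP(3 * 2, 4) = 2, so each
 * element occupies two output rows.  Without an explicit location the
 * count is simply the number of vec4 rows the components cover:
 * (num_components + location_frac + 3) / 4.
 */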

static bool
xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
      return false;

   return xfb_decl->matched_candidate->toplevel_var->data.assigned;
}

/**
 * Update gl_transform_feedback_info to reflect this xfb_decl.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               struct gl_shader_program *prog,
               struct gl_transform_feedback_info *info,
               unsigned buffer, unsigned buffer_index,
               const unsigned max_outputs,
               BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
               bool *explicit_stride, unsigned *max_member_alignment,
               bool has_xfb_qualifiers, const void* mem_ctx)
{
   unsigned xfb_offset = 0;
   unsigned size = xfb_decl->size;
   /* Handle gl_SkipComponents. */
   if (xfb_decl->skip_components) {
      info->Buffers[buffer].Stride += xfb_decl->skip_components;
      size = xfb_decl->skip_components;
      goto store_varying;
   }

   if (xfb_decl->next_buffer_separator) {
      size = 0;
      goto store_varying;
   }

   if (has_xfb_qualifiers) {
      xfb_offset = xfb_decl->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = xfb_decl->location;
      unsigned location_frac = xfb_decl->location_frac;
      unsigned num_components = xfb_decl_num_components(xfb_decl);

      /* From GL_EXT_transform_feedback:
       *
       *   " A program will fail to link if:
       *
       *       * the total number of components to capture is greater than the
       *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *   " The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          consts->MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *   " No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       */
      const unsigned max_components =
         consts->MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      for (unsigned word = start_word; word <= end_word; word++) {
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         xfb_decl->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      const unsigned type_num_components =
         xfb_decl->vector_elements *
         (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      while (num_components > 0) {
         unsigned output_size = 0;

         /*  From GL_ARB_enhanced_layouts:
          *
          * "When an attribute variable declared using an array type is bound to
          * generic attribute index <i>, the active array elements are assigned to
          * consecutive generic attributes beginning with generic attribute <i>.  The
          * number of attributes and components assigned to each element are
          * determined according to the data type of array elements and "component"
          * layout qualifier (if any) specified in the declaration of the array."
          *
          * "When an attribute variable declared using a matrix type is bound to a
          * generic attribute index <i>, its values are taken from consecutive generic
          * attributes beginning with generic attribute <i>.  Such matrices are
          * treated as an array of column vectors with values taken from the generic
          * attributes.
          * This means there may be gaps in the varyings we are taking values from."
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
          * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
          * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
          * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
          * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
          *
          */
         if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a shader
          *    invocation, the buffer contents at the assigned offset will be
          *    undefined.  Even if there are no static writes to a variable or
          *    member that is assigned a transform feedback offset, the space is
          *    still allocated in the buffer and still affects the stride."
          */
         if (xfb_decl_is_varying_written(xfb_decl)) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = xfb_decl->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
          info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 8 as it's applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].name.string =
      ralloc_strdup(prog, xfb_decl->orig_name);
   resource_name_updated(&info->Varyings[info->NumVarying].name);
   info->Varyings[info->NumVarying].Type = xfb_decl->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}
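
/* Illustrative example (hypothetical GLSL) of the aliasing check above:
 *
 *    layout(xfb_offset = 0) out vec4 a;
 *    layout(xfb_offset = 8) out vec4 b;
 *
 * "a" occupies float components 0..3 of the buffer and "b" starts at
 * float component 2, so storing "b" finds components 2..3 already set in
 * used_components and raises the "is causing aliasing" linker error.
 */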

static const struct tfeedback_candidate *
xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
                        struct gl_shader_program *prog,
                        struct hash_table *tfeedback_candidates)
{
   const char *name = xfb_decl->var_name;
   switch (xfb_decl->lowered_builtin_array_variable) {
   case none:
      name = xfb_decl->var_name;
      break;
   case clip_distance:
      name = "gl_ClipDistanceMESA";
      break;
   case cull_distance:
      name = "gl_CullDistanceMESA";
      break;
   case tess_level_outer:
      name = "gl_TessLevelOuterMESA";
      break;
   case tess_level_inner:
      name = "gl_TessLevelInnerMESA";
      break;
   }
   struct hash_entry *entry =
      _mesa_hash_table_search(tfeedback_candidates, name);

   xfb_decl->matched_candidate = entry ?
         (struct tfeedback_candidate *) entry->data : NULL;

   if (!xfb_decl->matched_candidate) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any variable name specified in the <varyings> array is not
       *     declared as an output in the geometry shader (if present) or
       *     the vertex shader (if no geometry shader is present);
       */
      linker_error(prog, "Transform feedback varying %s undeclared.",
                   xfb_decl->orig_name);
   }

   return xfb_decl->matched_candidate;
}

/**
 * Force a candidate over the previously matched one. This happens when a new
 * varying needs to be created to match the xfb declaration, for example to
 * fulfill an alignment criterion.
 */
static void
xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
                               struct tfeedback_candidate *candidate)
{
   xfb_decl->matched_candidate = candidate;

   /* The subscript part is no longer relevant */
   xfb_decl->is_subscripted = false;
   xfb_decl->array_subscript = 0;
}

/**
 * Parse all the transform feedback declarations that were passed to
 * glTransformFeedbackVaryings() and store them in xfb_decl objects.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
parse_xfb_decls(const struct gl_constants *consts,
                const struct gl_extensions *exts,
                struct gl_shader_program *prog,
                const void *mem_ctx, unsigned num_names,
                char **varying_names, struct xfb_decl *decls)
{
   for (unsigned i = 0; i < num_names; ++i) {
      xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i]);

      if (!xfb_decl_is_varying(&decls[i]))
         continue;

      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any two entries in the <varyings> array specify the same varying
       *     variable;
       *
       * We interpret this to mean "any two entries in the <varyings> array
       * specify the same varying variable and array index", since transform
       * feedback of arrays would be useless otherwise.
       */
      for (unsigned j = 0; j < i; ++j) {
         if (xfb_decl_is_varying(&decls[j])) {
            if (xfb_decl_is_same(&decls[i], &decls[j])) {
               linker_error(prog, "Transform feedback varying %s specified "
                            "more than once.", varying_names[i]);
               return false;
            }
         }
      }
   }
   return true;
}

static int
cmp_xfb_offset(const void * x_generic, const void * y_generic)
{
   struct xfb_decl *x = (struct xfb_decl *) x_generic;
   struct xfb_decl *y = (struct xfb_decl *) y_generic;

   if (x->buffer != y->buffer)
      return x->buffer - y->buffer;
   return x->offset - y->offset;
}

/**
 * Store transform feedback location assignments into
 * prog->sh.LinkedTransformFeedback based on the data stored in
 * xfb_decls.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
store_tfeedback_info(const struct gl_constants *consts,
                     struct gl_shader_program *prog, unsigned num_xfb_decls,
                     struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask
    * used to track active buffers doesn't overflow.
    */
   assert(consts->MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order;
    * however, some drivers expect to receive the list of transform feedback
    * declarations in order, so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_xfb_decls);

   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying_written(&xfb_decls[i]))
         num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   unsigned buffers = 0;
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS */
      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             num_buffers, num_buffers, num_outputs,
                             used_components, NULL, NULL, has_xfb_qualifiers,
                             mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   }
   else {
      /* GL_INTERLEAVED_ATTRIBS */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_xfb_decls ? xfb_decls[0].buffer : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != xfb_decls[i].buffer) {
            /* We have moved to the next buffer, so reset the stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         if (xfb_decls[i].next_buffer_separator) {
            if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                xfb_prog->sh.LinkedTransformFeedback,
                                buffer, num_buffers, num_outputs,
                                used_components, explicit_stride,
                                max_member_alignment, has_xfb_qualifiers,
                                mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = xfb_decls[i].buffer;
         } else {
            buffer = num_buffers;
         }

         if (xfb_decl_is_varying(&xfb_decls[i])) {
            if (buffer_stream_id == -1) {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) xfb_decls[i].stream_id;

               /* Only mark a buffer as active when there is a varying
                * attached to it. This behaviour is based on a revised version
                * of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) xfb_decls[i].stream_id) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            xfb_decls[i].orig_name,
                            xfb_decls[i].stream_id,
                            buffer_stream_id);
               return false;
            }
         }

         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             buffer, num_buffers, num_outputs, used_components,
                             explicit_stride, max_member_alignment,
                             has_xfb_qualifiers, mem_ctx))
            return false;
      }
   }
   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}

/**
 * Enum representing the order in which varyings are packed within a
 * packing class.
 *
 * Currently we pack vec4's first, then vec2's, then scalar values, then
 * vec3's.  This order ensures that the only vectors that are at risk of
 * having to be "double parked" (split between two adjacent varying slots)
 * are the vec3's.
 */
enum packing_order_enum {
   PACKING_ORDER_VEC4,
   PACKING_ORDER_VEC2,
   PACKING_ORDER_SCALAR,
   PACKING_ORDER_VEC3,
};
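
/* Illustrative example of the ordering: packing the class
 * { vec4 a; vec3 b; vec2 c; float d; } in this order fills slot 0 with
 * a.xyzw and slot 1 with c.xy, d and b.x, spilling b.yz into slot 2, so
 * only the vec3 ends up double parked.
 */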

/**
 * Structure recording the relationship between a single producer output
 * and a single consumer input.
 */
struct match {
   /**
    * Packing class for this varying, computed by compute_packing_class().
    */
   unsigned packing_class;

   /**
    * Packing order for this varying, computed by compute_packing_order().
    */
   enum packing_order_enum packing_order;

   /**
    * The output variable in the producer stage.
    */
   nir_variable *producer_var;

   /**
    * The input variable in the consumer stage.
    */
   nir_variable *consumer_var;

   /**
    * The location which has been assigned for this varying.  This is
    * expressed in multiples of a float, with the first generic varying
    * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
    * value 0.
    */
   unsigned generic_location;
};

/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 */
struct varying_matches
{
   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled; fortunately, where transform feedback
    * requires packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   bool xfb_enabled;

   bool enhanced_layouts_enabled;

   /**
    * If true, this driver prefers varyings to be aligned to power of two
    * in a slot.
    */
   bool prefer_pot_aligned_varyings;

   struct match *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};

/**
 * Comparison function passed to qsort() to sort varyings by packing_class and
 * then by packing_order.
 */
static int
varying_matches_match_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;
   const struct match *y = (const struct match *) y_generic;

   if (x->packing_class != y->packing_class)
      return x->packing_class - y->packing_class;
   return x->packing_order - y->packing_order;
}

/**
 * Comparison function passed to qsort() to sort varyings used only by
 * transform feedback when packing of other varyings is disabled.
 */
static int
varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;

   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
      return varying_matches_match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also what's happening in
    * varying_matches_match_comparator().
    */
   return 0;
}

/**
 * Comparison function passed to qsort() to sort varyings NOT used by
 * transform feedback when packing of xfb varyings is disabled.
 */
static int
varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;

   if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
      return varying_matches_match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also what's happening in
    * varying_matches_match_comparator().
    */
   return 0;
}

static bool
is_unpackable_tess(gl_shader_stage producer_stage,
                   gl_shader_stage consumer_stage)
{
   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
       consumer_stage == MESA_SHADER_TESS_CTRL ||
       producer_stage == MESA_SHADER_TESS_CTRL)
      return true;

   return false;
}

static void
init_varying_matches(void *mem_ctx, struct varying_matches *vm,
                     const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     gl_shader_stage producer_stage,
                     gl_shader_stage consumer_stage,
                     bool sso)
{
   /* Tessellation shaders treat inputs and outputs as shared memory and can
    * access inputs and outputs of other invocations.
    * Therefore, they can't be lowered to temps easily (and definitely not
    * efficiently).
    */
   bool unpackable_tess =
      is_unpackable_tess(producer_stage, consumer_stage);

   /* Transform feedback code assumes varying arrays are packed, so if the
    * driver has disabled varying packing, make sure to at least enable
    * packing required by transform feedback. See below for exception.
    */
   bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;

   /* Some drivers actually require packing to be explicitly disabled
    * for varyings used by transform feedback.
    */
   bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;

   /* Disable packing on outward facing interfaces for SSO because in ES we
    * need to retain the unpacked varying information for draw time
    * validation.
    *
    * Packing is still enabled on individual arrays, structs, and matrices as
    * these are required by the transform feedback code and it is still safe
    * to do so. We also enable packing when a varying is only used for
    * transform feedback and it's not an SSO.
    */
   bool disable_varying_packing =
      consts->DisableVaryingPacking || unpackable_tess;
   if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
      disable_varying_packing = true;

   /* Note: this initial capacity is rather arbitrarily chosen to be large
    * enough for many cases without wasting an unreasonable amount of space.
    * varying_matches_record() will resize the array if there are more than
    * this number of varyings.
    */
   vm->matches_capacity = 8;
   vm->matches = (struct match *)
      ralloc_array(mem_ctx, struct match, vm->matches_capacity);
   vm->num_matches = 0;

   vm->disable_varying_packing = disable_varying_packing;
   vm->disable_xfb_packing = disable_xfb_packing;
   vm->xfb_enabled = xfb_enabled;
   vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
   vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
   vm->producer_stage = producer_stage;
   vm->consumer_stage = consumer_stage;
}

/**
 * Packing is always safe on individual arrays, structures, and matrices. It
 * is also safe if the varying is only used for transform feedback.
 */
static bool
is_varying_packing_safe(struct varying_matches *vm,
                        const struct glsl_type *type, const nir_variable *var)
{
   if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
      return false;

   return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
                              glsl_type_is_struct(type) ||
                              var->data.is_xfb_only);
}

static bool
is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
                    const nir_variable *var)
{
   return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
      (vm->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
         glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
}

/**
 * Compute the "packing class" of the given varying.  This is an unsigned
 * integer with the property that two variables in the same packing class can
 * be safely packed into the same vec4.
 */
static unsigned
varying_matches_compute_packing_class(const nir_variable *var)
{
   /* Without help from the back-end, there is no way to pack together
    * variables with different interpolation types, because
    * lower_packed_varyings must choose exactly one interpolation type for
    * each packed varying it creates.
    *
    * However, we can safely pack together floats, ints, and uints, because:
    *
    * - varyings of base type "int" and "uint" must use the "flat"
    *   interpolation type, which can only occur in GLSL 1.30 and above.
    *
    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
    *   can store flat floats as ints without losing any information (using
    *   the ir_unop_bitcast_* opcodes).
    *
    * Therefore, the packing class depends only on the interpolation type.
    */
   bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
      glsl_contains_integer(var->type) || glsl_contains_double(var->type);

   const unsigned interp = is_interpolation_flat
      ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;

   assert(interp < (1 << 3));

   const unsigned packing_class = (interp << 0) |
                                  (var->data.centroid << 3) |
                                  (var->data.sample << 4) |
                                  (var->data.patch << 5) |
                                  (var->data.must_be_shader_input << 6);

   return packing_class;
}
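
/* Illustrative example: two flat centroid varyings (sample and patch unset)
 * compute the same packing class, INTERP_MODE_FLAT | (1 << 3), and may
 * therefore share a vec4, while a smooth varying computes a different class
 * and is never packed together with them.
 */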

/**
 * Compute the "packing order" of the given varying.  This is a sort key we
 * use to determine when to attempt to pack the given varying relative to
 * other varyings in the same packing class.
 */
static enum packing_order_enum
varying_matches_compute_packing_order(const nir_variable *var)
{
   const struct glsl_type *element_type = glsl_without_array(var->type);

   switch (glsl_get_component_slots(element_type) % 4) {
   case 1: return PACKING_ORDER_SCALAR;
   case 2: return PACKING_ORDER_VEC2;
   case 3: return PACKING_ORDER_VEC3;
   case 0: return PACKING_ORDER_VEC4;
   default:
      assert(!"Unexpected value of vector_elements");
      return PACKING_ORDER_VEC4;
   }
}
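
/* Worked examples (illustrative): a vec3 has 3 component slots and
 * 3 % 4 == 3, so it sorts as PACKING_ORDER_VEC3; a mat3 has 9 slots and
 * 9 % 4 == 1, so it sorts as PACKING_ORDER_SCALAR; float[8] strips to
 * float and likewise sorts as PACKING_ORDER_SCALAR.
 */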

/**
 * Built-in / reserved GL variable names start with "gl_"
 */
static bool
is_gl_identifier(const char *s)
{
   return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
}
1276 
1277 /**
1278  * Record the given producer/consumer variable pair in the list of variables
1279  * that should later be assigned locations.
1280  *
1281  * It is permissible for \c consumer_var to be NULL (this happens if a
1282  * variable is output by the producer and consumed by transform feedback, but
1283  * not consumed by the consumer).
1284  *
1285  * If \c producer_var has already been paired up with a consumer_var, or
1286  * producer_var is part of fixed pipeline functionality (and hence already has
1287  * a location assigned), this function has no effect.
1288  *
1289  * Note: as a side effect this function may change the interpolation type of
1290  * \c producer_var, but only when the change couldn't possibly affect
1291  * rendering.
1292  */
1293 static void
varying_matches_record(void * mem_ctx,struct varying_matches * vm,nir_variable * producer_var,nir_variable * consumer_var)1294 varying_matches_record(void *mem_ctx, struct varying_matches *vm,
1295                        nir_variable *producer_var, nir_variable *consumer_var)
1296 {
1297    assert(producer_var != NULL || consumer_var != NULL);
1298 
1299    if ((producer_var &&
1300        (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
1301        (consumer_var &&
1302         (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
1303       /* Either a location already exists for this variable (since it is part
1304        * of fixed functionality), or it has already been assigned explicitly.
1305        */
1306       return;
1307    }
1308 
1309    /* The varyings should not have been matched and assigned previously */
1310    assert((producer_var == NULL || producer_var->data.location == -1) &&
1311           (consumer_var == NULL || consumer_var->data.location == -1));
1312 
1313    bool needs_flat_qualifier = consumer_var == NULL &&
1314       (glsl_contains_integer(producer_var->type) ||
1315        glsl_contains_double(producer_var->type));
1316 
1317    if (!vm->disable_varying_packing &&
1318        (!vm->disable_xfb_packing || producer_var  == NULL || !producer_var->data.is_xfb) &&
1319        (needs_flat_qualifier ||
1320         (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
1321       /* Either this varying is not being consumed by the fragment shader,
1322        * in which case its interpolation type cannot possibly affect
1323        * rendering, or this variable is non-flat and is (or contains) an
1324        * integer or a double.
1325        * If the consumer stage is unknown, don't modify the interpolation
1326        * type as it could affect rendering later with separate shaders.
1327        *
1328        * lower_packed_varyings requires all integer varyings to be flat,
1329        * regardless of where they appear.  We can trivially satisfy that
1330        * requirement by changing the interpolation type to flat here.
1331        */
1332       if (producer_var) {
1333          producer_var->data.centroid = false;
1334          producer_var->data.sample = false;
1335          producer_var->data.interpolation = INTERP_MODE_FLAT;
1336       }
1337 
1338       if (consumer_var) {
1339          consumer_var->data.centroid = false;
1340          consumer_var->data.sample = false;
1341          consumer_var->data.interpolation = INTERP_MODE_FLAT;
1342       }
1343    }
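
   /* Sketch of the demotion above (illustrative, assuming packing is
    * enabled): a vertex shader "out ivec2 counts;" that is captured only by
    * transform feedback has no consumer_var and an integer type, so
    * needs_flat_qualifier is true and its interpolation is rewritten to
    * INTERP_MODE_FLAT, letting lower_packed_varyings bitcast it safely.
    */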
1344 
1345    if (vm->num_matches == vm->matches_capacity) {
1346       vm->matches_capacity *= 2;
1347       vm->matches = (struct match *)
1348          reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
1349    }
1350 
1351    /* We must use the consumer to compute the packing class because in GL4.4+
1352     * there is no guarantee interpolation qualifiers will match across stages.
1353     *
1354     * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1355     *
1356     *    "The type and presence of interpolation qualifiers of variables with
1357     *    the same name declared in all linked shaders for the same cross-stage
1358     *    interface must match, otherwise the link command will fail.
1359     *
1360     *    When comparing an output from one stage to an input of a subsequent
1361     *    stage, the input and output don't match if their interpolation
1362     *    qualifiers (or lack thereof) are not the same."
1363     *
1364     * This text was also in at least revision 7 of the 4.40 spec but is no
1365     * longer in revision 9 and not in the 4.50 spec.
1366     */
1367    const nir_variable *const var = (consumer_var != NULL)
1368       ? consumer_var : producer_var;
1369 
1370    if (producer_var && consumer_var &&
1371        consumer_var->data.must_be_shader_input) {
1372       producer_var->data.must_be_shader_input = 1;
1373    }
1374 
1375    vm->matches[vm->num_matches].packing_class
1376       = varying_matches_compute_packing_class(var);
1377    vm->matches[vm->num_matches].packing_order
1378       = varying_matches_compute_packing_order(var);
1379 
1380    vm->matches[vm->num_matches].producer_var = producer_var;
1381    vm->matches[vm->num_matches].consumer_var = consumer_var;
1382    vm->num_matches++;
1383 }
1384 
1385 /**
1386  * Choose locations for all of the variable matches that were previously
1387  * passed to varying_matches_record().
1388  * \param components  returns array[slot] of number of components used
1389  *                    per slot (1, 2, 3 or 4)
1390  * \param reserved_slots  bitmask indicating which varying slots are already
1391  *                        allocated
1392  * \return number of slots (4-element vectors) allocated
1393  */
1394 static unsigned
1395 varying_matches_assign_locations(struct varying_matches *vm,
1396                                  struct gl_shader_program *prog,
1397                                  uint8_t components[], uint64_t reserved_slots)
1398 {
1399    /* If packing has been disabled then we cannot safely sort the varyings by
1400     * class as it may mean we are using a version of OpenGL where
1401     * interpolation qualifiers are not guaranteed to be matching across
1402     * shaders, sorting in this case could result in mismatching shader
1403     * interfaces.
1404     * When packing is disabled the sort orders varyings used by transform
1405     * feedback first, but also depends on *undefined behaviour* of qsort to
1406     * reverse the order of the varyings. See: varying_matches_xfb_comparator().
1407     *
1408     * If packing is only disabled for xfb varyings (mutually exclusive with
1409     * disable_varying_packing), we then group varyings depending on if they
1410     * are captured for transform feedback. The same *undefined behaviour* is
1411     * taken advantage of.
1412     */
1413    if (vm->disable_varying_packing) {
1414       /* Only sort varyings that are only used by transform feedback. */
1415       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1416             &varying_matches_xfb_comparator);
1417    } else if (vm->disable_xfb_packing) {
1418       /* Only sort varyings that are NOT used by transform feedback. */
1419       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1420             &varying_matches_not_xfb_comparator);
1421    } else {
1422       /* Sort varying matches into an order that makes them easy to pack. */
1423       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1424             &varying_matches_match_comparator);
1425    }
1426 
1427    unsigned generic_location = 0;
1428    unsigned generic_patch_location = MAX_VARYING*4;
1429    bool previous_var_xfb = false;
1430    bool previous_var_xfb_only = false;
1431    unsigned previous_packing_class = ~0u;
1432 
1433    /* For transform feedback separate mode, we know the number of attributes
1434     * is <= the number of buffers.  So packing isn't critical.  In fact,
1435     * packing vec3 attributes can cause trouble because splitting a vec3
1436     * effectively creates an additional transform feedback output.  The
1437     * extra TFB output may exceed device driver limits.
1438     *
1439     * Also don't pack vec3 if the driver prefers power of two aligned
1440     * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
1441     * pot-aligned; we only need to take care of vec3s.
1442     */
1443    const bool dont_pack_vec3 =
1444       (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1445        prog->TransformFeedback.NumVarying > 0) ||
1446       vm->prefer_pot_aligned_varyings;
1447 
1448    for (unsigned i = 0; i < vm->num_matches; i++) {
1449       unsigned *location = &generic_location;
1450       const nir_variable *var;
1451       const struct glsl_type *type;
1452       bool is_vertex_input = false;
1453 
1454       if (vm->matches[i].consumer_var) {
1455          var = vm->matches[i].consumer_var;
1456          type = get_varying_type(var, vm->consumer_stage);
1457          if (vm->consumer_stage == MESA_SHADER_VERTEX)
1458             is_vertex_input = true;
1459       } else {
1460          if (!vm->matches[i].producer_var)
1461             continue; /* The varying was optimised away */
1462 
1463          var = vm->matches[i].producer_var;
1464          type = get_varying_type(var, vm->producer_stage);
1465       }
1466 
1467       if (var->data.patch)
1468          location = &generic_patch_location;
1469 
1470       /* Advance to the next slot if this varying has a different packing
1471        * class than the previous one, and we're not already on a slot
1472        * boundary.
1473        *
1474        * Also advance if varying packing is disabled for transform feedback,
1475        * and previous or current varying is used for transform feedback.
1476        *
1477        * Also advance to the next slot if packing is disabled. This makes sure
1478        * we don't assign varyings the same locations which is possible
1479        * because we still pack individual arrays, records and matrices even
1480        * when packing is disabled. Note we don't advance to the next slot if
1481        * we can pack varyings together that are only used for transform
1482        * feedback.
1483        */
1484       if (var->data.must_be_shader_input ||
1485           (vm->disable_xfb_packing &&
1486            (previous_var_xfb || var->data.is_xfb)) ||
1487           (vm->disable_varying_packing &&
1488            !(previous_var_xfb_only && var->data.is_xfb_only)) ||
1489           (previous_packing_class != vm->matches[i].packing_class) ||
1490           (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
1491            dont_pack_vec3)) {
1492          *location = ALIGN(*location, 4);
1493       }
1494 
1495       previous_var_xfb = var->data.is_xfb;
1496       previous_var_xfb_only = var->data.is_xfb_only;
1497       previous_packing_class = vm->matches[i].packing_class;
1498 
1499       /* The number of components taken up by this variable. For vertex shader
1500        * inputs, we use the number of slots * 4, as they have different
1501        * counting rules.
1502        */
1503       unsigned num_components = 0;
1504       if (is_vertex_input) {
1505          num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
1506       } else {
1507          if (is_packing_disabled(vm, type, var)) {
1508             num_components = glsl_count_attribute_slots(type, false) * 4;
1509          } else {
1510             num_components = glsl_get_component_slots_aligned(type, *location);
1511          }
1512       }
1513 
1514       /* The last slot for this variable, inclusive. */
1515       unsigned slot_end = *location + num_components - 1;
1516 
1517       /* FIXME: We could be smarter in the below code and loop back over
1518        * trying to fill any locations that we skipped because we couldn't pack
1519        * the varying around an explicit location. For now just let the user
1520        * hit the linking error if we run out of room and suggest they use
1521        * explicit locations.
1522        */
1523       while (slot_end < MAX_VARYING * 4u) {
1524          const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
1525          const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
1526 
1527          assert(slots > 0);
1528 
1529          if ((reserved_slots & slot_mask) == 0) {
1530             break;
1531          }
1532 
1533          *location = ALIGN(*location + 1, 4);
1534          slot_end = *location + num_components - 1;
1535       }
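
      /* Worked example of the loop above (illustrative): with *location == 4
       * and num_components == 6, slot_end == 9, so slots == 2 and slot_mask
       * covers bits 1 and 2 (0b110).  If either of those slots is reserved,
       * we realign to the next slot boundary and try again.
       */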
1536 
1537       if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
1538          linker_error(prog, "insufficient contiguous locations available for "
1539                       "%s it is possible an array or struct could not be "
1540                       "packed between varyings with explicit locations. Try "
1541                       "using an explicit location for arrays and structs.",
1542                       var->name);
1543       }
1544 
1545       if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
1546          for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
1547             components[j] = 4;
1548          components[slot_end / 4u] = (slot_end & 3) + 1;
1549       }
1550 
1551       vm->matches[i].generic_location = *location;
1552 
1553       *location = slot_end + 1;
1554    }
1555 
1556    return (generic_location + 3) / 4;
1557 }
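
/* Packing sketch (hypothetical inputs): a float and a vec3 in the same
 * packing class can share a single slot, one landing in the component left
 * free by the other, so components[slot] ends up 4.  And if the last
 * assignment leaves generic_location == 10, the function reports
 * (10 + 3) / 4 == 3 occupied slots.
 */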
1558 
1559 static void
1560 varying_matches_assign_temp_locations(struct varying_matches *vm,
1561                                       struct gl_shader_program *prog,
1562                                       uint64_t reserved_slots)
1563 {
1564    unsigned tmp_loc = 0;
1565    for (unsigned i = 0; i < vm->num_matches; i++) {
1566       nir_variable *producer_var = vm->matches[i].producer_var;
1567       nir_variable *consumer_var = vm->matches[i].consumer_var;
1568 
1569       while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
1570          if (reserved_slots & (UINT64_C(1) << tmp_loc))
1571             tmp_loc++;
1572          else
1573             break;
1574       }
1575 
1576       if (producer_var) {
1577          assert(producer_var->data.location == -1);
1578          producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
1579       }
1580 
1581       if (consumer_var) {
1582          assert(consumer_var->data.location == -1);
1583          consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
1584       }
1585 
1586       tmp_loc++;
1587    }
1588 }
1589 
1590 /**
1591  * Update the producer and consumer shaders to reflect the locations
1592  * assignments that were made by varying_matches_assign_locations().
1593  */
1594 static void
1595 varying_matches_store_locations(struct varying_matches *vm)
1596 {
1597    /* Check if a location needs to be packed with lower_packed_varyings() or if
1598     * we can just use ARB_enhanced_layouts packing.
1599     */
1600    bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
1601    const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
1602 
1603    for (unsigned i = 0; i < vm->num_matches; i++) {
1604       nir_variable *producer_var = vm->matches[i].producer_var;
1605       nir_variable *consumer_var = vm->matches[i].consumer_var;
1606       unsigned generic_location = vm->matches[i].generic_location;
1607       unsigned slot = generic_location / 4;
1608       unsigned offset = generic_location % 4;
1609 
1610       if (producer_var) {
1611          producer_var->data.location = VARYING_SLOT_VAR0 + slot;
1612          producer_var->data.location_frac = offset;
1613       }
1614 
1615       if (consumer_var) {
1616          consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
1617          consumer_var->data.location_frac = offset;
1618       }
1619 
1620       /* Find locations suitable for native packing via
1621        * ARB_enhanced_layouts.
1622        */
1623       if (vm->enhanced_layouts_enabled) {
1624          nir_variable *var = producer_var ? producer_var : consumer_var;
1625          unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
1626          const struct glsl_type *type =
1627             get_varying_type(var, stage);
1628          unsigned comp_slots = glsl_get_component_slots(type) + offset;
1629          unsigned slots = comp_slots / 4;
1630          if (comp_slots % 4)
1631             slots += 1;
1632 
1633          if (producer_var && consumer_var) {
1634             if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
1635                 glsl_type_is_64bit(type)) {
1636                for (unsigned j = 0; j < slots; j++) {
1637                   pack_loc[slot + j] = true;
1638                }
1639             } else if (offset + glsl_get_vector_elements(type) > 4) {
1640                pack_loc[slot] = true;
1641                pack_loc[slot + 1] = true;
1642             } else {
1643                loc_type[slot][offset] = type;
1644             }
1645          } else {
1646             for (unsigned j = 0; j < slots; j++) {
1647                pack_loc[slot + j] = true;
1648             }
1649          }
1650       }
1651    }
1652 
1653    /* Attempt to use ARB_enhanced_layouts for more efficient packing if
1654     * suitable.
1655     */
1656    if (vm->enhanced_layouts_enabled) {
1657       for (unsigned i = 0; i < vm->num_matches; i++) {
1658          nir_variable *producer_var = vm->matches[i].producer_var;
1659          nir_variable *consumer_var = vm->matches[i].consumer_var;
1660          if (!producer_var || !consumer_var)
1661             continue;
1662 
1663          unsigned generic_location = vm->matches[i].generic_location;
1664          unsigned slot = generic_location / 4;
1665          if (pack_loc[slot])
1666             continue;
1667 
1668          const struct glsl_type *type =
1669             get_varying_type(producer_var, vm->producer_stage);
1670          bool type_match = true;
1671          for (unsigned j = 0; j < 4; j++) {
1672             if (loc_type[slot][j]) {
1673                if (glsl_get_base_type(type) !=
1674                    glsl_get_base_type(loc_type[slot][j]))
1675                   type_match = false;
1676             }
1677          }
1678 
1679          if (type_match) {
1680             producer_var->data.explicit_location = 1;
1681             consumer_var->data.explicit_location = 1;
1682          }
1683       }
1684    }
1685 }
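
/* For instance (illustrative): generic_location == 9 decomposes to slot 2
 * with offset 1, i.e. the varying starts in component 'y' of
 * VARYING_SLOT_VAR0 + 2, which ARB_enhanced_layouts would express as
 * layout(location = 2, component = 1).
 */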
1686 
1687 /**
1688  * Is the given variable a varying variable to be counted against the
1689  * limit in ctx->Const.MaxVarying?
1690  * This includes variables such as texcoords, colors and generic
1691  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
1692  */
1693 static bool
1694 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
1695 {
1696    /* Only fragment shaders will take a varying variable as an input */
1697    if (stage == MESA_SHADER_FRAGMENT &&
1698        var->data.mode == nir_var_shader_in) {
1699       switch (var->data.location) {
1700       case VARYING_SLOT_POS:
1701       case VARYING_SLOT_FACE:
1702       case VARYING_SLOT_PNTC:
1703          return false;
1704       default:
1705          return true;
1706       }
1707    }
1708    return false;
1709 }
1710 
1711 struct tfeedback_candidate_generator_state {
1712    /**
1713     * Memory context used to allocate hash table keys and values.
1714     */
1715    void *mem_ctx;
1716 
1717    /**
1718     * Hash table in which tfeedback_candidate objects should be stored.
1719     */
1720    struct hash_table *tfeedback_candidates;
1721 
1722    gl_shader_stage stage;
1723 
1724    /**
1725     * Pointer to the toplevel variable that is being traversed.
1726     */
1727    nir_variable *toplevel_var;
1728 
1729    /**
1730     * Total number of varying floats that have been visited so far.  This is
1731     * used to determine the offset to each varying within the toplevel
1732     * variable.
1733     */
1734    unsigned varying_floats;
1735 
1736    /**
1737     * Offset within the xfb. Counted in floats.
1738     */
1739    unsigned xfb_offset_floats;
1740 };
1741 
1742 /**
1743  * Generates tfeedback_candidate structs describing all possible targets of
1744  * transform feedback.
1745  *
1746  * tfeedback_candidate structs are stored in the hash table
1747  * tfeedback_candidates.  This hash table maps varying names to instances of the
1748  * tfeedback_candidate struct.
1749  */
1750 static void
1751 tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
1752                               char **name, size_t name_length,
1753                               const struct glsl_type *type,
1754                               const struct glsl_struct_field *named_ifc_member)
1755 {
1756    switch (glsl_get_base_type(type)) {
1757    case GLSL_TYPE_INTERFACE:
1758       if (named_ifc_member) {
1759          ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
1760                                       named_ifc_member->name);
1761          tfeedback_candidate_generator(state, name, name_length,
1762                                        named_ifc_member->type, NULL);
1763          return;
1764       }
1765       FALLTHROUGH;
1766    case GLSL_TYPE_STRUCT:
1767       for (unsigned i = 0; i < glsl_get_length(type); i++) {
1768          size_t new_length = name_length;
1769 
1770          /* Append '.field' to the current variable name. */
1771          if (name) {
1772             ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
1773                                          glsl_get_struct_elem_name(type, i));
1774          }
1775 
1776          tfeedback_candidate_generator(state, name, new_length,
1777                                        glsl_get_struct_field(type, i), NULL);
1778       }
1779 
1780       return;
1781    case GLSL_TYPE_ARRAY:
1782       if (glsl_type_is_struct(glsl_without_array(type)) ||
1783           glsl_type_is_interface(glsl_without_array(type)) ||
1784           glsl_type_is_array(glsl_get_array_element(type))) {
1785 
1786          for (unsigned i = 0; i < glsl_get_length(type); i++) {
1787             size_t new_length = name_length;
1788 
1789             /* Append the subscript to the current variable name */
1790             ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
1791 
1792             tfeedback_candidate_generator(state, name, new_length,
1793                                           glsl_get_array_element(type),
1794                                           named_ifc_member);
1795          }
1796 
1797          return;
1798       }
1799       FALLTHROUGH;
1800    default:
1801       assert(!glsl_type_is_struct(glsl_without_array(type)));
1802       assert(!glsl_type_is_interface(glsl_without_array(type)));
1803 
1804       struct tfeedback_candidate *candidate
1805          = rzalloc(state->mem_ctx, struct tfeedback_candidate);
1806       candidate->toplevel_var = state->toplevel_var;
1807       candidate->type = type;
1808 
1809       if (glsl_type_is_64bit(glsl_without_array(type))) {
1810          /*  From ARB_gpu_shader_fp64:
1811           *
1812           * If any variable captured in transform feedback has double-precision
1813           * components, the practical requirements for defined behavior are:
1814           *     ...
1815           * (c) each double-precision variable captured must be aligned to a
1816           *     multiple of eight bytes relative to the beginning of a vertex.
1817           */
1818          state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
1819          /* 64-bit members of structs are also aligned. */
1820          state->varying_floats = ALIGN(state->varying_floats, 2);
1821       }
1822 
1823       candidate->xfb_offset_floats = state->xfb_offset_floats;
1824       candidate->struct_offset_floats = state->varying_floats;
1825 
1826       _mesa_hash_table_insert(state->tfeedback_candidates,
1827                               ralloc_strdup(state->mem_ctx, *name),
1828                               candidate);
1829 
1830       const unsigned component_slots = glsl_get_component_slots(type);
1831 
1832       if (varying_has_user_specified_location(state->toplevel_var)) {
1833          state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
1834       } else {
1835          state->varying_floats += component_slots;
1836       }
1837 
1838       state->xfb_offset_floats += component_slots;
1839    }
1840 }
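
/* Sketch of the traversal above (illustrative): a varying declared as
 *
 *    out struct { float a; dvec2 b; } s;
 *
 * yields the candidates "s.a" and "s.b".  Starting from offset 0, "s.a"
 * lands at xfb_offset_floats == 0 and "s.b" is aligned up to 2 (each double
 * counts as two floats), leaving the next free offset at 6.
 */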
1841 
1842 static void
1843 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
1844                              struct hash_table *consumer_inputs,
1845                              struct hash_table *consumer_interface_inputs,
1846                              nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
1847 {
1848    memset(consumer_inputs_with_locations, 0,
1849           sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
1850 
1851    nir_foreach_shader_in_variable(input_var, nir) {
1852       /* All interface blocks should have been lowered by this point */
1853       assert(!glsl_type_is_interface(input_var->type));
1854 
1855       if (input_var->data.explicit_location) {
1856          /* assign_varying_locations only cares about finding the
1857           * nir_variable at the start of a contiguous location block.
1858           *
1859           *     - For !producer, consumer_inputs_with_locations isn't used.
1860           *
1861           *     - For !consumer, consumer_inputs_with_locations is empty.
1862           *
1863           * For consumer && producer, if you were trying to set some
1864           * nir_variable to the middle of a location block on the other side
1865           * of producer/consumer, cross_validate_outputs_to_inputs() should
1866           * be link-erroring due to either type mismatch or location
1867           * overlaps.  If the variables do match up, then they've got a
1868           * matching data.location and you only looked at
1869           * consumer_inputs_with_locations[var->data.location], not any
1870           * following entries for the array/structure.
1871           */
1872          consumer_inputs_with_locations[input_var->data.location] =
1873             input_var;
1874       } else if (input_var->interface_type != NULL) {
1875          char *const iface_field_name =
1876             ralloc_asprintf(mem_ctx, "%s.%s",
1877                glsl_get_type_name(glsl_without_array(input_var->interface_type)),
1878                input_var->name);
1879          _mesa_hash_table_insert(consumer_interface_inputs,
1880                                  iface_field_name, input_var);
1881       } else {
1882          _mesa_hash_table_insert(consumer_inputs,
1883                                  ralloc_strdup(mem_ctx, input_var->name),
1884                                  input_var);
1885       }
1886    }
1887 }
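
/* Roughly (illustrative): an input from a named interface block
 * "Block { vec4 color; }" is keyed as "Block.color" in
 * consumer_interface_inputs, a plain "in vec4 color;" is keyed as "color" in
 * consumer_inputs, and "layout(location = 3) in vec4 color;" is recorded in
 * consumer_inputs_with_locations[VARYING_SLOT_VAR0 + 3].
 */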
1888 
1889 /**
1890  * Find a variable from the consumer that "matches" the specified variable
1891  *
1892  * This function only finds inputs with names that match.  There is no
1893  * validation (here) that the types, etc. are compatible.
1894  */
1895 static nir_variable *
1896 get_matching_input(void *mem_ctx,
1897                    const nir_variable *output_var,
1898                    struct hash_table *consumer_inputs,
1899                    struct hash_table *consumer_interface_inputs,
1900                    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
1901 {
1902    nir_variable *input_var;
1903 
1904    if (output_var->data.explicit_location) {
1905       input_var = consumer_inputs_with_locations[output_var->data.location];
1906    } else if (output_var->interface_type != NULL) {
1907       char *const iface_field_name =
1908          ralloc_asprintf(mem_ctx, "%s.%s",
1909             glsl_get_type_name(glsl_without_array(output_var->interface_type)),
1910             output_var->name);
1911       struct hash_entry *entry =
1912          _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
1913       input_var = entry ? (nir_variable *) entry->data : NULL;
1914    } else {
1915       struct hash_entry *entry =
1916          _mesa_hash_table_search(consumer_inputs, output_var->name);
1917       input_var = entry ? (nir_variable *) entry->data : NULL;
1918    }
1919 
1920    return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
1921       ? NULL : input_var;
1922 }
1923 
1924 static int
1925 io_variable_cmp(const void *_a, const void *_b)
1926 {
1927    const nir_variable *const a = *(const nir_variable **) _a;
1928    const nir_variable *const b = *(const nir_variable **) _b;
1929 
1930    if (a->data.explicit_location && b->data.explicit_location)
1931       return b->data.location - a->data.location;
1932 
1933    if (a->data.explicit_location && !b->data.explicit_location)
1934       return 1;
1935 
1936    if (!a->data.explicit_location && b->data.explicit_location)
1937       return -1;
1938 
1939    return -strcmp(a->name, b->name);
1940 }
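
/* Note (illustrative): the comparisons above are deliberately inverted,
 * explicit locations sort in descending order and names in reverse strcmp
 * order, because canonicalize_shader_io() below pushes each entry onto the
 * head of the variable list, which reverses the order once more.
 */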
1941 
1942 /**
1943  * Sort the shader IO variables into canonical order
1944  */
1945 static void
1946 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
1947 {
1948    nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
1949    unsigned num_variables = 0;
1950 
1951    nir_foreach_variable_with_modes(var, nir, io_mode) {
1952       /* If we have already encountered more I/O variables than could
1953        * successfully link, bail.
1954        */
1955       if (num_variables == ARRAY_SIZE(var_table))
1956          return;
1957 
1958       var_table[num_variables++] = var;
1959    }
1960 
1961    if (num_variables == 0)
1962       return;
1963 
1964    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
1965     * we're going to push the variables on to the IR list as a stack, so we
1966     * want the last variable (in canonical order) to be first in the list.
1967     */
1968    qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
1969 
1970    /* Remove the variable from its current location in the variable list, and
1971     * put it at the front.
1972     */
1973    for (unsigned i = 0; i < num_variables; i++) {
1974       exec_node_remove(&var_table[i]->node);
1975       exec_list_push_head(&nir->variables, &var_table[i]->node);
1976    }
1977 }
1978 
1979 /**
1980  * Generate a bitfield map of the explicit locations for shader varyings.
1981  *
1982  * Note: For Tessellation shaders we are sitting right on the limits of the
1983  * 64 bit map. Per-vertex and per-patch both have separate location domains
1984  * with a max of MAX_VARYING.
1985  */
1986 static uint64_t
1987 reserved_varying_slot(struct gl_linked_shader *sh,
1988                       nir_variable_mode io_mode)
1989 {
1990    assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
1991    /* Avoid an overflow of the returned value */
1992    assert(MAX_VARYINGS_INCL_PATCH <= 64);
1993 
1994    uint64_t slots = 0;
1995    int var_slot;
1996 
1997    if (!sh)
1998       return slots;
1999 
2000    nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
2001       if (!var->data.explicit_location ||
2002           var->data.location < VARYING_SLOT_VAR0)
2003          continue;
2004 
2005       var_slot = var->data.location - VARYING_SLOT_VAR0;
2006 
2007       bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
2008                                 sh->Stage == MESA_SHADER_VERTEX;
2009       unsigned num_elements =
2010          glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
2011                                     is_gl_vertex_input);
2012       for (unsigned i = 0; i < num_elements; i++) {
2013          if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2014             slots |= UINT64_C(1) << var_slot;
2015          var_slot += 1;
2016       }
2017    }
2018 
2019    return slots;
2020 }
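
/* Worked example (hypothetical shader): an output declared
 * "layout(location = 2) out vec4 a[3];" maps to VARYING_SLOT_VAR0 + 2 and
 * occupies three slots, so this helper returns the mask 0b11100 == 0x1c.
 */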
2021 
2022 /**
2023  * Sets the bits in the given read/written bitset corresponding to the
2024  * varying slots covered by this variable.
2025  */
2026 static void
2027 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
2028 {
2029    assert(var->data.mode == nir_var_shader_in ||
2030           var->data.mode == nir_var_shader_out);
2031    assert(var->data.location >= VARYING_SLOT_VAR0);
2032 
2033    const struct glsl_type *type = var->type;
2034    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
2035       assert(glsl_type_is_array(type));
2036       type = glsl_get_array_element(type);
2037    }
2038 
2039    unsigned location = var->data.location - VARYING_SLOT_VAR0;
2040    unsigned slots = glsl_count_attribute_slots(type, false);
2041    for (unsigned i = 0; i < slots; i++) {
2042       BITSET_SET(bits, location + i);
2043    }
2044 }
2045 
2046 static uint8_t
2047 get_num_components(nir_variable *var)
2048 {
2049    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
2050       return 4;
2051 
2052    return glsl_get_vector_elements(glsl_without_array(var->type));
2053 }
2054 
2055 static void
2056 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
2057 {
2058    nir_foreach_function(function, shader) {
2059       if (!function->impl)
2060          continue;
2061 
2062       nir_foreach_block(block, function->impl) {
2063          nir_foreach_instr(instr, block) {
2064             if (instr->type != nir_instr_type_intrinsic)
2065                continue;
2066 
2067             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2068             if (intrin->intrinsic != nir_intrinsic_load_deref)
2069                continue;
2070 
2071             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2072             if (!nir_deref_mode_is(deref, nir_var_shader_out))
2073                continue;
2074 
2075             nir_variable *var = nir_deref_instr_get_variable(deref);
2076             for (unsigned i = 0; i < get_num_components(var); i++) {
2077                if (var->data.location < VARYING_SLOT_VAR0)
2078                   continue;
2079 
2080                unsigned comp = var->data.location_frac;
2081                set_variable_io_mask(read[comp + i], var, shader->info.stage);
2082             }
2083          }
2084       }
2085    }
2086 }
2087 
2088 /* We need to replace any interp intrinsics that reference undefined
2089  * (shader_temp) inputs, as no later NIR pass expects to see them.
2090  */
2091 static bool
2092 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
2093                                          void *data)
2094 {
2095    if (instr->type == nir_instr_type_intrinsic) {
2096       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2097 
2098       if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
2099           intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
2100           intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
2101          nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2102          if (var->data.mode == nir_var_shader_temp) {
2103             /* Create undef and rewrite the interp uses */
2104             nir_ssa_def *undef =
2105                nir_ssa_undef(b, intrin->dest.ssa.num_components,
2106                              intrin->dest.ssa.bit_size);
2107             nir_ssa_def_rewrite_uses(&intrin->dest.ssa, undef);
2108 
2109             nir_instr_remove(&intrin->instr);
2110             return true;
2111          }
2112       }
2113    }
2114 
2115    return false;
2116 }
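
/* For example (illustrative GLSL): if "in vec2 uv;" has been demoted to a
 * shader_temp because no producer writes it, a leftover
 * interpolateAtOffset(uv, o) in the fragment shader is rewritten here so its
 * uses read an undef value, and the intrinsic itself is removed.
 */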
2117 
2118 static void
2119 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
2120 {
2121    /* Remove all interpolate uses of the unset varying and replace with undef. */
2122    if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
2123       (void) nir_shader_instructions_pass(shader,
2124                                           replace_unused_interpolate_at_with_undef,
2125                                           nir_metadata_block_index |
2126                                           nir_metadata_dominance,
2127                                           NULL);
2128    }
2129 
2130    nir_lower_global_vars_to_local(shader);
2131    nir_fixup_deref_modes(shader);
2132 }
2133 
2134 /**
2135  * Helper for removing unused shader I/O variables, by demoting them to global
2136  * variables (which may then be dead code eliminated).
2137  *
2138  * Example usage is:
2139  *
2140  * progress = remove_unused_io_vars(producer, consumer, prog,
2141  *                                  nir_var_shader_out, read) ||
2142  *            progress;
2143  *
2144  * The "used" should be an array of 4 BITSET_WORDs representing each
2145  * .location_frac used.  Note that for vector variables, only the first channel
2146  * (.location_frac) is examined for deciding if the variable is used!
2147  */
2148 static bool
2149 remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
2150                       struct gl_shader_program *prog,
2151                       nir_variable_mode mode,
2152                       BITSET_WORD **used_by_other_stage)
2153 {
2154    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
2155 
2156    bool progress = false;
2157    nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;
2158 
2159    BITSET_WORD **used;
2160    nir_foreach_variable_with_modes_safe(var, shader, mode) {
2161       used = used_by_other_stage;
2162 
2163       /* Skip builtins; dead builtins are removed elsewhere */
2164       if (is_gl_identifier(var->name))
2165          continue;
2166 
2167       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
2168          continue;
2169 
2170       /* Skip xfb varyings and any other type we cannot remove */
2171       if (var->data.always_active_io)
2172          continue;
2173 
2174       if (var->data.explicit_xfb_buffer)
2175          continue;
2176 
2177       BITSET_WORD *other_stage = used[var->data.location_frac];
2178 
2179       /* If location == -1, lower the varying to a global as it has no match
2180        * and is not an xfb varying. This must be done after skipping builtins,
2181        * as builtins could be assigned a location of -1.
2182        * We also lower unused varyings with explicit locations.
2183        */
2184       bool use_found = false;
2185       if (var->data.location >= 0) {
2186          unsigned location = var->data.location - VARYING_SLOT_VAR0;
2187 
2188          const struct glsl_type *type = var->type;
2189          if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
2190             assert(glsl_type_is_array(type));
2191             type = glsl_get_array_element(type);
2192          }
2193 
2194          unsigned slots = glsl_count_attribute_slots(type, false);
2195          for (unsigned i = 0; i < slots; i++) {
2196             if (BITSET_TEST(other_stage, location + i)) {
2197                use_found = true;
2198                break;
2199             }
2200          }
2201       }
2202 
2203       if (!use_found) {
2204          /* This one is invalid, make it a global variable instead */
2205          var->data.location = 0;
2206          var->data.mode = nir_var_shader_temp;
2207 
2208          progress = true;
2209 
2210          if (mode == nir_var_shader_in) {
2211             if (!prog->IsES && prog->data->Version <= 120) {
2212                /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
2213                 *
2214                 *     Only those varying variables used (i.e. read) in
2215                 *     the fragment shader executable must be written to
2216                 *     by the vertex shader executable; declaring
2217                 *     superfluous varying variables in a vertex shader is
2218                 *     permissible.
2219                 *
2220                 * We interpret this text as meaning that the VS must
2221                 * write the variable for the FS to read it.  See
2222                 * "glsl1-varying read but not written" in piglit.
2223                 */
2224                linker_error(prog, "%s shader varying %s not written "
2225                             "by %s shader\n.",
2226                             _mesa_shader_stage_to_string(consumer->info.stage),
2227                             var->name,
2228                             _mesa_shader_stage_to_string(producer->info.stage));
2229             } else {
2230                linker_warning(prog, "%s shader varying %s not written "
2231                               "by %s shader\n.",
2232                               _mesa_shader_stage_to_string(consumer->info.stage),
2233                               var->name,
2234                               _mesa_shader_stage_to_string(producer->info.stage));
2235             }
2236          }
2237       }
2238    }
2239 
2240    if (progress)
2241       fixup_vars_lowered_to_temp(shader, mode);
2242 
2243    return progress;
2244 }
2245 
2246 static bool
2247 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
2248                        struct gl_shader_program *prog, void *mem_ctx)
2249 {
2250    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
2251    assert(consumer->info.stage != MESA_SHADER_VERTEX);
2252 
2253    int max_loc_out = 0;
2254    nir_foreach_shader_out_variable(var, producer) {
2255       if (var->data.location < VARYING_SLOT_VAR0)
2256          continue;
2257 
2258       const struct glsl_type *type = var->type;
2259       if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
2260          assert(glsl_type_is_array(type));
2261          type = glsl_get_array_element(type);
2262       }
2263       unsigned slots = glsl_count_attribute_slots(type, false);
2264 
2265       max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ?
2266          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out;
2267    }
2268 
2269    int max_loc_in = 0;
2270    nir_foreach_shader_in_variable(var, consumer) {
2271       if (var->data.location < VARYING_SLOT_VAR0)
2272          continue;
2273 
2274       const struct glsl_type *type = var->type;
2275       if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
2276          assert(glsl_type_is_array(type));
2277          type = glsl_get_array_element(type);
2278       }
2279       unsigned slots = glsl_count_attribute_slots(type, false);
2280 
2281       max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ?
2282          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in;
2283    }
2284 
2285    /* Old glsl shaders that don't use explicit locations can contain more
2286     * than 64 varyings before unused varyings are removed, so we must count
2287     * them and make use of the BITSET macros to keep track of used slots. Once
2288     * we have removed these excess varyings we can make use of further nir
2289     * varying linking optimisation passes.
2290     */
2291    BITSET_WORD *read[4];
2292    BITSET_WORD *written[4];
2293    int max_loc = MAX2(max_loc_in, max_loc_out);
2294    for (unsigned i = 0; i < 4; i++) {
2295       read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
2296       written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
2297    }
2298 
2299    nir_foreach_shader_out_variable(var, producer) {
2300       if (var->data.location < VARYING_SLOT_VAR0)
2301          continue;
2302 
2303       for (unsigned i = 0; i < get_num_components(var); i++) {
2304          unsigned comp = var->data.location_frac;
2305          set_variable_io_mask(written[comp + i], var, producer->info.stage);
2306       }
2307    }
2308 
2309    nir_foreach_shader_in_variable(var, consumer) {
2310       if (var->data.location < VARYING_SLOT_VAR0)
2311          continue;
2312 
2313       for (unsigned i = 0; i < get_num_components(var); i++) {
2314          unsigned comp = var->data.location_frac;
2315          set_variable_io_mask(read[comp + i], var, consumer->info.stage);
2316       }
2317    }
2318 
2319    /* Each TCS invocation can read data written by other TCS invocations,
2320     * so even if the outputs are not used by the TES we must also make
2321     * sure they are not read by the TCS before demoting them to globals.
2322     */
2323    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
2324       tcs_add_output_reads(producer, read);
2325 
2326    bool progress = false;
2327    progress =
2328       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
2329    progress =
2330       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;
2331 
2332    return progress;
2333 }
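
/* For example (illustrative): a producer "out vec4 a;" at a generic location
 * with no matching consumer input and no xfb capture has its read bit clear,
 * so remove_unused_io_vars() demotes it to nir_var_shader_temp, where
 * ordinary dead-code elimination can delete it.
 */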
2334 
2335 static bool
2336 should_add_varying_match_record(nir_variable *const input_var,
2337                                 struct gl_shader_program *prog,
2338                                 struct gl_linked_shader *producer,
2339                                 struct gl_linked_shader *consumer)
2340 {
2341    /* If a matching input variable was found, add this output (and the input) to
2342     * the set.  If this is a separable program and there is no consumer stage,
2343     * add the output.
2344     *
2345     * Always add TCS outputs. They are shared by all invocations
2346     * within a patch and can be used as shared memory.
2347     */
2348    return input_var || (prog->SeparateShader && consumer == NULL) ||
2349              producer->Stage == MESA_SHADER_TESS_CTRL;
2350 }
2351 
2352 /* This assigns some initial unoptimised varying locations so that our nir
2353  * optimisations can perform some initial optimisations and also does initial
2354  * processing of xfb varyings.
2355  */
2356 static bool
2357 assign_initial_varying_locations(const struct gl_constants *consts,
2358                                  const struct gl_extensions *exts,
2359                                  void *mem_ctx,
2360                                  struct gl_shader_program *prog,
2361                                  struct gl_linked_shader *producer,
2362                                  struct gl_linked_shader *consumer,
2363                                  unsigned num_xfb_decls,
2364                                  struct xfb_decl *xfb_decls,
2365                                  struct varying_matches *vm)
2366 {
2367    init_varying_matches(mem_ctx, vm, consts, exts,
2368                         producer ? producer->Stage : MESA_SHADER_NONE,
2369                         consumer ? consumer->Stage : MESA_SHADER_NONE,
2370                         prog->SeparateShader);
2371 
2372    struct hash_table *tfeedback_candidates =
2373          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
2374                                  _mesa_key_string_equal);
2375    struct hash_table *consumer_inputs =
2376          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
2377                                  _mesa_key_string_equal);
2378    struct hash_table *consumer_interface_inputs =
2379          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
2380                                  _mesa_key_string_equal);
2381    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2382       NULL,
2383    };
2384 
2385    if (consumer)
2386       populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
2387                                    consumer_inputs, consumer_interface_inputs,
2388                                    consumer_inputs_with_locations);
2389 
2390    if (producer) {
2391       nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
2392          /* Only geometry shaders can use non-zero streams */
2393          assert(output_var->data.stream == 0 ||
2394                 (output_var->data.stream < MAX_VERTEX_STREAMS &&
2395                  producer->Stage == MESA_SHADER_GEOMETRY));
2396 
2397          if (num_xfb_decls > 0) {
2398             /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
2399              * ("Vertex Shader Variables / Output Variables")
2400              *
2401              * "Each program object can specify a set of output variables from
2402              * one shader to be recorded in transform feedback mode (see
2403              * section 13.3). The variables that can be recorded are those
2404              * emitted by the first active shader, in order, from the
2405              * following list:
2406              *
2407              *  * geometry shader
2408              *  * tessellation evaluation shader
2409              *  * tessellation control shader
2410              *  * vertex shader"
2411              *
2412              * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
2413              * Variables / Output Variables") tessellation control shader is
2414              * not included in the stages list.
2415              */
2416             if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
2417 
2418                const struct glsl_type *type = output_var->data.from_named_ifc_block ?
2419                   output_var->interface_type : output_var->type;
2420                if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
2421                   assert(glsl_type_is_array(type));
2422                   type = glsl_get_array_element(type);
2423                }
2424 
2425                const struct glsl_struct_field *ifc_member = NULL;
2426                if (output_var->data.from_named_ifc_block) {
2427                   ifc_member =
2428                      glsl_get_struct_field_data(glsl_without_array(type),
2429                         glsl_get_field_index(glsl_without_array(type), output_var->name));
2430                }
2431 
2432                char *name;
2433                if (glsl_type_is_struct(glsl_without_array(type)) ||
2434                    (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
2435                   type = output_var->type;
2436                   name = ralloc_strdup(NULL, output_var->name);
2437                } else if (glsl_type_is_interface(glsl_without_array(type))) {
2438                   name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
2439                } else  {
2440                   name = ralloc_strdup(NULL, output_var->name);
2441                }
2442 
2443                struct tfeedback_candidate_generator_state state;
2444                state.mem_ctx = mem_ctx;
2445                state.tfeedback_candidates = tfeedback_candidates;
2446                state.stage = producer->Stage;
2447                state.toplevel_var = output_var;
2448                state.varying_floats = 0;
2449                state.xfb_offset_floats = 0;
2450 
2451                tfeedback_candidate_generator(&state, &name, strlen(name), type,
2452                                              ifc_member);
2453                ralloc_free(name);
2454             }
2455          }
2456 
2457          nir_variable *const input_var =
2458             get_matching_input(mem_ctx, output_var, consumer_inputs,
2459                                consumer_interface_inputs,
2460                                consumer_inputs_with_locations);
2461 
2462          if (should_add_varying_match_record(input_var, prog, producer,
2463                                              consumer)) {
2464             varying_matches_record(mem_ctx, vm, output_var, input_var);
2465          }
2466 
2467          /* Only stream 0 outputs can be consumed in the next stage */
2468          if (input_var && output_var->data.stream != 0) {
2469             linker_error(prog, "output %s is assigned to stream=%d but "
2470                          "is linked to an input, which requires stream=0",
2471                          output_var->name, output_var->data.stream);
2472             return false;
2473          }
2474       }
2475    } else {
2476       /* If there's no producer stage, then this must be a separable program.
2477        * For example, we may have a program that has just a fragment shader.
2478        * Later this program will be used with some arbitrary vertex (or
2479        * geometry) shader program.  This means that locations must be assigned
2480        * for all the inputs.
2481        */
2482       nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
2483          varying_matches_record(mem_ctx, vm, NULL, input_var);
2484       }
2485    }
2486 
2487    for (unsigned i = 0; i < num_xfb_decls; ++i) {
2488       if (!xfb_decl_is_varying(&xfb_decls[i]))
2489          continue;
2490 
2491       const struct tfeedback_candidate *matched_candidate
2492          = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);
2493 
2494       if (matched_candidate == NULL)
2495          return false;
2496 
2497       /* There are two situations where a new output varying is needed:
2498        *
2499        *  - If varying packing is disabled for xfb and the current declaration
2500        *    is subscripting an array (whether the subscript is aligned or not),
2501        *    to preserve the rest of the array for the consumer.
2502        *
2503        *  - If a builtin variable needs to be copied to a new variable
2504        *    before its content is modified by another lowering pass (e.g.
2505        *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
2506        */
2507       const bool lowered =
2508          (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
2509          (matched_candidate->toplevel_var->data.explicit_location &&
2510           matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
2511           (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
2512           (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
2513               BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
2514 
2515       if (lowered) {
2516          nir_variable *new_var;
2517          struct tfeedback_candidate *new_candidate = NULL;
2518 
2519          new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
2520                                             xfb_decls[i].orig_name,
2521                                             matched_candidate->toplevel_var);
2522          if (new_var == NULL)
2523             return false;
2524 
2525          /* Create new candidate and replace matched_candidate */
2526          new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
2527          new_candidate->toplevel_var = new_var;
2528          new_candidate->type = new_var->type;
2529          new_candidate->struct_offset_floats = 0;
2530          new_candidate->xfb_offset_floats = 0;
2531          _mesa_hash_table_insert(tfeedback_candidates,
2532                                  ralloc_strdup(mem_ctx, new_var->name),
2533                                  new_candidate);
2534 
2535          xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
2536          matched_candidate = new_candidate;
2537       }
2538 
2539       /* Mark as xfb varying */
2540       matched_candidate->toplevel_var->data.is_xfb = 1;
2541 
2542       /* Mark xfb varyings as always active */
2543       matched_candidate->toplevel_var->data.always_active_io = 1;
2544 
2545       /* Mark any corresponding inputs as always active too. We must do this
2546        * because we have a NIR pass that lowers vectors to scalars and another
2547        * that removes unused varyings.
2548        * We don't split varyings marked as always active because there is no
2549        * point in doing so. This means we need to mark both sides of the
2550        * interface as always active, otherwise we will have a mismatch and
2551        * start removing things we shouldn't.
2552        */
2553       nir_variable *const input_var =
2554          get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2555                             consumer_inputs, consumer_interface_inputs,
2556                             consumer_inputs_with_locations);
2557       if (input_var) {
2558          input_var->data.is_xfb = 1;
2559          input_var->data.always_active_io = 1;
2560       }
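
      /* A sketch of the mismatch this guards against (hypothetical varying
       * names): if the producer's "out vec4 a" is captured by xfb and left
       * unsplit, but the consumer's matching "in vec4 a" were scalarized and
       * some of its components then removed as unused, the two sides of the
       * interface would disagree and live components could be dropped.
       */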
2561 
2562       /* Add the xfb varying to varying matches if it wasn't already added */
2563       if ((!should_add_varying_match_record(input_var, prog, producer,
2564                                             consumer) &&
2565            !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
2566          matched_candidate->toplevel_var->data.is_xfb_only = 1;
2567          varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
2568                                 NULL);
2569       }
2570    }
2571 
2572    uint64_t reserved_out_slots = 0;
2573    if (producer)
2574       reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);
2575 
2576    uint64_t reserved_in_slots = 0;
2577    if (consumer)
2578       reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);
2579 
2580    /* Assign temporary user varying locations. This is required for our NIR
2581     * varying optimisations to do their matching.
2582     */
2583    const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
2584    varying_matches_assign_temp_locations(vm, prog, reserved_slots);
2585 
2586    for (unsigned i = 0; i < num_xfb_decls; ++i) {
2587       if (!xfb_decl_is_varying(&xfb_decls[i]))
2588          continue;
2589 
2590       xfb_decls[i].matched_candidate->initial_location =
2591          xfb_decls[i].matched_candidate->toplevel_var->data.location;
2592       xfb_decls[i].matched_candidate->initial_location_frac =
2593          xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
2594    }
2595 
2596    return true;
2597 }
2598 
2599 static void
2600 link_shader_opts(struct varying_matches *vm,
2601                  nir_shader *producer, nir_shader *consumer,
2602                  struct gl_shader_program *prog, void *mem_ctx)
2603 {
2604    /* If we can't pack the stage using this pass then we can't lower I/O to
2605     * scalar just yet. Instead we leave it to a later NIR linking pass that uses
2606     * ARB_enhanced_layouts style packing to pack things further.
2607     *
2608     * Otherwise we might end up causing linking errors and perf regressions
2609     * because the new scalars will be assigned individual slots and can overflow
2610     * the available slots.
2611     */
2612    if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
2613       !vm->disable_xfb_packing) {
2614       NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
2615       NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
2616    }
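
   /* To illustrate the overflow mentioned above with hypothetical numbers:
    * 16 packed vec4 outputs exactly fill a 16-slot stage, but split into 64
    * scalars they would each claim a whole slot when packing is unavailable,
    * overflowing the limit and failing the link.
    */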
2617 
2618    gl_nir_opts(producer);
2619    gl_nir_opts(consumer);
2620 
2621    if (nir_link_opt_varyings(producer, consumer))
2622       gl_nir_opts(consumer);
2623 
2624    NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
2625    NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
2626 
2627    if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
2628       NIR_PASS_V(producer, nir_lower_global_vars_to_local);
2629       NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
2630 
2631       gl_nir_opts(producer);
2632       gl_nir_opts(consumer);
2633 
2634       /* Optimizations can cause varyings to become unused.
2635        * nir_compact_varyings() depends on all dead varyings being removed so
2636        * we need to call nir_remove_dead_variables() again here.
2637        */
2638       NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out,
2639                  NULL);
2640       NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in,
2641                  NULL);
2642    }
2643 
2644    nir_link_varying_precision(producer, consumer);
2645 }
2646 
2647 /**
2648  * Assign locations for all variables that are produced in one pipeline stage
2649  * (the "producer") and consumed in the next stage (the "consumer").
2650  *
2651  * Variables produced by the producer may also be consumed by transform
2652  * feedback.
2653  *
2654  * \param num_xfb_decls is the number of declarations indicating
2655  *        variables that may be consumed by transform feedback.
2656  *
2657  * \param xfb_decls is a pointer to an array of xfb_decl objects
2658  *        representing the result of parsing the strings passed to
2659  *        glTransformFeedbackVaryings().  xfb_decl_assign_location() will
2660  *        be called for each of these objects that matches one of the
2661  *        outputs of the producer.
2662  *
2663  * When num_xfb_decls is nonzero, it is permissible for the consumer to
2664  * be NULL.  In this case, varying locations are assigned solely based on the
2665  * requirements of transform feedback.
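 *
 * For example (hypothetical usage), a program containing only a vertex
 * shader whose outputs are captured with glTransformFeedbackVaryings() is
 * linked with a NULL consumer, and locations are assigned purely so the
 * captured varyings can be recorded.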
2666  */
2667 static bool
2668 assign_final_varying_locations(const struct gl_constants *consts,
2669                                const struct gl_extensions *exts,
2670                                void *mem_ctx,
2671                                struct gl_shader_program *prog,
2672                                struct gl_linked_shader *producer,
2673                                struct gl_linked_shader *consumer,
2674                                unsigned num_xfb_decls,
2675                                struct xfb_decl *xfb_decls,
2676                                const uint64_t reserved_slots,
2677                                struct varying_matches *vm)
2678 {
2679    init_varying_matches(mem_ctx, vm, consts, exts,
2680                         producer ? producer->Stage : MESA_SHADER_NONE,
2681                         consumer ? consumer->Stage : MESA_SHADER_NONE,
2682                         prog->SeparateShader);
2683 
2684    /* Regather the varying matches, as the optimisation passes we ran have
2685     * invalidated the previously recorded pointers.
2686     */
2687    if (producer) {
2688       nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
2689          if (var_out->data.location < VARYING_SLOT_VAR0 ||
2690              var_out->data.explicit_location)
2691             continue;
2692 
2693          if (vm->num_matches == vm->matches_capacity) {
2694             vm->matches_capacity *= 2;
2695             vm->matches = (struct match *)
2696                reralloc(mem_ctx, vm->matches, struct match,
2697                         vm->matches_capacity);
2698          }
2699 
2700          vm->matches[vm->num_matches].packing_class
2701             = varying_matches_compute_packing_class(var_out);
2702          vm->matches[vm->num_matches].packing_order
2703             = varying_matches_compute_packing_order(var_out);
2704 
2705          vm->matches[vm->num_matches].producer_var = var_out;
2706          vm->matches[vm->num_matches].consumer_var = NULL;
2707          vm->num_matches++;
2708       }
2709 
2710       /* Regather xfb varyings too */
2711       for (unsigned i = 0; i < num_xfb_decls; i++) {
2712          if (!xfb_decl_is_varying(&xfb_decls[i]))
2713             continue;
2714 
2715          /* Varying pointer was already reset */
2716          if (xfb_decls[i].matched_candidate->initial_location == -1)
2717             continue;
2718 
2719          bool UNUSED is_reset = false;
2720          bool UNUSED no_outputs = true;
2721          nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
2722             no_outputs = false;
2723             assert(var_out->data.location != -1);
2724             if (var_out->data.location ==
2725                 xfb_decls[i].matched_candidate->initial_location &&
2726                 var_out->data.location_frac ==
2727                 xfb_decls[i].matched_candidate->initial_location_frac) {
2728                xfb_decls[i].matched_candidate->toplevel_var = var_out;
2729                xfb_decls[i].matched_candidate->initial_location = -1;
2730                is_reset = true;
2731                break;
2732             }
2733          }
2734          assert(is_reset || no_outputs);
2735       }
2736    }
2737 
2738    bool found_match = false;
2739    if (consumer) {
2740       nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
2741          if (var_in->data.location < VARYING_SLOT_VAR0 ||
2742              var_in->data.explicit_location)
2743             continue;
2744 
2745          found_match = false;
2746          for (unsigned i = 0; i < vm->num_matches; i++) {
2747             if (vm->matches[i].producer_var &&
2748                 (vm->matches[i].producer_var->data.location == var_in->data.location &&
2749                  vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {
2750 
2751                vm->matches[i].consumer_var = var_in;
2752                found_match = true;
2753                break;
2754             }
2755          }
2756          if (!found_match) {
2757             if (vm->num_matches == vm->matches_capacity) {
2758                vm->matches_capacity *= 2;
2759                vm->matches = (struct match *)
2760                   reralloc(mem_ctx, vm->matches, struct match,
2761                            vm->matches_capacity);
2762             }
2763 
2764             vm->matches[vm->num_matches].packing_class
2765                = varying_matches_compute_packing_class(var_in);
2766             vm->matches[vm->num_matches].packing_order
2767                = varying_matches_compute_packing_order(var_in);
2768 
2769             vm->matches[vm->num_matches].producer_var = NULL;
2770             vm->matches[vm->num_matches].consumer_var = var_in;
2771             vm->num_matches++;
2772          }
2773       }
2774    }
2775 
2776    uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
2777    const unsigned slots_used =
2778       varying_matches_assign_locations(vm, prog, components, reserved_slots);
2779    varying_matches_store_locations(vm);
2780 
2781    for (unsigned i = 0; i < num_xfb_decls; ++i) {
2782       if (xfb_decl_is_varying(&xfb_decls[i])) {
2783          if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog))
2784             return false;
2785       }
2786    }
2787 
2788    if (producer) {
2789       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
2790                                    nir_var_shader_out, 0, producer,
2791                                    vm->disable_varying_packing,
2792                                    vm->disable_xfb_packing, vm->xfb_enabled);
2793       nir_lower_pack(producer->Program->nir);
2794    }
2795 
2796    if (consumer) {
2797       unsigned consumer_vertices = 0;
2798       if (consumer->Stage == MESA_SHADER_GEOMETRY)
2799          consumer_vertices = prog->Geom.VerticesIn;
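      /* Geometry shader inputs are per-vertex arrays whose outer size is the
       * input primitive's vertex count, e.g. layout(triangles) gives
       * VerticesIn == 3, which the packing pass uses to size those arrays.
       */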
2800 
2801       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
2802                                    nir_var_shader_in, consumer_vertices,
2803                                    consumer, vm->disable_varying_packing,
2804                                    vm->disable_xfb_packing, vm->xfb_enabled);
2805       nir_lower_pack(consumer->Program->nir);
2806    }
2807 
2808    return true;
2809 }
2810 
2811 static bool
2812 check_against_output_limit(const struct gl_constants *consts, gl_api api,
2813                            struct gl_shader_program *prog,
2814                            struct gl_linked_shader *producer,
2815                            unsigned num_explicit_locations)
2816 {
2817    unsigned output_vectors = num_explicit_locations;
2818    nir_foreach_shader_out_variable(var, producer->Program->nir) {
2819       if (!var->data.explicit_location &&
2820           var_counts_against_varying_limit(producer->Stage, var)) {
2821          /* Outputs for the fragment shader can't be doubles */
2822          output_vectors += glsl_count_attribute_slots(var->type, false);
2823       }
2824    }
2825 
2826    assert(producer->Stage != MESA_SHADER_FRAGMENT);
2827    unsigned max_output_components =
2828       consts->Program[producer->Stage].MaxOutputComponents;
2829 
2830    const unsigned output_components = output_vectors * 4;
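
   /* A worked example with hypothetical limits: MaxOutputComponents == 64
    * allows 16 vec4 slots, so 17 unpacked vec4 outputs (68 components)
    * would fail the check below.
    */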
2831    if (output_components > max_output_components) {
2832       if (api == API_OPENGLES2 || prog->IsES)
2833          linker_error(prog, "%s shader uses too many output vectors "
2834                       "(%u > %u)\n",
2835                       _mesa_shader_stage_to_string(producer->Stage),
2836                       output_vectors,
2837                       max_output_components / 4);
2838       else
2839          linker_error(prog, "%s shader uses too many output components "
2840                       "(%u > %u)\n",
2841                       _mesa_shader_stage_to_string(producer->Stage),
2842                       output_components,
2843                       max_output_components);
2844 
2845       return false;
2846    }
2847 
2848    return true;
2849 }
2850 
2851 static bool
2852 check_against_input_limit(const struct gl_constants *consts, gl_api api,
2853                           struct gl_shader_program *prog,
2854                           struct gl_linked_shader *consumer,
2855                           unsigned num_explicit_locations)
2856 {
2857    unsigned input_vectors = num_explicit_locations;
2858 
2859    nir_foreach_shader_in_variable(var, consumer->Program->nir) {
2860       if (!var->data.explicit_location &&
2861           var_counts_against_varying_limit(consumer->Stage, var)) {
2862          /* Vertex inputs aren't counted against the varying limit */
2863          input_vectors += glsl_count_attribute_slots(var->type, false);
2864       }
2865    }
2866 
2867    assert(consumer->Stage != MESA_SHADER_VERTEX);
2868    unsigned max_input_components =
2869       consts->Program[consumer->Stage].MaxInputComponents;
2870 
2871    const unsigned input_components = input_vectors * 4;
2872    if (input_components > max_input_components) {
2873       if (api == API_OPENGLES2 || prog->IsES)
2874          linker_error(prog, "%s shader uses too many input vectors "
2875                       "(%u > %u)\n",
2876                       _mesa_shader_stage_to_string(consumer->Stage),
2877                       input_vectors,
2878                       max_input_components / 4);
2879       else
2880          linker_error(prog, "%s shader uses too many input components "
2881                       "(%u > %u)\n",
2882                       _mesa_shader_stage_to_string(consumer->Stage),
2883                       input_components,
2884                       max_input_components);
2885 
2886       return false;
2887    }
2888 
2889    return true;
2890 }
2891 
2892 /* Lower unset/unused inputs/outputs */
2893 static void
2894 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
2895                                         unsigned stage, nir_variable_mode mode)
2896 {
2897    bool progress = false;
2898    nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;
2899 
2900    nir_foreach_variable_with_modes_safe(var, shader, mode) {
2901       if (!var->data.is_xfb_only && var->data.location == -1) {
2902          var->data.location = 0;
2903          var->data.mode = nir_var_shader_temp;
2904          progress = true;
2905       }
2906    }
2907 
2908    if (progress)
2909       fixup_vars_lowered_to_temp(shader, mode);
2910 }
2911 
2912 static bool
2913 link_varyings(struct gl_shader_program *prog, unsigned first,
2914               unsigned last, const struct gl_constants *consts,
2915               const struct gl_extensions *exts, gl_api api, void *mem_ctx)
2916 {
2917    bool has_xfb_qualifiers = false;
2918    unsigned num_xfb_decls = 0;
2919    char **varying_names = NULL;
2920    struct xfb_decl *xfb_decls = NULL;
2921 
2922    if (last > MESA_SHADER_FRAGMENT)
2923       return true;
2924 
2925    /* From the ARB_enhanced_layouts spec:
2926     *
2927     *    "If the shader used to record output variables for transform feedback
2928     *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
2929     *    qualifiers, the values specified by TransformFeedbackVaryings are
2930     *    ignored, and the set of variables captured for transform feedback is
2931     *    instead derived from the specified layout qualifiers."
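    *
    * Such qualifiers look roughly like this (hypothetical GLSL, shown only
    * for illustration):
    *
    *    layout(xfb_offset = 0) out vec4 streamed_pos;
    *
    * and, when present in the last pre-fragment stage, cause the names
    * passed to glTransformFeedbackVaryings() to be ignored below.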
2932     */
2933    for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
2934       /* Find last stage before fragment shader */
2935       if (prog->_LinkedShaders[i]) {
2936          has_xfb_qualifiers =
2937             process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
2938                                           prog, &num_xfb_decls,
2939                                           &varying_names);
2940          break;
2941       }
2942    }
2943 
2944    if (!has_xfb_qualifiers) {
2945       num_xfb_decls = prog->TransformFeedback.NumVarying;
2946       varying_names = prog->TransformFeedback.VaryingNames;
2947    }
2948 
2949    if (num_xfb_decls != 0) {
2950       /* From GL_EXT_transform_feedback:
2951        *   A program will fail to link if:
2952        *
2953        *   * the <count> specified by TransformFeedbackVaryingsEXT is
2954        *     non-zero, but the program object has no vertex or geometry
2955        *     shader;
2956        */
2957       if (first >= MESA_SHADER_FRAGMENT) {
2958          linker_error(prog, "Transform feedback varyings specified, but "
2959                       "no vertex, tessellation, or geometry shader is "
2960                       "present.\n");
2961          return false;
2962       }
2963 
2964       xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
2965                                       num_xfb_decls);
2966       if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
2967                            varying_names, xfb_decls))
2968          return false;
2969    }
2970 
2971    struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
2972    unsigned num_shaders = 0;
2973 
2974    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
2975       if (prog->_LinkedShaders[i])
2976          linked_shader[num_shaders++] = prog->_LinkedShaders[i];
2977    }
2978 
2979    struct varying_matches vm;
2980    if (last < MESA_SHADER_FRAGMENT &&
2981        (num_xfb_decls != 0 || prog->SeparateShader)) {
2982          struct gl_linked_shader *producer = prog->_LinkedShaders[last];
2983          if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
2984                                                producer, NULL, num_xfb_decls,
2985                                                xfb_decls, &vm))
2986             return false;
2987    }
2988 
2989    if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
2990       remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
2991       remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
2992    }
2993 
2994    if (prog->SeparateShader) {
2995       struct gl_linked_shader *consumer = linked_shader[0];
2996       if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
2997                                             consumer, 0, NULL, &vm))
2998          return false;
2999    }
3000 
3001    if (num_shaders == 1) {
3002       /* Linking shaders also optimizes them. Separate shaders, compute shaders
3003        * and shaders with a fixed-func VS or FS that don't need linking are
3004        * optimized here.
3005        */
3006       gl_nir_opts(linked_shader[0]->Program->nir);
3007    } else {
3008       /* Linking the stages in the opposite order (from fragment to vertex)
3009        * ensures that inter-shader outputs written to in an earlier stage
3010        * are eliminated if they are (transitively) not used in a later
3011        * stage.
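       * For example, a vertex shader output feeding only a geometry shader
       * output that the fragment shader never reads can be eliminated once
       * the GS/FS pair has been linked first.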
3012        */
3013       for (int i = num_shaders - 2; i >= 0; i--) {
3014          unsigned stage_num_xfb_decls =
3015             linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
3016             num_xfb_decls : 0;
3017 
3018          if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
3019                                                linked_shader[i],
3020                                                linked_shader[i + 1],
3021                                                stage_num_xfb_decls, xfb_decls,
3022                                                &vm))
3023             return false;
3024 
3025          /* Now that validation is done it's safe to remove unused varyings. As
3026           * we have both a producer and a consumer, it's safe to remove unused
3027           * varyings even if the program is an SSO, because the stages are being
3028           * linked together, i.e. we have a multi-stage SSO.
3029           */
3030          link_shader_opts(&vm, linked_shader[i]->Program->nir,
3031                           linked_shader[i + 1]->Program->nir,
3032                           prog, mem_ctx);
3033 
3034          remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
3035                                                  nir_var_shader_out);
3036          remove_unused_shader_inputs_and_outputs(prog,
3037                                                  linked_shader[i + 1]->Stage,
3038                                                  nir_var_shader_in);
3039       }
3040    }
3041 
3042    if (!prog->SeparateShader) {
3043       /* If not an SSO, remove unused varyings from the first/last stage */
3044       NIR_PASS_V(prog->_LinkedShaders[first]->Program->nir,
3045                  nir_remove_dead_variables, nir_var_shader_in, NULL);
3046       NIR_PASS_V(prog->_LinkedShaders[last]->Program->nir,
3047                  nir_remove_dead_variables, nir_var_shader_out, NULL);
3048    } else {
3049       /* Sort inputs / outputs into a canonical order.  This is necessary so
3050        * that inputs / outputs of separable shaders will be assigned
3051        * predictable locations regardless of the order in which declarations
3052        * appeared in the shader source.
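       * For instance (hypothetical shaders), two separable programs declaring
       * "in vec4 a; in vec4 b;" and "in vec4 b; in vec4 a;" must receive the
       * same location assignment so that either can be paired with the same
       * producer at runtime.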
3053        */
3054       if (first != MESA_SHADER_VERTEX) {
3055          canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
3056                                 nir_var_shader_in);
3057       }
3058 
3059       if (last != MESA_SHADER_FRAGMENT) {
3060          canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
3061                                 nir_var_shader_out);
3062       }
3063    }
3064 
3065    /* If there is no fragment shader we still need to set up transform feedback.
3066     *
3067     * For SSO we also need to assign output locations.  We assign them here
3068     * because we need to do it for both single stage programs and multi stage
3069     * programs.
3070     */
3071    if (last < MESA_SHADER_FRAGMENT &&
3072        (num_xfb_decls != 0 || prog->SeparateShader)) {
3073       const uint64_t reserved_out_slots =
3074          reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
3075       if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
3076                                           prog->_LinkedShaders[last], NULL,
3077                                           num_xfb_decls, xfb_decls,
3078                                           reserved_out_slots, &vm))
3079          return false;
3080    }
3081 
3082    if (prog->SeparateShader) {
3083       struct gl_linked_shader *const sh = prog->_LinkedShaders[first];
3084 
3085       const uint64_t reserved_slots =
3086          reserved_varying_slot(sh, nir_var_shader_in);
3087 
3088       /* Assign input locations for SSO, output locations are already
3089        * assigned.
3090        */
3091       if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
3092                                           NULL /* producer */,
3093                                           sh /* consumer */,
3094                                           0 /* num_xfb_decls */,
3095                                           NULL /* xfb_decls */,
3096                                           reserved_slots, &vm))
3097          return false;
3098    }
3099 
3100    if (num_shaders == 1) {
3101       gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
3102                                        0, NULL);
3103       gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
3104                                        num_xfb_decls, xfb_decls);
3105    } else {
3106       /* Linking the stages in the opposite order (from fragment to vertex)
3107        * ensures that inter-shader outputs written to in an earlier stage
3108        * are eliminated if they are (transitively) not used in a later
3109        * stage.
3110        */
3111       int next = last;
3112       for (int i = next - 1; i >= 0; i--) {
3113          if (prog->_LinkedShaders[i] == NULL && i != 0)
3114             continue;
3115 
3116          struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
3117          struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
3118 
3119          gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
3120                                           next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
3121                                           xfb_decls);
3122 
3123          const uint64_t reserved_out_slots =
3124             reserved_varying_slot(sh_i, nir_var_shader_out);
3125          const uint64_t reserved_in_slots =
3126             reserved_varying_slot(sh_next, nir_var_shader_in);
3127 
3128          if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
3129                    sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
3130                    xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
3131             return false;
3132 
3133          /* This must be done after all dead varyings are eliminated. */
3134          if (sh_i != NULL) {
3135             unsigned slots_used = util_bitcount64(reserved_out_slots);
3136             if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
3137                return false;
3138          }
3139 
3140          unsigned slots_used = util_bitcount64(reserved_in_slots);
3141          if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
3142             return false;
3143 
3144          next = i;
3145       }
3146    }
3147 
3148    if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
3149                              has_xfb_qualifiers, mem_ctx))
3150       return false;
3151 
3152    return true;
3153 }
3154 
3155 bool
3156 gl_nir_link_varyings(const struct gl_constants *consts,
3157                      const struct gl_extensions *exts,
3158                      gl_api api, struct gl_shader_program *prog)
3159 {
3160    void *mem_ctx = ralloc_context(NULL);
3161 
3162    unsigned first, last;
3163 
3164    first = MESA_SHADER_STAGES;
3165    last = 0;
3166 
3167    /* We need to initialise the program resource list because the varying
3168     * packing pass may start inserting varyings onto the list.
3169     */
3170    init_program_resource_list(prog);
3171 
3172    /* Determine first and last stage. */
3173    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
3174       if (!prog->_LinkedShaders[i])
3175          continue;
3176       if (first == MESA_SHADER_STAGES)
3177          first = i;
3178       last = i;
3179    }
3180 
3181    bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
3182    if (r) {
3183       for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
3184          if (!prog->_LinkedShaders[i])
3185             continue;
3186 
3187          /* Check for transform feedback varyings specified via the API */
3188          prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
3189             prog->TransformFeedback.NumVarying > 0;
3190 
3191          /* Check for transform feedback varyings specified in the Shader */
3192          if (prog->last_vert_prog) {
3193             prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
3194                prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
3195          }
3196       }
3197 
3198       /* Assign NIR XFB info to the last stage before the fragment shader */
3199       for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
3200          struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
3201          if (sh && stage != MESA_SHADER_TESS_CTRL) {
3202             sh->Program->nir->xfb_info =
3203                gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
3204                                   sh->Program->nir);
3205             break;
3206          }
3207       }
3208    }
3209 
3210    ralloc_free(mem_ctx);
3211    return r;
3212 }
3213