1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file link_varyings.cpp
26 *
27 * Linker functions related specifically to linking varyings between shader
28 * stages.
29 */
30
31
32 #include "main/errors.h"
33 #include "main/mtypes.h"
34 #include "glsl_symbol_table.h"
35 #include "glsl_parser_extras.h"
36 #include "ir_optimization.h"
37 #include "linker.h"
38 #include "link_varyings.h"
39 #include "main/macros.h"
40 #include "util/hash_table.h"
41 #include "util/u_math.h"
42 #include "program.h"
43
44
45 /**
46 * Get the varying type stripped of the outermost array if we're processing
47 * a stage whose varyings are arrays indexed by a vertex number (such as
48 * geometry shader inputs).
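*
* For example, a per-vertex geometry shader input declared as
* "in vec4 color[]" is treated here as having type vec4.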
49 */
50 static const glsl_type *
51 get_varying_type(const ir_variable *var, gl_shader_stage stage)
52 {
53 const glsl_type *type = var->type;
54
55 if (!var->data.patch &&
56 ((var->data.mode == ir_var_shader_out &&
57 stage == MESA_SHADER_TESS_CTRL) ||
58 (var->data.mode == ir_var_shader_in &&
59 (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
60 stage == MESA_SHADER_GEOMETRY)))) {
61 assert(type->is_array());
62 type = type->fields.array;
63 }
64
65 return type;
66 }
67
68 static bool
69 varying_has_user_specified_location(const ir_variable *var)
70 {
71 return var->data.explicit_location &&
72 var->data.location >= VARYING_SLOT_VAR0;
73 }
74
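/**
 * Recursively append interface member names, struct field names and array
 * subscripts to *name, recording one flattened name per captured varying
 * in *varying_names. For example, a struct output "s" with members "a"
 * and "b" produces the names "s.a" and "s.b".
 */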
75 static void
76 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
77 size_t name_length, unsigned *count,
78 const char *ifc_member_name,
79 const glsl_type *ifc_member_t, char ***varying_names)
80 {
81 if (t->is_interface()) {
82 size_t new_length = name_length;
83
84 assert(ifc_member_name && ifc_member_t);
85 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
86
87 create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
88 NULL, NULL, varying_names);
89 } else if (t->is_struct()) {
90 for (unsigned i = 0; i < t->length; i++) {
91 const char *field = t->fields.structure[i].name;
92 size_t new_length = name_length;
93
94 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
95
96 create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
97 new_length, count, NULL, NULL,
98 varying_names);
99 }
100 } else if (t->without_array()->is_struct() ||
101 t->without_array()->is_interface() ||
102 (t->is_array() && t->fields.array->is_array())) {
103 for (unsigned i = 0; i < t->length; i++) {
104 size_t new_length = name_length;
105
106 /* Append the subscript to the current variable name */
107 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
108
109 create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
110 count, ifc_member_name, ifc_member_t,
111 varying_names);
112 }
113 } else {
114 (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
115 }
116 }
117
118 static bool
119 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
120 struct gl_shader_program *prog,
121 unsigned *num_tfeedback_decls,
122 char ***varying_names)
123 {
124 bool has_xfb_qualifiers = false;
125
126 /* We still need to enable transform feedback mode even if xfb_stride is
127 * only applied to a global out. Also, we don't bother to propagate
128 * xfb_stride to interface block members, so this catches that case as well.
129 */
130 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
131 if (prog->TransformFeedback.BufferStride[j]) {
132 has_xfb_qualifiers = true;
133 break;
134 }
135 }
136
137 foreach_in_list(ir_instruction, node, sh->ir) {
138 ir_variable *var = node->as_variable();
139 if (!var || var->data.mode != ir_var_shader_out)
140 continue;
141
142 /* From the ARB_enhanced_layouts spec:
143 *
144 * "Any shader making any static use (after preprocessing) of any of
145 * these *xfb_* qualifiers will cause the shader to be in a
146 * transform feedback capturing mode and hence responsible for
147 * describing the transform feedback setup. This mode will capture
148 * any output selected by *xfb_offset*, directly or indirectly, to
149 * a transform feedback buffer."
150 */
151 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
152 has_xfb_qualifiers = true;
153 }
154
155 if (var->data.explicit_xfb_offset) {
156 *num_tfeedback_decls += var->type->varying_count();
157 has_xfb_qualifiers = true;
158 }
159 }
160
161 if (*num_tfeedback_decls == 0)
162 return has_xfb_qualifiers;
163
164 unsigned i = 0;
165 *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
166 foreach_in_list(ir_instruction, node, sh->ir) {
167 ir_variable *var = node->as_variable();
168 if (!var || var->data.mode != ir_var_shader_out)
169 continue;
170
171 if (var->data.explicit_xfb_offset) {
172 char *name;
173 const glsl_type *type, *member_type;
174
175 if (var->data.from_named_ifc_block) {
176 type = var->get_interface_type();
177
178 /* Find the member type before it was altered by lowering */
179 const glsl_type *type_wa = type->without_array();
180 member_type =
181 type_wa->fields.structure[type_wa->field_index(var->name)].type;
182 name = ralloc_strdup(NULL, type_wa->name);
183 } else {
184 type = var->type;
185 member_type = NULL;
186 name = ralloc_strdup(NULL, var->name);
187 }
188 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
189 var->name, member_type, varying_names);
190 ralloc_free(name);
191 }
192 }
193
194 assert(i == *num_tfeedback_decls);
195 return has_xfb_qualifiers;
196 }
197
198 /**
199 * Validate the types and qualifiers of an output from one stage against the
200 * matching input to another stage.
201 */
202 static void
203 cross_validate_types_and_qualifiers(struct gl_context *ctx,
204 struct gl_shader_program *prog,
205 const ir_variable *input,
206 const ir_variable *output,
207 gl_shader_stage consumer_stage,
208 gl_shader_stage producer_stage)
209 {
210 /* Check that the types match between stages.
211 */
212 const glsl_type *type_to_match = input->type;
213
214 /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
215 const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
216 consumer_stage != MESA_SHADER_FRAGMENT) ||
217 consumer_stage == MESA_SHADER_GEOMETRY;
218 if (extra_array_level) {
219 assert(type_to_match->is_array());
220 type_to_match = type_to_match->fields.array;
221 }
222
223 if (type_to_match != output->type) {
224 if (output->type->is_struct()) {
225 /* Structures across shader stages can have different names
226 * and are considered to match in type if and only if their
227 * members match in name, type, qualification, and declaration
228 * order. Precision does not need to match.
229 */
230 if (!output->type->record_compare(type_to_match,
231 false, /* match_name */
232 true, /* match_locations */
233 false /* match_precision */)) {
234 linker_error(prog,
235 "%s shader output `%s' declared as struct `%s', "
236 "doesn't match in type with %s shader input "
237 "declared as struct `%s'\n",
238 _mesa_shader_stage_to_string(producer_stage),
239 output->name,
240 output->type->name,
241 _mesa_shader_stage_to_string(consumer_stage),
242 input->type->name);
243 }
244 } else if (!output->type->is_array() || !is_gl_identifier(output->name)) {
245 /* There is a bit of a special case for gl_TexCoord. This
246 * built-in is unsized by default. Applications that access it
247 * with a variable index must redeclare it with a size. There is
248 * some language in the GLSL spec that implies the fragment shader
249 * and vertex shader do not have to agree on this size. Other
250 * drivers behave this way, and one or two applications seem to
251 * rely on it.
252 *
253 * Neither declaration needs to be modified here because the array
254 * sizes are fixed later when update_array_sizes is called.
255 *
256 * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
257 *
258 * "Unlike user-defined varying variables, the built-in
259 * varying variables don't have a strict one-to-one
260 * correspondence between the vertex language and the
261 * fragment language."
262 */
263 linker_error(prog,
264 "%s shader output `%s' declared as type `%s', "
265 "but %s shader input declared as type `%s'\n",
266 _mesa_shader_stage_to_string(producer_stage),
267 output->name,
268 output->type->name,
269 _mesa_shader_stage_to_string(consumer_stage),
270 input->type->name);
271 return;
272 }
273 }
274
275 /* Check that all of the qualifiers match between stages.
276 */
277
278 /* According to the OpenGL and OpenGL ES GLSL specs, the centroid qualifier
279 * must match prior to OpenGL 4.3 and OpenGL ES 3.1. However, the OpenGL ES
280 * 3.0 conformance test suite does not verify that the qualifiers match, and
281 * the deqp test suite expects the opposite (OpenGL ES 3.1) behavior from
282 * OpenGL ES 3.0 drivers, so we relax the check in all cases.
283 */
284 if (false /* always skip the centroid check */ &&
285 prog->data->Version < (prog->IsES ? 310 : 430) &&
286 input->data.centroid != output->data.centroid) {
287 linker_error(prog,
288 "%s shader output `%s' %s centroid qualifier, "
289 "but %s shader input %s centroid qualifier\n",
290 _mesa_shader_stage_to_string(producer_stage),
291 output->name,
292 (output->data.centroid) ? "has" : "lacks",
293 _mesa_shader_stage_to_string(consumer_stage),
294 (input->data.centroid) ? "has" : "lacks");
295 return;
296 }
297
298 if (input->data.sample != output->data.sample) {
299 linker_error(prog,
300 "%s shader output `%s' %s sample qualifier, "
301 "but %s shader input %s sample qualifier\n",
302 _mesa_shader_stage_to_string(producer_stage),
303 output->name,
304 (output->data.sample) ? "has" : "lacks",
305 _mesa_shader_stage_to_string(consumer_stage),
306 (input->data.sample) ? "has" : "lacks");
307 return;
308 }
309
310 if (input->data.patch != output->data.patch) {
311 linker_error(prog,
312 "%s shader output `%s' %s patch qualifier, "
313 "but %s shader input %s patch qualifier\n",
314 _mesa_shader_stage_to_string(producer_stage),
315 output->name,
316 (output->data.patch) ? "has" : "lacks",
317 _mesa_shader_stage_to_string(consumer_stage),
318 (input->data.patch) ? "has" : "lacks");
319 return;
320 }
321
322 /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
323 *
324 * "As only outputs need be declared with invariant, an output from
325 * one shader stage will still match an input of a subsequent stage
326 * without the input being declared as invariant."
327 *
328 * while GLSL 4.10 says:
329 *
330 * "For variables leaving one shader and coming into another shader,
331 * the invariant keyword has to be used in both shaders, or a link
332 * error will result."
333 *
334 * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
335 *
336 * "The invariance of varyings that are declared in both the vertex
337 * and fragment shaders must match."
338 */
339 if (input->data.explicit_invariant != output->data.explicit_invariant &&
340 prog->data->Version < (prog->IsES ? 300 : 420)) {
341 linker_error(prog,
342 "%s shader output `%s' %s invariant qualifier, "
343 "but %s shader input %s invariant qualifier\n",
344 _mesa_shader_stage_to_string(producer_stage),
345 output->name,
346 (output->data.explicit_invariant) ? "has" : "lacks",
347 _mesa_shader_stage_to_string(consumer_stage),
348 (input->data.explicit_invariant) ? "has" : "lacks");
349 return;
350 }
351
352 /* GLSL >= 4.40 removes the text requiring interpolation qualifiers
353 * to match across stages; they must only match within the same stage.
354 *
355 * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
356 *
357 * "It is a link-time error if, within the same stage, the interpolation
358 * qualifiers of variables of the same name do not match."
359 *
360 * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
361 *
362 * "When no interpolation qualifier is present, smooth interpolation
363 * is used."
364 *
365 * So we match variables where one is smooth and the other has no explicit
366 * qualifier.
367 */
368 unsigned input_interpolation = input->data.interpolation;
369 unsigned output_interpolation = output->data.interpolation;
370 if (prog->IsES) {
371 if (input_interpolation == INTERP_MODE_NONE)
372 input_interpolation = INTERP_MODE_SMOOTH;
373 if (output_interpolation == INTERP_MODE_NONE)
374 output_interpolation = INTERP_MODE_SMOOTH;
375 }
376 if (input_interpolation != output_interpolation &&
377 prog->data->Version < 440) {
378 if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
379 linker_error(prog,
380 "%s shader output `%s' specifies %s "
381 "interpolation qualifier, "
382 "but %s shader input specifies %s "
383 "interpolation qualifier\n",
384 _mesa_shader_stage_to_string(producer_stage),
385 output->name,
386 interpolation_string(output->data.interpolation),
387 _mesa_shader_stage_to_string(consumer_stage),
388 interpolation_string(input->data.interpolation));
389 return;
390 } else {
391 linker_warning(prog,
392 "%s shader output `%s' specifies %s "
393 "interpolation qualifier, "
394 "but %s shader input specifies %s "
395 "interpolation qualifier\n",
396 _mesa_shader_stage_to_string(producer_stage),
397 output->name,
398 interpolation_string(output->data.interpolation),
399 _mesa_shader_stage_to_string(consumer_stage),
400 interpolation_string(input->data.interpolation));
401 }
402 }
403 }
404
405 /**
406 * Validate front and back color outputs against single color input
407 */
408 static void
409 cross_validate_front_and_back_color(struct gl_context *ctx,
410 struct gl_shader_program *prog,
411 const ir_variable *input,
412 const ir_variable *front_color,
413 const ir_variable *back_color,
414 gl_shader_stage consumer_stage,
415 gl_shader_stage producer_stage)
416 {
417 if (front_color != NULL && front_color->data.assigned)
418 cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
419 consumer_stage, producer_stage);
420
421 if (back_color != NULL && back_color->data.assigned)
422 cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
423 consumer_stage, producer_stage);
424 }
425
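/**
 * Convert a variable's explicit location into a zero-based slot index by
 * subtracting the base location of its storage class (generic vertex
 * attributes, patch varyings, fragment data outputs or generic varyings).
 */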
426 static unsigned
427 compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
428 {
429 unsigned location_start = VARYING_SLOT_VAR0;
430
431 switch (stage) {
432 case MESA_SHADER_VERTEX:
433 if (var->data.mode == ir_var_shader_in)
434 location_start = VERT_ATTRIB_GENERIC0;
435 break;
436 case MESA_SHADER_TESS_CTRL:
437 case MESA_SHADER_TESS_EVAL:
438 if (var->data.patch)
439 location_start = VARYING_SLOT_PATCH0;
440 break;
441 case MESA_SHADER_FRAGMENT:
442 if (var->data.mode == ir_var_shader_out)
443 location_start = FRAG_RESULT_DATA0;
444 break;
445 default:
446 break;
447 }
448
449 return var->data.location - location_start;
450 }
451
452 struct explicit_location_info {
453 ir_variable *var;
454 bool base_type_is_integer;
455 unsigned base_type_bit_size;
456 unsigned interpolation;
457 bool centroid;
458 bool sample;
459 bool patch;
460 };
461
462 static bool
463 check_location_aliasing(struct explicit_location_info explicit_locations[][4],
464 ir_variable *var,
465 unsigned location,
466 unsigned component,
467 unsigned location_limit,
468 const glsl_type *type,
469 unsigned interpolation,
470 bool centroid,
471 bool sample,
472 bool patch,
473 gl_shader_program *prog,
474 gl_shader_stage stage)
475 {
476 unsigned last_comp;
477 unsigned base_type_bit_size;
478 const glsl_type *type_without_array = type->without_array();
479 const bool base_type_is_integer =
480 glsl_base_type_is_integer(type_without_array->base_type);
481 const bool is_struct = type_without_array->is_struct();
482 if (is_struct) {
483 /* Structs don't have a defined underlying base type, so just treat all
484 * component slots as used and set the bit size to 0. If there is
485 * location aliasing, we'll fail later anyway.
486 */
487 last_comp = 4;
488 base_type_bit_size = 0;
489 } else {
490 unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
491 last_comp = component + type_without_array->vector_elements * dmul;
492 base_type_bit_size =
493 glsl_base_type_get_bit_size(type_without_array->base_type);
494 }
495
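/* Walk every component slot of every location covered by this variable and
 * either record it as used or check that it is compatible with the variable
 * already recorded there.
 */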
496 while (location < location_limit) {
497 unsigned comp = 0;
498 while (comp < 4) {
499 struct explicit_location_info *info =
500 &explicit_locations[location][comp];
501
502 if (info->var) {
503 if (info->var->type->without_array()->is_struct() || is_struct) {
504 /* Structs cannot share a location since they are incompatible
505 * with any other underlying numerical type.
506 */
507 linker_error(prog,
508 "%s shader has multiple %sputs sharing the "
509 "same location that don't have the same "
510 "underlying numerical type. Struct variable '%s', "
511 "location %u\n",
512 _mesa_shader_stage_to_string(stage),
513 var->data.mode == ir_var_shader_in ? "in" : "out",
514 is_struct ? var->name : info->var->name,
515 location);
516 return false;
517 } else if (comp >= component && comp < last_comp) {
518 /* Component aliasing is not allowed */
519 linker_error(prog,
520 "%s shader has multiple %sputs explicitly "
521 "assigned to location %d and component %d\n",
522 _mesa_shader_stage_to_string(stage),
523 var->data.mode == ir_var_shader_in ? "in" : "out",
524 location, comp);
525 return false;
526 } else {
527 /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
528 * Qualifiers, Page 67, (Location aliasing):
529 *
530 * " Further, when location aliasing, the aliases sharing the
531 * location must have the same underlying numerical type
532 * and bit width (floating-point or integer, 32-bit versus
533 * 64-bit, etc.) and the same auxiliary storage and
534 * interpolation qualification."
535 */
536
537 /* If the underlying numerical type isn't integer, it is implicitly
538 * float, or else we would have failed by now.
539 */
540 if (info->base_type_is_integer != base_type_is_integer) {
541 linker_error(prog,
542 "%s shader has multiple %sputs sharing the "
543 "same location that don't have the same "
544 "underlying numerical type. Location %u "
545 "component %u.\n",
546 _mesa_shader_stage_to_string(stage),
547 var->data.mode == ir_var_shader_in ?
548 "in" : "out", location, comp);
549 return false;
550 }
551
552 if (info->base_type_bit_size != base_type_bit_size) {
553 linker_error(prog,
554 "%s shader has multiple %sputs sharing the "
555 "same location that don't have the same "
556 "underlying numerical bit size. Location %u "
557 "component %u.\n",
558 _mesa_shader_stage_to_string(stage),
559 var->data.mode == ir_var_shader_in ?
560 "in" : "out", location, comp);
561 return false;
562 }
563
564 if (info->interpolation != interpolation) {
565 linker_error(prog,
566 "%s shader has multiple %sputs sharing the "
567 "same location that don't have the same "
568 "interpolation qualification. Location %u "
569 "component %u.\n",
570 _mesa_shader_stage_to_string(stage),
571 var->data.mode == ir_var_shader_in ?
572 "in" : "out", location, comp);
573 return false;
574 }
575
576 if (info->centroid != centroid ||
577 info->sample != sample ||
578 info->patch != patch) {
579 linker_error(prog,
580 "%s shader has multiple %sputs sharing the "
581 "same location that don't have the same "
582 "auxiliary storage qualification. Location %u "
583 "component %u.\n",
584 _mesa_shader_stage_to_string(stage),
585 var->data.mode == ir_var_shader_in ?
586 "in" : "out", location, comp);
587 return false;
588 }
589 }
590 } else if (comp >= component && comp < last_comp) {
591 info->var = var;
592 info->base_type_is_integer = base_type_is_integer;
593 info->base_type_bit_size = base_type_bit_size;
594 info->interpolation = interpolation;
595 info->centroid = centroid;
596 info->sample = sample;
597 info->patch = patch;
598 }
599
600 comp++;
601
602 /* We need to do some special handling for doubles as dvec3 and
603 * dvec4 consume two consecutive locations. We don't need to
604 * worry about components beginning at anything other than 0 as
605 * the spec does not allow this for dvec3 and dvec4.
606 */
607 if (comp == 4 && last_comp > 4) {
608 last_comp = last_comp - 4;
609 /* Bump location index and reset the component index */
610 location++;
611 comp = 0;
612 component = 0;
613 }
614 }
615
616 location++;
617 }
618
619 return true;
620 }
621
622 static bool
623 validate_explicit_variable_location(struct gl_context *ctx,
624 struct explicit_location_info explicit_locations[][4],
625 ir_variable *var,
626 gl_shader_program *prog,
627 gl_linked_shader *sh)
628 {
629 const glsl_type *type = get_varying_type(var, sh->Stage);
630 unsigned num_elements = type->count_attribute_slots(false);
631 unsigned idx = compute_variable_location_slot(var, sh->Stage);
632 unsigned slot_limit = idx + num_elements;
633
634 /* Vertex shader inputs and fragment shader outputs are validated in
635 * assign_attribute_or_color_locations() so we should not attempt to
636 * validate them again here.
637 */
638 unsigned slot_max;
639 if (var->data.mode == ir_var_shader_out) {
640 assert(sh->Stage != MESA_SHADER_FRAGMENT);
641 slot_max =
642 ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
643 } else {
644 assert(var->data.mode == ir_var_shader_in);
645 assert(sh->Stage != MESA_SHADER_VERTEX);
646 slot_max =
647 ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
648 }
649
650 if (slot_limit > slot_max) {
651 linker_error(prog,
652 "Invalid location %u in %s shader\n",
653 idx, _mesa_shader_stage_to_string(sh->Stage));
654 return false;
655 }
656
657 const glsl_type *type_without_array = type->without_array();
658 if (type_without_array->is_interface()) {
659 for (unsigned i = 0; i < type_without_array->length; i++) {
660 glsl_struct_field *field = &type_without_array->fields.structure[i];
661 unsigned field_location = field->location -
662 (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
663 unsigned field_slots = field->type->count_attribute_slots(false);
664 if (!check_location_aliasing(explicit_locations, var,
665 field_location,
666 0,
667 field_location + field_slots,
668 field->type,
669 field->interpolation,
670 field->centroid,
671 field->sample,
672 field->patch,
673 prog, sh->Stage)) {
674 return false;
675 }
676 }
677 } else if (!check_location_aliasing(explicit_locations, var,
678 idx, var->data.location_frac,
679 slot_limit, type,
680 var->data.interpolation,
681 var->data.centroid,
682 var->data.sample,
683 var->data.patch,
684 prog, sh->Stage)) {
685 return false;
686 }
687
688 return true;
689 }
690
691 /**
692 * Validate explicit locations for the inputs to the first stage and the
693 * outputs of the last stage in a program, if those are not the VS and FS
694 * shaders.
695 */
696 void
697 validate_first_and_last_interface_explicit_locations(struct gl_context *ctx,
698 struct gl_shader_program *prog,
699 gl_shader_stage first_stage,
700 gl_shader_stage last_stage)
701 {
702 /* VS inputs and FS outputs are validated in
703 * assign_attribute_or_color_locations()
704 */
705 bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
706 bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
707 if (!validate_first_stage && !validate_last_stage)
708 return;
709
710 struct explicit_location_info explicit_locations[MAX_VARYING][4];
711
712 gl_shader_stage stages[2] = { first_stage, last_stage };
713 bool validate_stage[2] = { validate_first_stage, validate_last_stage };
714 ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
715
716 for (unsigned i = 0; i < 2; i++) {
717 if (!validate_stage[i])
718 continue;
719
720 gl_shader_stage stage = stages[i];
721
722 gl_linked_shader *sh = prog->_LinkedShaders[stage];
723 assert(sh);
724
725 memset(explicit_locations, 0, sizeof(explicit_locations));
726
727 foreach_in_list(ir_instruction, node, sh->ir) {
728 ir_variable *const var = node->as_variable();
729
730 if (var == NULL ||
731 !var->data.explicit_location ||
732 var->data.location < VARYING_SLOT_VAR0 ||
733 var->data.mode != var_direction[i])
734 continue;
735
736 if (!validate_explicit_variable_location(
737 ctx, explicit_locations, var, prog, sh)) {
738 return;
739 }
740 }
741 }
742 }
743
744 /**
745 * Check if we should force input / output matching between shader
746 * interfaces.
747 *
748 * Section 4.3.4 (Inputs) of the GLSL 4.10 specification says:
749 *
750 * "Only the input variables that are actually read need to be
751 * written by the previous stage; it is allowed to have
752 * superfluous declarations of input variables."
753 *
754 * However it's not defined anywhere as to how we should handle
755 * inputs that are not written in the previous stage and it's not
756 * clear what "actually read" means.
757 *
758 * The GLSL 4.20 spec however is much clearer:
759 *
760 * "Only the input variables that are statically read need to
761 * be written by the previous stage; it is allowed to have
762 * superfluous declarations of input variables."
763 *
764 * It also has a table stating that it is an error to statically
765 * read an input that is not defined in the previous stage, while
766 * it is not an error for an output to never be statically written
767 * (it just needs to be declared).
768 *
769 * The text in the GLSL 4.20 spec was an attempt to clarify the
770 * previous spec iterations. However, given the difference between the
771 * specs, and that some applications seem to depend on not erroring
772 * when an input is not actually read in control flow, we only apply
773 * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
774 * seen in the wild that depend on the less strict interpretation.
775 */
776 static bool
777 static_input_output_matching(struct gl_shader_program *prog)
778 {
779 return prog->data->Version >= (prog->IsES ? 0 : 420);
780 }
781
782 /**
783 * Validate that outputs from one stage match inputs of another
784 */
785 void
786 cross_validate_outputs_to_inputs(struct gl_context *ctx,
787 struct gl_shader_program *prog,
788 gl_linked_shader *producer,
789 gl_linked_shader *consumer)
790 {
791 glsl_symbol_table parameters;
792 struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
793 struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};
794
795 /* Find all shader outputs in the "producer" stage.
796 */
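/* Outputs without an explicit generic location are collected into the
 * symbol table and later matched by name; outputs with explicit locations
 * are instead recorded per slot and component in output_explicit_locations.
 */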
797 foreach_in_list(ir_instruction, node, producer->ir) {
798 ir_variable *const var = node->as_variable();
799
800 if (var == NULL || var->data.mode != ir_var_shader_out)
801 continue;
802
803 if (!var->data.explicit_location
804 || var->data.location < VARYING_SLOT_VAR0)
805 parameters.add_variable(var);
806 else {
807 /* User-defined varyings with explicit locations are handled
808 * differently because they do not need to have matching names.
809 */
810 if (!validate_explicit_variable_location(ctx,
811 output_explicit_locations,
812 var, prog, producer)) {
813 return;
814 }
815 }
816 }
817
818
819 /* Find all shader inputs in the "consumer" stage. Any variables that have
820 * matching outputs already in the symbol table must have the same type and
821 * qualifiers.
822 *
823 * Exception: if the consumer is the geometry shader, then the inputs
824 * should be arrays and the type of the array element should match the type
825 * of the corresponding producer output.
826 */
827 foreach_in_list(ir_instruction, node, consumer->ir) {
828 ir_variable *const input = node->as_variable();
829
830 if (input == NULL || input->data.mode != ir_var_shader_in)
831 continue;
832
833 if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
834 const ir_variable *const front_color =
835 parameters.get_variable("gl_FrontColor");
836
837 const ir_variable *const back_color =
838 parameters.get_variable("gl_BackColor");
839
840 cross_validate_front_and_back_color(ctx, prog, input,
841 front_color, back_color,
842 consumer->Stage, producer->Stage);
843 } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
844 const ir_variable *const front_color =
845 parameters.get_variable("gl_FrontSecondaryColor");
846
847 const ir_variable *const back_color =
848 parameters.get_variable("gl_BackSecondaryColor");
849
850 cross_validate_front_and_back_color(ctx, prog, input,
851 front_color, back_color,
852 consumer->Stage, producer->Stage);
853 } else {
854 /* The rules for connecting inputs and outputs change in the presence
855 * of explicit locations. In this case, we no longer care about the
856 * names of the variables. Instead, we care only about the
857 * explicitly assigned location.
858 */
859 ir_variable *output = NULL;
860 if (input->data.explicit_location
861 && input->data.location >= VARYING_SLOT_VAR0) {
862
863 const glsl_type *type = get_varying_type(input, consumer->Stage);
864 unsigned num_elements = type->count_attribute_slots(false);
865 unsigned idx =
866 compute_variable_location_slot(input, consumer->Stage);
867 unsigned slot_limit = idx + num_elements;
868
869 if (!validate_explicit_variable_location(ctx,
870 input_explicit_locations,
871 input, prog, consumer)) {
872 return;
873 }
874
875 while (idx < slot_limit) {
876 if (idx >= MAX_VARYING) {
877 linker_error(prog,
878 "Invalid location %u in %s shader\n", idx,
879 _mesa_shader_stage_to_string(consumer->Stage));
880 return;
881 }
882
883 output = output_explicit_locations[idx][input->data.location_frac].var;
884
885 if (output == NULL) {
886 /* A linker failure should only happen when there is no
887 * output declaration and there is Static Use of the
888 * declared input.
889 */
890 if (input->data.used && static_input_output_matching(prog)) {
891 linker_error(prog,
892 "%s shader input `%s' with explicit location "
893 "has no matching output\n",
894 _mesa_shader_stage_to_string(consumer->Stage),
895 input->name);
896 break;
897 }
898 } else if (input->data.location != output->data.location) {
899 linker_error(prog,
900 "%s shader input `%s' with explicit location "
901 "has no matching output\n",
902 _mesa_shader_stage_to_string(consumer->Stage),
903 input->name);
904 break;
905 }
906 idx++;
907 }
908 } else {
909 output = parameters.get_variable(input->name);
910 }
911
912 if (output != NULL) {
913 /* Interface blocks have their own validation elsewhere so don't
914 * try validating them here.
915 */
916 if (!(input->get_interface_type() &&
917 output->get_interface_type()))
918 cross_validate_types_and_qualifiers(ctx, prog, input, output,
919 consumer->Stage,
920 producer->Stage);
921 } else {
922 /* Check for input vars with unmatched output vars in prev stage
923 * taking into account that interface blocks could have a matching
924 * output but with a different name, so we ignore them.
925 */
926 assert(!input->data.assigned);
927 if (input->data.used && !input->get_interface_type() &&
928 !input->data.explicit_location &&
929 static_input_output_matching(prog))
930 linker_error(prog,
931 "%s shader input `%s' "
932 "has no matching output in the previous stage\n",
933 _mesa_shader_stage_to_string(consumer->Stage),
934 input->name);
935 }
936 }
937 }
938 }
939
940 /**
941 * Demote shader inputs and outputs that are not used in other stages, and
942 * remove them via dead code elimination.
943 */
944 static void
945 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
946 gl_linked_shader *sh,
947 enum ir_variable_mode mode)
948 {
949 if (is_separate_shader_object)
950 return;
951
952 foreach_in_list(ir_instruction, node, sh->ir) {
953 ir_variable *const var = node->as_variable();
954
955 if (var == NULL || var->data.mode != int(mode))
956 continue;
957
958 /* A shader 'in' or 'out' variable is only really an input or output if
959 * its value is used by other shader stages. This will cause the
960 * variable to have a location assigned.
961 */
962 if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
963 assert(var->data.mode != ir_var_temporary);
964
965 /* Assign zeros to demoted inputs to allow more optimizations. */
966 if (var->data.mode == ir_var_shader_in && !var->constant_value)
967 var->constant_value = ir_constant::zero(var, var->type);
968
969 var->data.mode = ir_var_auto;
970 }
971 }
972
973 /* Eliminate code that is now dead due to unused inputs/outputs being
974 * demoted.
975 */
976 while (do_dead_code(sh->ir, false))
977 ;
978
979 }
980
981 /**
982 * Initialize this object based on a string that was passed to
983 * glTransformFeedbackVaryings.
984 *
985 * If the input is mal-formed, this call still succeeds, but it sets
986 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
987 * will fail to find any matching variable.
988 */
989 void
990 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
991 const char *input)
992 {
993 /* We don't have to be pedantic about what is a valid GLSL variable name,
994 * because any variable with an invalid name can't exist in the IR anyway.
995 */
996
997 this->location = -1;
998 this->orig_name = input;
999 this->lowered_builtin_array_variable = none;
1000 this->skip_components = 0;
1001 this->next_buffer_separator = false;
1002 this->matched_candidate = NULL;
1003 this->stream_id = 0;
1004 this->buffer = 0;
1005 this->offset = 0;
1006
1007 if (ctx->Extensions.ARB_transform_feedback3) {
1008 /* Parse gl_NextBuffer. */
1009 if (strcmp(input, "gl_NextBuffer") == 0) {
1010 this->next_buffer_separator = true;
1011 return;
1012 }
1013
1014 /* Parse gl_SkipComponents. */
1015 if (strcmp(input, "gl_SkipComponents1") == 0)
1016 this->skip_components = 1;
1017 else if (strcmp(input, "gl_SkipComponents2") == 0)
1018 this->skip_components = 2;
1019 else if (strcmp(input, "gl_SkipComponents3") == 0)
1020 this->skip_components = 3;
1021 else if (strcmp(input, "gl_SkipComponents4") == 0)
1022 this->skip_components = 4;
1023
1024 if (this->skip_components)
1025 return;
1026 }
1027
1028 /* Parse a declaration. */
1029 const char *base_name_end;
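/* parse_program_resource_name() splits "foo[2]" into the base name and an
 * array subscript; a negative subscript means the name was not subscripted.
 */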
1030 long subscript = parse_program_resource_name(input, strlen(input),
1031 &base_name_end);
1032 this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
1033 if (this->var_name == NULL) {
1034 _mesa_error_no_memory(__func__);
1035 return;
1036 }
1037
1038 if (subscript >= 0) {
1039 this->array_subscript = subscript;
1040 this->is_subscripted = true;
1041 } else {
1042 this->is_subscripted = false;
1043 }
1044
1045 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
1046 * class must behave specially to account for the fact that gl_ClipDistance
1047 * is converted from a float[8] to a vec4[2].
1048 */
1049 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1050 strcmp(this->var_name, "gl_ClipDistance") == 0) {
1051 this->lowered_builtin_array_variable = clip_distance;
1052 }
1053 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1054 strcmp(this->var_name, "gl_CullDistance") == 0) {
1055 this->lowered_builtin_array_variable = cull_distance;
1056 }
1057
1058 if (ctx->Const.LowerTessLevel &&
1059 (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
1060 this->lowered_builtin_array_variable = tess_level_outer;
1061 if (ctx->Const.LowerTessLevel &&
1062 (strcmp(this->var_name, "gl_TessLevelInner") == 0))
1063 this->lowered_builtin_array_variable = tess_level_inner;
1064 }
1065
1066
1067 /**
1068 * Determine whether two tfeedback_decl objects refer to the same variable and
1069 * array index (if applicable).
1070 */
1071 bool
1072 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
1073 {
1074 assert(x.is_varying() && y.is_varying());
1075
1076 if (strcmp(x.var_name, y.var_name) != 0)
1077 return false;
1078 if (x.is_subscripted != y.is_subscripted)
1079 return false;
1080 if (x.is_subscripted && x.array_subscript != y.array_subscript)
1081 return false;
1082 return true;
1083 }
1084
1085
1086 /**
1087 * Assign a location and stream ID for this tfeedback_decl object based on the
1088 * transform feedback candidate found by find_candidate.
1089 *
1090 * If an error occurs, the error is reported through linker_error() and false
1091 * is returned.
1092 */
1093 bool
1094 tfeedback_decl::assign_location(struct gl_context *ctx,
1095 struct gl_shader_program *prog)
1096 {
1097 assert(this->is_varying());
1098
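/* Work in units of float components: each varying slot holds four floats,
 * so slot N, component M corresponds to fine location N * 4 + M.
 */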
1099 unsigned fine_location
1100 = this->matched_candidate->toplevel_var->data.location * 4
1101 + this->matched_candidate->toplevel_var->data.location_frac
1102 + this->matched_candidate->struct_offset_floats;
1103 const unsigned dmul =
1104 this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
1105
1106 if (this->matched_candidate->type->is_array()) {
1107 /* Array variable */
1108 const unsigned matrix_cols =
1109 this->matched_candidate->type->fields.array->matrix_columns;
1110 const unsigned vector_elements =
1111 this->matched_candidate->type->fields.array->vector_elements;
1112 unsigned actual_array_size;
1113 switch (this->lowered_builtin_array_variable) {
1114 case clip_distance:
1115 actual_array_size = prog->last_vert_prog ?
1116 prog->last_vert_prog->info.clip_distance_array_size : 0;
1117 break;
1118 case cull_distance:
1119 actual_array_size = prog->last_vert_prog ?
1120 prog->last_vert_prog->info.cull_distance_array_size : 0;
1121 break;
1122 case tess_level_outer:
1123 actual_array_size = 4;
1124 break;
1125 case tess_level_inner:
1126 actual_array_size = 2;
1127 break;
1128 case none:
1129 default:
1130 actual_array_size = this->matched_candidate->type->array_size();
1131 break;
1132 }
1133
1134 if (this->is_subscripted) {
1135 /* Check array bounds. */
1136 if (this->array_subscript >= actual_array_size) {
1137 linker_error(prog, "Transform feedback varying %s has index "
1138 "%i, but the array size is %u.",
1139 this->orig_name, this->array_subscript,
1140 actual_array_size);
1141 return false;
1142 }
1143 unsigned array_elem_size = this->lowered_builtin_array_variable ?
1144 1 : vector_elements * matrix_cols * dmul;
1145 fine_location += array_elem_size * this->array_subscript;
1146 this->size = 1;
1147 } else {
1148 this->size = actual_array_size;
1149 }
1150 this->vector_elements = vector_elements;
1151 this->matrix_columns = matrix_cols;
1152 if (this->lowered_builtin_array_variable)
1153 this->type = GL_FLOAT;
1154 else
1155 this->type = this->matched_candidate->type->fields.array->gl_type;
1156 } else {
1157 /* Regular variable (scalar, vector, or matrix) */
1158 if (this->is_subscripted) {
1159 linker_error(prog, "Transform feedback varying %s requested, "
1160 "but %s is not an array.",
1161 this->orig_name, this->var_name);
1162 return false;
1163 }
1164 this->size = 1;
1165 this->vector_elements = this->matched_candidate->type->vector_elements;
1166 this->matrix_columns = this->matched_candidate->type->matrix_columns;
1167 this->type = this->matched_candidate->type->gl_type;
1168 }
1169 this->location = fine_location / 4;
1170 this->location_frac = fine_location % 4;
1171
1172 /* From GL_EXT_transform_feedback:
1173 * A program will fail to link if:
1174 *
1175 * * the total number of components to capture in any varying
1176 * variable in <varyings> is greater than the constant
1177 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1178 * buffer mode is SEPARATE_ATTRIBS_EXT;
1179 */
1180 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1181 this->num_components() >
1182 ctx->Const.MaxTransformFeedbackSeparateComponents) {
1183 linker_error(prog, "Transform feedback varying %s exceeds "
1184 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1185 this->orig_name);
1186 return false;
1187 }
1188
1189 /* Only transform feedback varyings can be assigned to non-zero streams,
1190 * so assign the stream id here.
1191 */
1192 this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1193
1194 unsigned array_offset = this->array_subscript * 4 * dmul;
1195 unsigned struct_offset = this->matched_candidate->xfb_offset_floats * 4;
1196 this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1197 this->offset = this->matched_candidate->toplevel_var->data.offset +
1198 array_offset + struct_offset;
1199
1200 return true;
1201 }
1202
1203
1204 unsigned
1205 tfeedback_decl::get_num_outputs() const
1206 {
1207 if (!this->is_varying()) {
1208 return 0;
1209 }
1210
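/* Each transform feedback output record covers at most one 4-float slot.
 * With a user-specified location, 64-bit types occupy two components each,
 * so a dvec3/dvec4 element needs two slots per matrix column or array
 * element; otherwise round the component count (plus the starting component
 * offset) up to whole slots.
 */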
1211 if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
1212 unsigned dmul = this->is_64bit() ? 2 : 1;
1213 unsigned rows_per_element = DIV_ROUND_UP(this->vector_elements * dmul, 4);
1214 return this->size * this->matrix_columns * rows_per_element;
1215 } else {
1216 return (this->num_components() + this->location_frac + 3) / 4;
1217 }
1218 }
1219
1220
1221 /**
1222 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1223 *
1224 * If an error occurs, the error is reported through linker_error() and false
1225 * is returned.
1226 */
1227 bool
1228 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1229 struct gl_transform_feedback_info *info,
1230 unsigned buffer, unsigned buffer_index,
1231 const unsigned max_outputs,
1232 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
1233 bool *explicit_stride, unsigned *max_member_alignment,
1234 bool has_xfb_qualifiers, const void* mem_ctx) const
1235 {
1236 unsigned xfb_offset = 0;
1237 unsigned size = this->size;
1238 /* Handle gl_SkipComponents. */
1239 if (this->skip_components) {
1240 info->Buffers[buffer].Stride += this->skip_components;
1241 size = this->skip_components;
1242 goto store_varying;
1243 }
1244
1245 if (this->next_buffer_separator) {
1246 size = 0;
1247 goto store_varying;
1248 }
1249
1250 if (has_xfb_qualifiers) {
1251 xfb_offset = this->offset / 4;
1252 } else {
1253 xfb_offset = info->Buffers[buffer].Stride;
1254 }
1255 info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1256
1257 {
1258 unsigned location = this->location;
1259 unsigned location_frac = this->location_frac;
1260 unsigned num_components = this->num_components();
1261
1262 /* From GL_EXT_transform_feedback:
1263 *
1264 * " A program will fail to link if:
1265 *
1266 * * the total number of components to capture is greater than the
1267 * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1268 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
1269 *
1270 * From GL_ARB_enhanced_layouts:
1271 *
1272 * " The resulting stride (implicit or explicit) must be less than or
1273 * equal to the implementation-dependent constant
1274 * gl_MaxTransformFeedbackInterleavedComponents."
1275 */
1276 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1277 has_xfb_qualifiers) &&
1278 xfb_offset + num_components >
1279 ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1280 linker_error(prog,
1281 "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1282 "limit has been exceeded.");
1283 return false;
1284 }
1285
1286 /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
1287 * Page 76, (Transform Feedback Layout Qualifiers):
1288 *
1289 * " No aliasing in output buffers is allowed: It is a compile-time or
1290 * link-time error to specify variables with overlapping transform
1291 * feedback offsets."
1292 */
1293 const unsigned max_components =
1294 ctx->Const.MaxTransformFeedbackInterleavedComponents;
1295 const unsigned first_component = xfb_offset;
1296 const unsigned last_component = xfb_offset + num_components - 1;
1297 const unsigned start_word = BITSET_BITWORD(first_component);
1298 const unsigned end_word = BITSET_BITWORD(last_component);
1299 BITSET_WORD *used;
1300 assert(last_component < max_components);
1301
1302 if (!used_components[buffer]) {
1303 used_components[buffer] =
1304 rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
1305 }
1306 used = used_components[buffer];
1307
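/* Mark the captured component range [first_component, last_component] in
 * the per-buffer bitset, failing if any of those bits were already set by
 * a previous declaration.
 */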
1308 for (unsigned word = start_word; word <= end_word; word++) {
1309 unsigned start_range = 0;
1310 unsigned end_range = BITSET_WORDBITS - 1;
1311
1312 if (word == start_word)
1313 start_range = first_component % BITSET_WORDBITS;
1314
1315 if (word == end_word)
1316 end_range = last_component % BITSET_WORDBITS;
1317
1318 if (used[word] & BITSET_RANGE(start_range, end_range)) {
1319 linker_error(prog,
1320 "variable '%s', xfb_offset (%d) is causing aliasing.",
1321 this->orig_name, xfb_offset * 4);
1322 return false;
1323 }
1324 used[word] |= BITSET_RANGE(start_range, end_range);
1325 }
1326
1327 const unsigned type_num_components =
1328 this->vector_elements * (this->is_64bit() ? 2 : 1);
1329 unsigned current_type_components_left = type_num_components;
1330
1331 while (num_components > 0) {
1332 unsigned output_size = 0;
1333
1334 /* From GL_ARB_enhanced_layouts:
1335 *
1336 * "When an attribute variable declared using an array type is bound to
1337 * generic attribute index <i>, the active array elements are assigned to
1338 * consecutive generic attributes beginning with generic attribute <i>. The
1339 * number of attributes and components assigned to each element are
1340 * determined according to the data type of array elements and "component"
1341 * layout qualifier (if any) specified in the declaration of the array."
1342 *
1343 * "When an attribute variable declared using a matrix type is bound to a
1344 * generic attribute index <i>, its values are taken from consecutive generic
1345 * attributes beginning with generic attribute <i>. Such matrices are
1346 * treated as an array of column vectors with values taken from the generic
1347 * attributes.
1348 * This means there may be gaps in the varyings we are taking values from."
1349 *
1350 * Examples:
1351 *
1352 * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
1353 * | | |
1354 * | 32b 32b 32b 32b | 32b 32b 32b 32b |
1355 * | 0 X X Y Y | 4 X Y 0 0 |
1356 * | 1 Z Z 0 0 | 5 X Y 0 0 |
1357 * | 2 X X Y Y | 6 X Y 0 0 |
1358 * | 3 Z Z 0 0 | 7 X Y 0 0 |
1359 *
1360 */
1361 if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
1362 output_size = MIN3(num_components, current_type_components_left, 4);
1363 current_type_components_left -= output_size;
1364 if (current_type_components_left == 0) {
1365 current_type_components_left = type_num_components;
1366 }
1367 } else {
1368 output_size = MIN2(num_components, 4 - location_frac);
1369 }
1370
1371 assert((info->NumOutputs == 0 && max_outputs == 0) ||
1372 info->NumOutputs < max_outputs);
1373
1374 /* From the ARB_enhanced_layouts spec:
1375 *
1376 * "If such a block member or variable is not written during a shader
1377 * invocation, the buffer contents at the assigned offset will be
1378 * undefined. Even if there are no static writes to a variable or
1379 * member that is assigned a transform feedback offset, the space is
1380 * still allocated in the buffer and still affects the stride."
1381 */
1382 if (this->is_varying_written()) {
1383 info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1384 info->Outputs[info->NumOutputs].OutputRegister = location;
1385 info->Outputs[info->NumOutputs].NumComponents = output_size;
1386 info->Outputs[info->NumOutputs].StreamId = stream_id;
1387 info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1388 info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1389 ++info->NumOutputs;
1390 }
1391 info->Buffers[buffer].Stream = this->stream_id;
1392 xfb_offset += output_size;
1393
1394 num_components -= output_size;
1395 location++;
1396 location_frac = 0;
1397 }
1398 }
1399
1400 if (explicit_stride && explicit_stride[buffer]) {
1401 if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1402 linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1403 "multiple of 8 as its applied to a type that is or "
1404 "contains a double.",
1405 info->Buffers[buffer].Stride * 4);
1406 return false;
1407 }
1408
1409 if (xfb_offset > info->Buffers[buffer].Stride) {
1410 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1411 "buffer (%d)", xfb_offset * 4,
1412 info->Buffers[buffer].Stride * 4, buffer);
1413 return false;
1414 }
1415 } else {
1416 if (max_member_alignment && has_xfb_qualifiers) {
1417 max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
1418 this->is_64bit() ? 2 : 1);
1419 info->Buffers[buffer].Stride = ALIGN(xfb_offset,
1420 max_member_alignment[buffer]);
1421 } else {
1422 info->Buffers[buffer].Stride = xfb_offset;
1423 }
1424 }
1425
1426 store_varying:
1427 info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1428 this->orig_name);
1429 info->Varyings[info->NumVarying].Type = this->type;
1430 info->Varyings[info->NumVarying].Size = size;
1431 info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1432 info->NumVarying++;
1433 info->Buffers[buffer].NumVaryings++;
1434
1435 return true;
1436 }
1437
1438
1439 const tfeedback_candidate *
1440 tfeedback_decl::find_candidate(gl_shader_program *prog,
1441 hash_table *tfeedback_candidates)
1442 {
1443 const char *name = this->var_name;
1444 switch (this->lowered_builtin_array_variable) {
1445 case none:
1446 name = this->var_name;
1447 break;
1448 case clip_distance:
1449 name = "gl_ClipDistanceMESA";
1450 break;
1451 case cull_distance:
1452 name = "gl_CullDistanceMESA";
1453 break;
1454 case tess_level_outer:
1455 name = "gl_TessLevelOuterMESA";
1456 break;
1457 case tess_level_inner:
1458 name = "gl_TessLevelInnerMESA";
1459 break;
1460 }
1461 hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1462
1463 this->matched_candidate = entry ?
1464 (const tfeedback_candidate *) entry->data : NULL;
1465
1466 if (!this->matched_candidate) {
1467 /* From GL_EXT_transform_feedback:
1468 * A program will fail to link if:
1469 *
1470 * * any variable name specified in the <varyings> array is not
1471 * declared as an output in the geometry shader (if present) or
1472 * the vertex shader (if no geometry shader is present);
1473 */
1474 linker_error(prog, "Transform feedback varying %s undeclared.",
1475 this->orig_name);
1476 }
1477
1478 return this->matched_candidate;
1479 }
1480
1481 /**
1482 * Force a candidate over the previously matched one. This happens when a new
1483 * varying needs to be created to match the xfb declaration, for example,
1484 * to fulfill an alignment requirement.
1485 */
1486 void
1487 tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
1488 {
1489 this->matched_candidate = candidate;
1490
1491 /* The subscript part is no longer relevant */
1492 this->is_subscripted = false;
1493 this->array_subscript = 0;
1494 }
1495
1496
1497 /**
1498 * Parse all the transform feedback declarations that were passed to
1499 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1500 *
1501 * If an error occurs, the error is reported through linker_error() and false
1502 * is returned.
1503 */
1504 static bool
1505 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1506 const void *mem_ctx, unsigned num_names,
1507 char **varying_names, tfeedback_decl *decls)
1508 {
1509 for (unsigned i = 0; i < num_names; ++i) {
1510 decls[i].init(ctx, mem_ctx, varying_names[i]);
1511
1512 if (!decls[i].is_varying())
1513 continue;
1514
1515 /* From GL_EXT_transform_feedback:
1516 * A program will fail to link if:
1517 *
1518 * * any two entries in the <varyings> array specify the same varying
1519 * variable;
1520 *
1521 * We interpret this to mean "any two entries in the <varyings> array
1522 * specify the same varying variable and array index", since transform
1523 * feedback of arrays would be useless otherwise.
1524 */
1525 for (unsigned j = 0; j < i; ++j) {
1526 if (decls[j].is_varying()) {
1527 if (tfeedback_decl::is_same(decls[i], decls[j])) {
1528 linker_error(prog, "Transform feedback varying %s specified "
1529 "more than once.", varying_names[i]);
1530 return false;
1531 }
1532 }
1533 }
1534 }
1535 return true;
1536 }
1537
1538
1539 static int
1540 cmp_xfb_offset(const void * x_generic, const void * y_generic)
1541 {
1542 tfeedback_decl *x = (tfeedback_decl *) x_generic;
1543 tfeedback_decl *y = (tfeedback_decl *) y_generic;
1544
1545 if (x->get_buffer() != y->get_buffer())
1546 return x->get_buffer() - y->get_buffer();
1547 return x->get_offset() - y->get_offset();
1548 }
1549
1550 /**
1551 * Store transform feedback location assignments into
1552 * prog->sh.LinkedTransformFeedback based on the data stored in
1553 * tfeedback_decls.
1554 *
1555 * If an error occurs, the error is reported through linker_error() and false
1556 * is returned.
1557 */
1558 static bool
1559 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1560 unsigned num_tfeedback_decls,
1561 tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
1562 const void *mem_ctx)
1563 {
1564 if (!prog->last_vert_prog)
1565 return true;
1566
1567 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1568 * tracking the number of buffers doesn't overflow.
1569 */
1570 assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1571
1572 bool separate_attribs_mode =
1573 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1574
1575 struct gl_program *xfb_prog = prog->last_vert_prog;
1576 xfb_prog->sh.LinkedTransformFeedback =
1577 rzalloc(xfb_prog, struct gl_transform_feedback_info);
1578
1579 /* The xfb_offset qualifier does not have to be used in increasing order,
1580 * however some drivers expect to receive the list of transform feedback
1581 * declarations in order, so sort it now for convenience.
1582 */
1583 if (has_xfb_qualifiers) {
1584 qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1585 cmp_xfb_offset);
1586 }
1587
1588 xfb_prog->sh.LinkedTransformFeedback->Varyings =
1589 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1590 num_tfeedback_decls);
1591
1592 unsigned num_outputs = 0;
1593 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1594 if (tfeedback_decls[i].is_varying_written())
1595 num_outputs += tfeedback_decls[i].get_num_outputs();
1596 }
1597
1598 xfb_prog->sh.LinkedTransformFeedback->Outputs =
1599 rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1600 num_outputs);
1601
1602 unsigned num_buffers = 0;
1603 unsigned buffers = 0;
1604 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};
1605
1606 if (!has_xfb_qualifiers && separate_attribs_mode) {
1607 /* GL_SEPARATE_ATTRIBS */
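      /* In separate-attribs mode each captured varying gets its own buffer,
       * so the buffer index is simply the running count of declarations seen
       * so far.
       */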
1608 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1609 if (!tfeedback_decls[i].store(ctx, prog,
1610 xfb_prog->sh.LinkedTransformFeedback,
1611 num_buffers, num_buffers, num_outputs,
1612 used_components, NULL, NULL,
1613 has_xfb_qualifiers, mem_ctx))
1614 return false;
1615
1616 buffers |= 1 << num_buffers;
1617 num_buffers++;
1618 }
1619 }
1620 else {
1621 /* GL_INTERLEAVED_ATTRIBS */
1622 int buffer_stream_id = -1;
1623 unsigned buffer =
1624 num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1625 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1626 unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
1627 /* Apply any xfb_stride global qualifiers */
1628 if (has_xfb_qualifiers) {
1629 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1630 if (prog->TransformFeedback.BufferStride[j]) {
1631 explicit_stride[j] = true;
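               /* The xfb_stride layout qualifier is expressed in basic
                * machine units (bytes); the linked transform feedback info
                * stores strides in dwords, hence the division by 4.
                */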
1632 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1633 prog->TransformFeedback.BufferStride[j] / 4;
1634 }
1635 }
1636 }
1637
1638 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1639 if (has_xfb_qualifiers &&
1640 buffer != tfeedback_decls[i].get_buffer()) {
1641 /* we have moved to the next buffer so reset stream id */
1642 buffer_stream_id = -1;
1643 num_buffers++;
1644 }
1645
1646 if (tfeedback_decls[i].is_next_buffer_separator()) {
1647 if (!tfeedback_decls[i].store(ctx, prog,
1648 xfb_prog->sh.LinkedTransformFeedback,
1649 buffer, num_buffers, num_outputs,
1650 used_components, explicit_stride,
1651 max_member_alignment,
1652 has_xfb_qualifiers,
1653 mem_ctx))
1654 return false;
1655 num_buffers++;
1656 buffer_stream_id = -1;
1657 continue;
1658 }
1659
1660 if (has_xfb_qualifiers) {
1661 buffer = tfeedback_decls[i].get_buffer();
1662 } else {
1663 buffer = num_buffers;
1664 }
1665
1666 if (tfeedback_decls[i].is_varying()) {
1667 if (buffer_stream_id == -1) {
1668 /* First varying writing to this buffer: remember its stream */
1669 buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1670
1671 /* Only mark a buffer as active when there is a varying
1672 * attached to it. This behaviour is based on a revised version
1673 * of section 13.2.2 of the GL 4.6 spec.
1674 */
1675 buffers |= 1 << buffer;
1676 } else if (buffer_stream_id !=
1677 (int) tfeedback_decls[i].get_stream_id()) {
1678 /* Varying writes to the same buffer from a different stream */
1679 linker_error(prog,
1680 "Transform feedback can't capture varyings belonging "
1681 "to different vertex streams in a single buffer. "
1682 "Varying %s writes to buffer from stream %u, other "
1683 "varyings in the same buffer write from stream %u.",
1684 tfeedback_decls[i].name(),
1685 tfeedback_decls[i].get_stream_id(),
1686 buffer_stream_id);
1687 return false;
1688 }
1689 }
1690
1691 if (!tfeedback_decls[i].store(ctx, prog,
1692 xfb_prog->sh.LinkedTransformFeedback,
1693 buffer, num_buffers, num_outputs,
1694 used_components, explicit_stride,
1695 max_member_alignment,
1696 has_xfb_qualifiers,
1697 mem_ctx))
1698 return false;
1699 }
1700 }
1701
1702 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1703
1704 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1705 return true;
1706 }
1707
1708 namespace {
1709
1710 /**
1711 * Data structure recording the relationship between outputs of one shader
1712 * stage (the "producer") and inputs of another (the "consumer").
1713 */
1714 class varying_matches
1715 {
1716 public:
1717 varying_matches(bool disable_varying_packing,
1718 bool disable_xfb_packing,
1719 bool xfb_enabled,
1720 bool enhanced_layouts_enabled,
1721 gl_shader_stage producer_stage,
1722 gl_shader_stage consumer_stage);
1723 ~varying_matches();
1724 void record(ir_variable *producer_var, ir_variable *consumer_var);
1725 unsigned assign_locations(struct gl_shader_program *prog,
1726 uint8_t components[],
1727 uint64_t reserved_slots);
1728 void store_locations() const;
1729
1730 private:
1731 bool is_varying_packing_safe(const glsl_type *type,
1732 const ir_variable *var) const;
1733
1734 /**
1735 * If true, this driver disables varying packing, so all varyings need to
1736 * be aligned on slot boundaries, and take up a number of slots equal to
1737 * their number of matrix columns times their array size.
1738 *
1739 * Packing may also be disabled because our current packing method is not
1740 * safe in SSO or versions of OpenGL where interpolation qualifiers are not
1741 * guaranteed to match across stages.
1742 */
1743 const bool disable_varying_packing;
1744
1745 /**
1746 * If true, this driver disables packing for varyings used by transform
1747 * feedback.
1748 */
1749 const bool disable_xfb_packing;
1750
1751 /**
1752 * If true, this driver has transform feedback enabled. The transform
1753 * feedback code usually requires at least some packing to be done even
1754 * when varying packing is disabled; fortunately, where transform feedback
1755 * requires packing it's safe to override the disabled setting. See
1756 * is_varying_packing_safe().
1757 */
1758 const bool xfb_enabled;
1759
1760 const bool enhanced_layouts_enabled;
1761
1762 /**
1763 * Enum representing the order in which varyings are packed within a
1764 * packing class.
1765 *
1766 * Currently we pack vec4's first, then vec2's, then scalar values, then
1767 * vec3's. This order ensures that the only vectors that are at risk of
1768 * having to be "double parked" (split between two adjacent varying slots)
1769 * are the vec3's.
1770 */
1771 enum packing_order_enum {
1772 PACKING_ORDER_VEC4,
1773 PACKING_ORDER_VEC2,
1774 PACKING_ORDER_SCALAR,
1775 PACKING_ORDER_VEC3,
1776 };
1777
1778 static unsigned compute_packing_class(const ir_variable *var);
1779 static packing_order_enum compute_packing_order(const ir_variable *var);
1780 static int match_comparator(const void *x_generic, const void *y_generic);
1781 static int xfb_comparator(const void *x_generic, const void *y_generic);
1782 static int not_xfb_comparator(const void *x_generic, const void *y_generic);
1783
1784 /**
1785 * Structure recording the relationship between a single producer output
1786 * and a single consumer input.
1787 */
1788 struct match {
1789 /**
1790 * Packing class for this varying, computed by compute_packing_class().
1791 */
1792 unsigned packing_class;
1793
1794 /**
1795 * Packing order for this varying, computed by compute_packing_order().
1796 */
1797 packing_order_enum packing_order;
1798
1799 /**
1800 * The output variable in the producer stage.
1801 */
1802 ir_variable *producer_var;
1803
1804 /**
1805 * The input variable in the consumer stage.
1806 */
1807 ir_variable *consumer_var;
1808
1809 /**
1810 * The location which has been assigned for this varying. This is
1811 * expressed in multiples of a float, with the first generic varying
1812 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
1813 * value 0.
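 * For example, a value packed into the third component (.z) of
 * VARYING_SLOT_VAR1 has generic_location 6 (1 * 4 + 2).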
1814 */
1815 unsigned generic_location;
1816 } *matches;
1817
1818 /**
1819 * The number of elements in the \c matches array that are currently in
1820 * use.
1821 */
1822 unsigned num_matches;
1823
1824 /**
1825 * The number of elements that were set aside for the \c matches array when
1826 * it was allocated.
1827 */
1828 unsigned matches_capacity;
1829
1830 gl_shader_stage producer_stage;
1831 gl_shader_stage consumer_stage;
1832 };
1833
1834 } /* anonymous namespace */
1835
1836 varying_matches::varying_matches(bool disable_varying_packing,
1837 bool disable_xfb_packing,
1838 bool xfb_enabled,
1839 bool enhanced_layouts_enabled,
1840 gl_shader_stage producer_stage,
1841 gl_shader_stage consumer_stage)
1842 : disable_varying_packing(disable_varying_packing),
1843 disable_xfb_packing(disable_xfb_packing),
1844 xfb_enabled(xfb_enabled),
1845 enhanced_layouts_enabled(enhanced_layouts_enabled),
1846 producer_stage(producer_stage),
1847 consumer_stage(consumer_stage)
1848 {
1849 /* Note: this initial capacity is rather arbitrarily chosen to be large
1850 * enough for many cases without wasting an unreasonable amount of space.
1851 * varying_matches::record() will resize the array if there are more than
1852 * this number of varyings.
1853 */
1854 this->matches_capacity = 8;
1855 this->matches = (match *)
1856 malloc(sizeof(*this->matches) * this->matches_capacity);
1857 this->num_matches = 0;
1858 }
1859
1860
1861 varying_matches::~varying_matches()
1862 {
1863 free(this->matches);
1864 }
1865
1866
1867 /**
1868 * Packing is always safe on individual arrays, structures, and matrices. It
1869 * is also safe if the varying is only used for transform feedback.
1870 */
1871 bool
1872 varying_matches::is_varying_packing_safe(const glsl_type *type,
1873 const ir_variable *var) const
1874 {
1875 if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1876 consumer_stage == MESA_SHADER_TESS_CTRL ||
1877 producer_stage == MESA_SHADER_TESS_CTRL)
1878 return false;
1879
1880 return xfb_enabled && (type->is_array() || type->is_struct() ||
1881 type->is_matrix() || var->data.is_xfb_only);
1882 }
1883
1884
1885 /**
1886 * Record the given producer/consumer variable pair in the list of variables
1887 * that should later be assigned locations.
1888 *
1889 * It is permissible for \c consumer_var to be NULL (this happens if a
1890 * variable is output by the producer and consumed by transform feedback, but
1891 * not consumed by the consumer).
1892 *
1893 * If \c producer_var has already been paired up with a consumer_var, or
1894 * producer_var is part of fixed pipeline functionality (and hence already has
1895 * a location assigned), this function has no effect.
1896 *
1897 * Note: as a side effect this function may change the interpolation type of
1898 * \c producer_var, but only when the change couldn't possibly affect
1899 * rendering.
1900 */
1901 void
1902 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1903 {
1904 assert(producer_var != NULL || consumer_var != NULL);
1905
1906 if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1907 producer_var->data.explicit_location)) ||
1908 (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1909 consumer_var->data.explicit_location))) {
1910 /* Either a location already exists for this variable (since it is part
1911 * of fixed functionality), or it has already been recorded as part of a
1912 * previous match.
1913 */
1914 return;
1915 }
1916
1917 bool needs_flat_qualifier = consumer_var == NULL &&
1918 (producer_var->type->contains_integer() ||
1919 producer_var->type->contains_double());
1920
1921 if (!disable_varying_packing &&
1922 (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
1923 (needs_flat_qualifier ||
1924 (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1925 /* Since this varying is not being consumed by the fragment shader, its
1926 * interpolation type cannot possibly affect rendering.
1927 * Also, this variable is non-flat and is (or contains) an integer
1928 * or a double.
1929 * If the consumer stage is unknown, don't modify the interpolation
1930 * type as it could affect rendering later with separate shaders.
1931 *
1932 * lower_packed_varyings requires all integer varyings to be flat,
1933 * regardless of where they appear. We can trivially satisfy that
1934 * requirement by changing the interpolation type to flat here.
1935 */
1936 if (producer_var) {
1937 producer_var->data.centroid = false;
1938 producer_var->data.sample = false;
1939 producer_var->data.interpolation = INTERP_MODE_FLAT;
1940 }
1941
1942 if (consumer_var) {
1943 consumer_var->data.centroid = false;
1944 consumer_var->data.sample = false;
1945 consumer_var->data.interpolation = INTERP_MODE_FLAT;
1946 }
1947 }
1948
1949 if (this->num_matches == this->matches_capacity) {
1950 this->matches_capacity *= 2;
1951 this->matches = (match *)
1952 realloc(this->matches,
1953 sizeof(*this->matches) * this->matches_capacity);
1954 }
1955
1956 /* We must use the consumer to compute the packing class because in GL4.4+
1957 * there is no guarantee interpolation qualifiers will match across stages.
1958 *
1959 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1960 *
1961 * "The type and presence of interpolation qualifiers of variables with
1962 * the same name declared in all linked shaders for the same cross-stage
1963 * interface must match, otherwise the link command will fail.
1964 *
1965 * When comparing an output from one stage to an input of a subsequent
1966 * stage, the input and output don't match if their interpolation
1967 * qualifiers (or lack thereof) are not the same."
1968 *
1969 * This text was also in at least revision 7 of the 4.40 spec but is no
1970 * longer in revision 9 and not in the 4.50 spec.
1971 */
1972 const ir_variable *const var = (consumer_var != NULL)
1973 ? consumer_var : producer_var;
1974
1975 if (producer_var && consumer_var &&
1976 consumer_var->data.must_be_shader_input) {
1977 producer_var->data.must_be_shader_input = 1;
1978 }
1979
1980 this->matches[this->num_matches].packing_class
1981 = this->compute_packing_class(var);
1982 this->matches[this->num_matches].packing_order
1983 = this->compute_packing_order(var);
1984
1985 this->matches[this->num_matches].producer_var = producer_var;
1986 this->matches[this->num_matches].consumer_var = consumer_var;
1987 this->num_matches++;
1988 if (producer_var)
1989 producer_var->data.is_unmatched_generic_inout = 0;
1990 if (consumer_var)
1991 consumer_var->data.is_unmatched_generic_inout = 0;
1992 }
1993
1994
1995 /**
1996 * Choose locations for all of the variable matches that were previously
1997 * passed to varying_matches::record().
1998 * \param components returns array[slot] of number of components used
1999 * per slot (1, 2, 3 or 4)
2000 * \param reserved_slots bitmask indicating which varying slots are already
2001 * allocated
2002 * \return number of slots (4-element vectors) allocated
2003 */
2004 unsigned
2005 varying_matches::assign_locations(struct gl_shader_program *prog,
2006 uint8_t components[],
2007 uint64_t reserved_slots)
2008 {
2009 /* If packing has been disabled then we cannot safely sort the varyings by
2010 * class as it may mean we are using a version of OpenGL where
2011 * interpolation qualifiers are not guaranteed to match across
2012 * shaders; sorting in this case could result in mismatched shader
2013 * interfaces.
2014 * When packing is disabled the sort orders varyings used by transform
2015 * feedback first, but also depends on *undefined behaviour* of qsort to
2016 * reverse the order of the varyings. See: xfb_comparator().
2017 *
2018 * If packing is only disabled for xfb varyings (mutually exclusive with
2019 * disable_varying_packing), we then group varyings depending on if they
2020 * are captured for transform feedback. The same *undefined behaviour* is
2021 * taken advantage of.
2022 */
2023 if (this->disable_varying_packing) {
2024 /* Only sort varyings that are only used by transform feedback. */
2025 qsort(this->matches, this->num_matches, sizeof(*this->matches),
2026 &varying_matches::xfb_comparator);
2027 } else if (this->disable_xfb_packing) {
2028 /* Only sort varyings that are NOT used by transform feedback. */
2029 qsort(this->matches, this->num_matches, sizeof(*this->matches),
2030 &varying_matches::not_xfb_comparator);
2031 } else {
2032 /* Sort varying matches into an order that makes them easy to pack. */
2033 qsort(this->matches, this->num_matches, sizeof(*this->matches),
2034 &varying_matches::match_comparator);
2035 }
2036
2037 unsigned generic_location = 0;
2038 unsigned generic_patch_location = MAX_VARYING*4;
2039 bool previous_var_xfb = false;
2040 bool previous_var_xfb_only = false;
2041 unsigned previous_packing_class = ~0u;
2042
2043 /* For transform feedback separate mode, we know the number of attributes
2044 * is <= the number of buffers. So packing isn't critical. In fact,
2045 * packing vec3 attributes can cause trouble because splitting a vec3
2046 * effectively creates an additional transform feedback output. The
2047 * extra TFB output may exceed device driver limits.
2048 */
2049 const bool dont_pack_vec3 =
2050 (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
2051 prog->TransformFeedback.NumVarying > 0);
2052
2053 for (unsigned i = 0; i < this->num_matches; i++) {
2054 unsigned *location = &generic_location;
2055 const ir_variable *var;
2056 const glsl_type *type;
2057 bool is_vertex_input = false;
2058
2059 if (matches[i].consumer_var) {
2060 var = matches[i].consumer_var;
2061 type = get_varying_type(var, consumer_stage);
2062 if (consumer_stage == MESA_SHADER_VERTEX)
2063 is_vertex_input = true;
2064 } else {
2065 var = matches[i].producer_var;
2066 type = get_varying_type(var, producer_stage);
2067 }
2068
2069 if (var->data.patch)
2070 location = &generic_patch_location;
2071
2072 /* Advance to the next slot if this varying has a different packing
2073 * class than the previous one, and we're not already on a slot
2074 * boundary.
2075 *
2076 * Also advance if varying packing is disabled for transform feedback,
2077 * and previous or current varying is used for transform feedback.
2078 *
2079 * Also advance to the next slot if packing is disabled. This makes sure
2080 * we don't assign varyings the same locations which is possible
2081 * because we still pack individual arrays, records and matrices even
2082 * when packing is disabled. Note we don't advance to the next slot if
2083 * we can pack varyings together that are only used for transform
2084 * feedback.
2085 */
2086 if (var->data.must_be_shader_input ||
2087 (this->disable_xfb_packing &&
2088 (previous_var_xfb || var->data.is_xfb)) ||
2089 (this->disable_varying_packing &&
2090 !(previous_var_xfb_only && var->data.is_xfb_only)) ||
2091 (previous_packing_class != this->matches[i].packing_class) ||
2092 (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
2093 dont_pack_vec3)) {
2094 *location = ALIGN(*location, 4);
2095 }
2096
2097 previous_var_xfb = var->data.is_xfb;
2098 previous_var_xfb_only = var->data.is_xfb_only;
2099 previous_packing_class = this->matches[i].packing_class;
2100
2101 /* The number of components taken up by this variable. For vertex shader
2102 * inputs, we use the number of slots * 4, as they have different
2103 * counting rules.
2104 */
2105 unsigned num_components = 0;
2106 if (is_vertex_input) {
2107 num_components = type->count_attribute_slots(is_vertex_input) * 4;
2108 } else {
2109 if ((this->disable_varying_packing &&
2110 !is_varying_packing_safe(type, var)) ||
2111 (this->disable_xfb_packing && var->data.is_xfb &&
2112 !(type->is_array() || type->is_struct() || type->is_matrix())) ||
2113 var->data.must_be_shader_input) {
2114 num_components = type->count_attribute_slots(false) * 4;
2115 } else {
2116 num_components = type->component_slots_aligned(*location);
2117 }
2118 }
2119
2120 /* The last slot for this variable, inclusive. */
2121 unsigned slot_end = *location + num_components - 1;
2122
2123 /* FIXME: We could be smarter in the code below and loop back over
2124 * trying to fill any locations that we skipped because we couldn't pack
2125 * the varying between varyings with explicit locations. For now just let the user
2126 * hit the linking error if we run out of room and suggest they use
2127 * explicit locations.
2128 */
2129 while (slot_end < MAX_VARYING * 4u) {
2130 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
2131 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
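         /* For example, a varying starting at float location 6 that occupies
          * 5 components ends at slot_end 10, so slots == 2 and slot_mask
          * covers slots 1 and 2 (binary 110).
          */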
2132
2133 assert(slots > 0);
2134
2135 if ((reserved_slots & slot_mask) == 0) {
2136 break;
2137 }
2138
2139 *location = ALIGN(*location + 1, 4);
2140 slot_end = *location + num_components - 1;
2141 }
2142
2143 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
2144 linker_error(prog, "insufficient contiguous locations available for "
2145 "%s it is possible an array or struct could not be "
2146 "packed between varyings with explicit locations. Try "
2147 "using an explicit location for arrays and structs.",
2148 var->name);
2149 }
2150
2151 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
2152 for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
2153 components[j] = 4;
2154 components[slot_end / 4u] = (slot_end & 3) + 1;
2155 }
2156
2157 this->matches[i].generic_location = *location;
2158
2159 *location = slot_end + 1;
2160 }
2161
2162 return (generic_location + 3) / 4;
2163 }
2164
2165
2166 /**
2167 * Update the producer and consumer shaders to reflect the locations
2168 * assignments that were made by varying_matches::assign_locations().
2169 */
2170 void
2171 varying_matches::store_locations() const
2172 {
2173 /* Check if a location needs to be packed with lower_packed_varyings() or if
2174 * we can just use ARB_enhanced_layouts packing.
2175 */
2176 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {};
2177 const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
2178
2179 for (unsigned i = 0; i < this->num_matches; i++) {
2180 ir_variable *producer_var = this->matches[i].producer_var;
2181 ir_variable *consumer_var = this->matches[i].consumer_var;
2182 unsigned generic_location = this->matches[i].generic_location;
2183 unsigned slot = generic_location / 4;
2184 unsigned offset = generic_location % 4;
2185
2186 if (producer_var) {
2187 producer_var->data.location = VARYING_SLOT_VAR0 + slot;
2188 producer_var->data.location_frac = offset;
2189 }
2190
2191 if (consumer_var) {
2192 assert(consumer_var->data.location == -1);
2193 consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
2194 consumer_var->data.location_frac = offset;
2195 }
2196
2197 /* Find locations suitable for native packing via
2198 * ARB_enhanced_layouts.
2199 */
2200 if (producer_var && consumer_var) {
2201 if (enhanced_layouts_enabled) {
2202 const glsl_type *type =
2203 get_varying_type(producer_var, producer_stage);
2204 if (type->is_array() || type->is_matrix() || type->is_struct() ||
2205 type->is_64bit()) {
2206 unsigned comp_slots = type->component_slots() + offset;
2207 unsigned slots = comp_slots / 4;
2208 if (comp_slots % 4)
2209 slots += 1;
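               /* For example, a mat3 starting at component offset 1 gives
                * comp_slots == 10 and slots == 3, so three consecutive
                * locations are flagged for packing by lower_packed_varyings().
                */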
2210
2211 for (unsigned j = 0; j < slots; j++) {
2212 pack_loc[slot + j] = true;
2213 }
2214 } else if (offset + type->vector_elements > 4) {
2215 pack_loc[slot] = true;
2216 pack_loc[slot + 1] = true;
2217 } else {
2218 loc_type[slot][offset] = type;
2219 }
2220 }
2221 }
2222 }
2223
2224 /* Attempt to use ARB_enhanced_layouts for more efficient packing if
2225 * suitable.
2226 */
2227 if (enhanced_layouts_enabled) {
2228 for (unsigned i = 0; i < this->num_matches; i++) {
2229 ir_variable *producer_var = this->matches[i].producer_var;
2230 ir_variable *consumer_var = this->matches[i].consumer_var;
2231 unsigned generic_location = this->matches[i].generic_location;
2232 unsigned slot = generic_location / 4;
2233
2234 if (pack_loc[slot] || !producer_var || !consumer_var)
2235 continue;
2236
2237 const glsl_type *type =
2238 get_varying_type(producer_var, producer_stage);
2239 bool type_match = true;
2240 for (unsigned j = 0; j < 4; j++) {
2241 if (loc_type[slot][j]) {
2242 if (type->base_type != loc_type[slot][j]->base_type)
2243 type_match = false;
2244 }
2245 }
2246
2247 if (type_match) {
2248 producer_var->data.explicit_location = 1;
2249 consumer_var->data.explicit_location = 1;
2250 producer_var->data.explicit_component = 1;
2251 consumer_var->data.explicit_component = 1;
2252 }
2253 }
2254 }
2255 }
2256
2257
2258 /**
2259 * Compute the "packing class" of the given varying. This is an unsigned
2260 * integer with the property that two variables in the same packing class can
2261 * be safely packed into the same vec4.
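 * For example, two flat, non-centroid, non-sample, non-patch varyings fall
 * into the same packing class and may share a vec4, whereas a flat varying
 * and a smooth varying never can.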
2262 */
2263 unsigned
2264 varying_matches::compute_packing_class(const ir_variable *var)
2265 {
2266 /* Without help from the back-end, there is no way to pack together
2267 * variables with different interpolation types, because
2268 * lower_packed_varyings must choose exactly one interpolation type for
2269 * each packed varying it creates.
2270 *
2271 * However, we can safely pack together floats, ints, and uints, because:
2272 *
2273 * - varyings of base type "int" and "uint" must use the "flat"
2274 * interpolation type, which can only occur in GLSL 1.30 and above.
2275 *
2276 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2277 * can store flat floats as ints without losing any information (using
2278 * the ir_unop_bitcast_* opcodes).
2279 *
2280 * Therefore, the packing class depends only on the interpolation type.
2281 */
2282 const unsigned interp = var->is_interpolation_flat()
2283 ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
2284
2285 assert(interp < (1 << 3));
2286
2287 const unsigned packing_class = (interp << 0) |
2288 (var->data.centroid << 3) |
2289 (var->data.sample << 4) |
2290 (var->data.patch << 5) |
2291 (var->data.must_be_shader_input << 6);
2292
2293 return packing_class;
2294 }
2295
2296
2297 /**
2298 * Compute the "packing order" of the given varying. This is a sort key we
2299 * use to determine when to attempt to pack the given varying relative to
2300 * other varyings in the same packing class.
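 * For example, a vec3 (or any array of vec3) maps to PACKING_ORDER_VEC3,
 * while a mat2, with 4 component slots, maps to PACKING_ORDER_VEC4.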
2301 */
2302 varying_matches::packing_order_enum
2303 varying_matches::compute_packing_order(const ir_variable *var)
2304 {
2305 const glsl_type *element_type = var->type;
2306
2307 while (element_type->is_array()) {
2308 element_type = element_type->fields.array;
2309 }
2310
2311 switch (element_type->component_slots() % 4) {
2312 case 1: return PACKING_ORDER_SCALAR;
2313 case 2: return PACKING_ORDER_VEC2;
2314 case 3: return PACKING_ORDER_VEC3;
2315 case 0: return PACKING_ORDER_VEC4;
2316 default:
2317 assert(!"Unexpected value of vector_elements");
2318 return PACKING_ORDER_VEC4;
2319 }
2320 }
2321
2322
2323 /**
2324 * Comparison function passed to qsort() to sort varyings by packing_class and
2325 * then by packing_order.
2326 */
2327 int
2328 varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2329 {
2330 const match *x = (const match *) x_generic;
2331 const match *y = (const match *) y_generic;
2332
2333 if (x->packing_class != y->packing_class)
2334 return x->packing_class - y->packing_class;
2335 return x->packing_order - y->packing_order;
2336 }
2337
2338
2339 /**
2340 * Comparison function passed to qsort() to sort varyings used only by
2341 * transform feedback when packing of other varyings is disabled.
2342 */
2343 int
2344 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2345 {
2346 const match *x = (const match *) x_generic;
2347
2348 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2349 return match_comparator(x_generic, y_generic);
2350
2351 /* FIXME: When the comparator returns 0 it means the elements being
2352 * compared are equivalent. However the qsort documentation says:
2353 *
2354 * "The order of equivalent elements is undefined."
2355 *
2356 * In practice the sort ends up reversing the order of the varyings, which
2357 * means locations are also assigned in this reversed order, which happens
2358 * to be what we want. This is also what's happening in
2359 * varying_matches::match_comparator().
2360 */
2361 return 0;
2362 }
2363
2364
2365 /**
2366 * Comparison function passed to qsort() to sort varyings NOT used by
2367 * transform feedback when packing of xfb varyings is disabled.
2368 */
2369 int
2370 varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
2371 {
2372 const match *x = (const match *) x_generic;
2373
2374 if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
2375 return match_comparator(x_generic, y_generic);
2376
2377 /* FIXME: When the comparator returns 0 it means the elements being
2378 * compared are equivalent. However the qsort documentation says:
2379 *
2380 * "The order of equivalent elements is undefined."
2381 *
2382 * In practice the sort ends up reversing the order of the varyings, which
2383 * means locations are also assigned in this reversed order, which happens
2384 * to be what we want. This is also what's happening in
2385 * varying_matches::match_comparator().
2386 */
2387 return 0;
2388 }
2389
2390
2391 /**
2392 * Is the given variable a varying variable to be counted against the
2393 * limit in ctx->Const.MaxVarying?
2394 * This includes variables such as texcoords, colors and generic
2395 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2396 */
2397 static bool
2398 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2399 {
2400 /* Only fragment shaders will take a varying variable as an input */
2401 if (stage == MESA_SHADER_FRAGMENT &&
2402 var->data.mode == ir_var_shader_in) {
2403 switch (var->data.location) {
2404 case VARYING_SLOT_POS:
2405 case VARYING_SLOT_FACE:
2406 case VARYING_SLOT_PNTC:
2407 return false;
2408 default:
2409 return true;
2410 }
2411 }
2412 return false;
2413 }
2414
2415
2416 /**
2417 * Visitor class that generates tfeedback_candidate structs describing all
2418 * possible targets of transform feedback.
2419 *
2420 * tfeedback_candidate structs are stored in the hash table
2421 * tfeedback_candidates, which is passed to the constructor. This hash table
2422 * maps varying names to instances of the tfeedback_candidate struct.
2423 */
2424 class tfeedback_candidate_generator : public program_resource_visitor
2425 {
2426 public:
2427 tfeedback_candidate_generator(void *mem_ctx,
2428 hash_table *tfeedback_candidates,
2429 gl_shader_stage stage)
2430 : mem_ctx(mem_ctx),
2431 tfeedback_candidates(tfeedback_candidates),
2432 stage(stage),
2433 toplevel_var(NULL),
2434 varying_floats(0),
2435 xfb_offset_floats(0)
2436 {
2437 }
2438
2439 void process(ir_variable *var)
2440 {
2441 /* All named varying interface blocks should be flattened by now */
2442 assert(!var->is_interface_instance());
2443 assert(var->data.mode == ir_var_shader_out);
2444
2445 this->toplevel_var = var;
2446 this->varying_floats = 0;
2447 this->xfb_offset_floats = 0;
2448 const glsl_type *t =
2449 var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
2450 if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
2451 assert(t->is_array());
2452 t = t->fields.array;
2453 }
2454 program_resource_visitor::process(var, t, false);
2455 }
2456
2457 private:
2458 virtual void visit_field(const glsl_type *type, const char *name,
2459 bool /* row_major */,
2460 const glsl_type * /* record_type */,
2461 const enum glsl_interface_packing,
2462 bool /* last_field */)
2463 {
2464 assert(!type->without_array()->is_struct());
2465 assert(!type->without_array()->is_interface());
2466
2467 tfeedback_candidate *candidate
2468 = rzalloc(this->mem_ctx, tfeedback_candidate);
2469 candidate->toplevel_var = this->toplevel_var;
2470 candidate->type = type;
2471
2472 if (type->without_array()->is_64bit()) {
2473 /* From ARB_gpu_shader_fp64:
2474 *
2475 * If any variable captured in transform feedback has double-precision
2476 * components, the practical requirements for defined behavior are:
2477 * ...
2478 * (c) each double-precision variable captured must be aligned to a
2479 * multiple of eight bytes relative to the beginning of a vertex.
2480 */
2481 this->xfb_offset_floats = ALIGN(this->xfb_offset_floats, 2);
2482 /* 64-bit members of structs are also aligned. */
2483 this->varying_floats = ALIGN(this->varying_floats, 2);
2484 }
2485
2486 candidate->xfb_offset_floats = this->xfb_offset_floats;
2487 candidate->struct_offset_floats = this->varying_floats;
2488
2489 _mesa_hash_table_insert(this->tfeedback_candidates,
2490 ralloc_strdup(this->mem_ctx, name),
2491 candidate);
2492
2493 const unsigned component_slots = type->component_slots();
2494
2495 if (varying_has_user_specified_location(this->toplevel_var)) {
2496 this->varying_floats += type->count_attribute_slots(false) * 4;
2497 } else {
2498 this->varying_floats += component_slots;
2499 }
2500
2501 this->xfb_offset_floats += component_slots;
2502 }
2503
2504 /**
2505 * Memory context used to allocate hash table keys and values.
2506 */
2507 void * const mem_ctx;
2508
2509 /**
2510 * Hash table in which tfeedback_candidate objects should be stored.
2511 */
2512 hash_table * const tfeedback_candidates;
2513
2514 gl_shader_stage stage;
2515
2516 /**
2517 * Pointer to the toplevel variable that is being traversed.
2518 */
2519 ir_variable *toplevel_var;
2520
2521 /**
2522 * Total number of varying floats that have been visited so far. This is
2523 * used to determine the offset to each varying within the toplevel
2524 * variable.
2525 */
2526 unsigned varying_floats;
2527
2528 /**
2529 * Offset within the xfb. Counted in floats.
2530 */
2531 unsigned xfb_offset_floats;
2532 };
2533
2534
2535 namespace linker {
2536
2537 void
2538 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2539 hash_table *consumer_inputs,
2540 hash_table *consumer_interface_inputs,
2541 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2542 {
2543 memset(consumer_inputs_with_locations,
2544 0,
2545 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2546
2547 foreach_in_list(ir_instruction, node, ir) {
2548 ir_variable *const input_var = node->as_variable();
2549
2550 if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2551 /* All interface blocks should have been lowered by this point */
2552 assert(!input_var->type->is_interface());
2553
2554 if (input_var->data.explicit_location) {
2555 /* assign_varying_locations only cares about finding the
2556 * ir_variable at the start of a contiguous location block.
2557 *
2558 * - For !producer, consumer_inputs_with_locations isn't used.
2559 *
2560 * - For !consumer, consumer_inputs_with_locations is empty.
2561 *
2562 * For consumer && producer, if you were trying to set some
2563 * ir_variable to the middle of a location block on the other side
2564 * of producer/consumer, cross_validate_outputs_to_inputs() should
2565 * be link-erroring due to either type mismatch or location
2566 * overlaps. If the variables do match up, then they've got a
2567 * matching data.location and you only looked at
2568 * consumer_inputs_with_locations[var->data.location], not any
2569 * following entries for the array/structure.
2570 */
2571 consumer_inputs_with_locations[input_var->data.location] =
2572 input_var;
2573 } else if (input_var->get_interface_type() != NULL) {
2574 char *const iface_field_name =
2575 ralloc_asprintf(mem_ctx, "%s.%s",
2576 input_var->get_interface_type()->without_array()->name,
2577 input_var->name);
2578 _mesa_hash_table_insert(consumer_interface_inputs,
2579 iface_field_name, input_var);
2580 } else {
2581 _mesa_hash_table_insert(consumer_inputs,
2582 ralloc_strdup(mem_ctx, input_var->name),
2583 input_var);
2584 }
2585 }
2586 }
2587 }
2588
2589 /**
2590 * Find a variable from the consumer that "matches" the specified variable
2591 *
2592 * This function only finds inputs with names that match. There is no
2593 * validation (here) that the types, etc. are compatible.
2594 */
2595 ir_variable *
2596 get_matching_input(void *mem_ctx,
2597 const ir_variable *output_var,
2598 hash_table *consumer_inputs,
2599 hash_table *consumer_interface_inputs,
2600 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2601 {
2602 ir_variable *input_var;
2603
2604 if (output_var->data.explicit_location) {
2605 input_var = consumer_inputs_with_locations[output_var->data.location];
2606 } else if (output_var->get_interface_type() != NULL) {
2607 char *const iface_field_name =
2608 ralloc_asprintf(mem_ctx, "%s.%s",
2609 output_var->get_interface_type()->without_array()->name,
2610 output_var->name);
2611 hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2612 input_var = entry ? (ir_variable *) entry->data : NULL;
2613 } else {
2614 hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2615 input_var = entry ? (ir_variable *) entry->data : NULL;
2616 }
2617
2618 return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2619 ? NULL : input_var;
2620 }
2621
2622 }
2623
2624 static int
2625 io_variable_cmp(const void *_a, const void *_b)
2626 {
2627 const ir_variable *const a = *(const ir_variable **) _a;
2628 const ir_variable *const b = *(const ir_variable **) _b;
2629
2630 if (a->data.explicit_location && b->data.explicit_location)
2631 return b->data.location - a->data.location;
2632
2633 if (a->data.explicit_location && !b->data.explicit_location)
2634 return 1;
2635
2636 if (!a->data.explicit_location && b->data.explicit_location)
2637 return -1;
2638
2639 return -strcmp(a->name, b->name);
2640 }
2641
2642 /**
2643 * Sort the shader IO variables into canonical order
2644 */
2645 static void
2646 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2647 {
2648 ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2649 unsigned num_variables = 0;
2650
2651 foreach_in_list(ir_instruction, node, ir) {
2652 ir_variable *const var = node->as_variable();
2653
2654 if (var == NULL || var->data.mode != io_mode)
2655 continue;
2656
2657 /* If we have already encountered more I/O variables than could
2658 * successfully link, bail.
2659 */
2660 if (num_variables == ARRAY_SIZE(var_table))
2661 return;
2662
2663 var_table[num_variables++] = var;
2664 }
2665
2666 if (num_variables == 0)
2667 return;
2668
2669 /* Sort the list in reverse order (io_variable_cmp handles this). Later
2670 * we're going to push the variables on to the IR list as a stack, so we
2671 * want the last variable (in canonical order) to be first in the list.
2672 */
2673 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2674
2675 /* Remove the variable from its current location in the IR, and put it at
2676 * the front.
2677 */
2678 for (unsigned i = 0; i < num_variables; i++) {
2679 var_table[i]->remove();
2680 ir->push_head(var_table[i]);
2681 }
2682 }
2683
2684 /**
2685 * Generate a bitfield map of the explicit locations for shader varyings.
2686 *
2687 * Note: For Tessellation shaders we are sitting right on the limits of the
2688 * 64 bit map. Per-vertex and per-patch both have separate location domains
2689 * with a max of MAX_VARYING.
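 * For example, an output explicitly placed at VARYING_SLOT_VAR3 that
 * occupies two slots sets bits 3 and 4 of the returned mask.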
2690 */
2691 static uint64_t
2692 reserved_varying_slot(struct gl_linked_shader *stage,
2693 ir_variable_mode io_mode)
2694 {
2695 assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2696 /* Avoid an overflow of the returned value */
2697 assert(MAX_VARYINGS_INCL_PATCH <= 64);
2698
2699 uint64_t slots = 0;
2700 int var_slot;
2701
2702 if (!stage)
2703 return slots;
2704
2705 foreach_in_list(ir_instruction, node, stage->ir) {
2706 ir_variable *const var = node->as_variable();
2707
2708 if (var == NULL || var->data.mode != io_mode ||
2709 !var->data.explicit_location ||
2710 var->data.location < VARYING_SLOT_VAR0)
2711 continue;
2712
2713 var_slot = var->data.location - VARYING_SLOT_VAR0;
2714
2715 unsigned num_elements = get_varying_type(var, stage->Stage)
2716 ->count_attribute_slots(io_mode == ir_var_shader_in &&
2717 stage->Stage == MESA_SHADER_VERTEX);
2718 for (unsigned i = 0; i < num_elements; i++) {
2719 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2720 slots |= UINT64_C(1) << var_slot;
2721 var_slot += 1;
2722 }
2723 }
2724
2725 return slots;
2726 }
2727
2728
2729 /**
2730 * Assign locations for all variables that are produced in one pipeline stage
2731 * (the "producer") and consumed in the next stage (the "consumer").
2732 *
2733 * Variables produced by the producer may also be consumed by transform
2734 * feedback.
2735 *
2736 * \param num_tfeedback_decls is the number of declarations indicating
2737 * variables that may be consumed by transform feedback.
2738 *
2739 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2740 * representing the result of parsing the strings passed to
2741 * glTransformFeedbackVaryings(). assign_location() will be called for
2742 * each of these objects that matches one of the outputs of the
2743 * producer.
2744 *
2745 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2746 * be NULL. In this case, varying locations are assigned solely based on the
2747 * requirements of transform feedback.
2748 */
2749 static bool
2750 assign_varying_locations(struct gl_context *ctx,
2751 void *mem_ctx,
2752 struct gl_shader_program *prog,
2753 gl_linked_shader *producer,
2754 gl_linked_shader *consumer,
2755 unsigned num_tfeedback_decls,
2756 tfeedback_decl *tfeedback_decls,
2757 const uint64_t reserved_slots)
2758 {
2759 /* Tessellation shaders treat inputs and outputs as shared memory and can
2760 * access inputs and outputs of other invocations.
2761 * Therefore, they can't be lowered to temps easily (and definitely not
2762 * efficiently).
2763 */
2764 bool unpackable_tess =
2765 (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2766 (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2767 (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2768
2769 /* Transform feedback code assumes varying arrays are packed, so if the
2770 * driver has disabled varying packing, make sure to at least enable
2771 * packing required by transform feedback. See below for exception.
2772 */
2773 bool xfb_enabled =
2774 ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2775
2776 /* Some drivers actually require packing to be explicitly disabled
2777 * for varyings used by transform feedback.
2778 */
2779 bool disable_xfb_packing =
2780 ctx->Const.DisableTransformFeedbackPacking;
2781
2782 /* Disable packing on outward facing interfaces for SSO because in ES we
2783 * need to retain the unpacked varying information for draw time
2784 * validation.
2785 *
2786 * Packing is still enabled on individual arrays, structs, and matrices as
2787 * these are required by the transform feedback code and it is still safe
2788 * to do so. We also enable packing when a varying is only used for
2789 * transform feedback and it's not an SSO.
2790 */
2791 bool disable_varying_packing =
2792 ctx->Const.DisableVaryingPacking || unpackable_tess;
2793 if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2794 disable_varying_packing = true;
2795
2796 varying_matches matches(disable_varying_packing,
2797 disable_xfb_packing,
2798 xfb_enabled,
2799 ctx->Extensions.ARB_enhanced_layouts,
2800 producer ? producer->Stage : MESA_SHADER_NONE,
2801 consumer ? consumer->Stage : MESA_SHADER_NONE);
2802 void *hash_table_ctx = ralloc_context(NULL);
2803 hash_table *tfeedback_candidates =
2804 _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2805 _mesa_key_string_equal);
2806 hash_table *consumer_inputs =
2807 _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2808 _mesa_key_string_equal);
2809 hash_table *consumer_interface_inputs =
2810 _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2811 _mesa_key_string_equal);
2812 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2813 NULL,
2814 };
2815
2816 unsigned consumer_vertices = 0;
2817 if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2818 consumer_vertices = prog->Geom.VerticesIn;
2819
2820 /* Operate in a total of four passes.
2821 *
2822 * 1. Sort inputs / outputs into a canonical order. This is necessary so
2823 * that inputs / outputs of separable shaders will be assigned
2824 * predictable locations regardless of the order in which declarations
2825 * appeared in the shader source.
2826 *
2827 * 2. Assign locations for any matching inputs and outputs.
2828 *
2829 * 3. Mark output variables in the producer that do not have locations as
2830 * not being outputs. This lets the optimizer eliminate them.
2831 *
2832 * 4. Mark input variables in the consumer that do not have locations as
2833 * not being inputs. This lets the optimizer eliminate them.
2834 */
2835 if (consumer)
2836 canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2837
2838 if (producer)
2839 canonicalize_shader_io(producer->ir, ir_var_shader_out);
2840
2841 if (consumer)
2842 linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2843 consumer_inputs,
2844 consumer_interface_inputs,
2845 consumer_inputs_with_locations);
2846
2847 if (producer) {
2848 foreach_in_list(ir_instruction, node, producer->ir) {
2849 ir_variable *const output_var = node->as_variable();
2850
2851 if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2852 continue;
2853
2854 /* Only geometry shaders can use non-zero streams */
2855 assert(output_var->data.stream == 0 ||
2856 (output_var->data.stream < MAX_VERTEX_STREAMS &&
2857 producer->Stage == MESA_SHADER_GEOMETRY));
2858
2859 if (num_tfeedback_decls > 0) {
2860 tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
2861 /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
2862 * ("Vertex Shader Variables / Output Variables")
2863 *
2864 * "Each program object can specify a set of output variables from
2865 * one shader to be recorded in transform feedback mode (see
2866 * section 13.3). The variables that can be recorded are those
2867 * emitted by the first active shader, in order, from the
2868 * following list:
2869 *
2870 * * geometry shader
2871 * * tessellation evaluation shader
2872 * * tessellation control shader
2873 * * vertex shader"
2874 *
2875 * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
2876 * Variables / Output Variables") tessellation control shader is
2877 * not included in the stages list.
2878 */
2879 if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
2880 g.process(output_var);
2881 }
2882 }
2883
2884 ir_variable *const input_var =
2885 linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2886 consumer_interface_inputs,
2887 consumer_inputs_with_locations);
2888
2889 /* If a matching input variable was found, add this output (and the
2890 * input) to the set. If this is a separable program and there is no
2891 * consumer stage, add the output.
2892 *
2893 * Always add TCS outputs. They are shared by all invocations
2894 * within a patch and can be used as shared memory.
2895 */
2896 if (input_var || (prog->SeparateShader && consumer == NULL) ||
2897 producer->Stage == MESA_SHADER_TESS_CTRL) {
2898 matches.record(output_var, input_var);
2899 }
2900
2901 /* Only stream 0 outputs can be consumed in the next stage */
2902 if (input_var && output_var->data.stream != 0) {
2903 linker_error(prog, "output %s is assigned to stream=%d but "
2904 "is linked to an input, which requires stream=0",
2905 output_var->name, output_var->data.stream);
2906 ralloc_free(hash_table_ctx);
2907 return false;
2908 }
2909 }
2910 } else {
2911 /* If there's no producer stage, then this must be a separable program.
2912 * For example, we may have a program that has just a fragment shader.
2913 * Later this program will be used with some arbitrary vertex (or
2914 * geometry) shader program. This means that locations must be assigned
2915 * for all the inputs.
2916 */
2917 foreach_in_list(ir_instruction, node, consumer->ir) {
2918 ir_variable *const input_var = node->as_variable();
2919 if (input_var && input_var->data.mode == ir_var_shader_in) {
2920 matches.record(NULL, input_var);
2921 }
2922 }
2923 }
2924
2925 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2926 if (!tfeedback_decls[i].is_varying())
2927 continue;
2928
2929 const tfeedback_candidate *matched_candidate
2930 = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2931
2932 if (matched_candidate == NULL) {
2933 ralloc_free(hash_table_ctx);
2934 return false;
2935 }
2936
2937 /* There are two situations where a new output varying is needed:
2938 *
2939 * - If varying packing is disabled for xfb and the current declaration
2940 * is subscripting an array, whether the subscript is aligned or not,
2941 * in order to preserve the rest of the array for the consumer.
2942 *
2943 * - If a builtin variable needs to be copied to a new variable
2944 * before its content is modified by another lowering pass (e.g.
2945 * \c gl_Position is transformed by \c nir_lower_viewport_transform).
2946 */
2947 const bool lowered =
2948 (disable_xfb_packing && tfeedback_decls[i].subscripted()) ||
2949 (matched_candidate->toplevel_var->data.explicit_location &&
2950 matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
2951 (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
2952 (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
2953 BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
2954
2955 if (lowered) {
2956 ir_variable *new_var;
2957 tfeedback_candidate *new_candidate = NULL;
2958
2959 new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
2960 if (new_var == NULL) {
2961 ralloc_free(hash_table_ctx);
2962 return false;
2963 }
2964
2965 /* Create new candidate and replace matched_candidate */
2966 new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
2967 new_candidate->toplevel_var = new_var;
2968 new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
2969 new_candidate->type = new_var->type;
2970 new_candidate->struct_offset_floats = 0;
2971 new_candidate->xfb_offset_floats = 0;
2972 _mesa_hash_table_insert(tfeedback_candidates,
2973 ralloc_strdup(mem_ctx, new_var->name),
2974 new_candidate);
2975
2976 tfeedback_decls[i].set_lowered_candidate(new_candidate);
2977 matched_candidate = new_candidate;
2978 }
2979
2980 /* Mark as xfb varying */
2981 matched_candidate->toplevel_var->data.is_xfb = 1;
2982
2983 /* Mark xfb varyings as always active */
2984 matched_candidate->toplevel_var->data.always_active_io = 1;
2985
2986 /* Mark any corresponding inputs as always active also. We must do this
2987 * because we have a NIR pass that lowers vectors to scalars and another
2988 * that removes unused varyings.
2989 * We don't split varyings marked as always active because there is no
2990 * point in doing so. This means we need to mark both sides of the
2991 * interface as always active otherwise we will have a mismatch and
2992 * start removing things we shouldn't.
2993 */
2994 ir_variable *const input_var =
2995 linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2996 consumer_inputs,
2997 consumer_interface_inputs,
2998 consumer_inputs_with_locations);
2999 if (input_var) {
3000 input_var->data.is_xfb = 1;
3001 input_var->data.always_active_io = 1;
3002 }
3003
3004 if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
3005 matched_candidate->toplevel_var->data.is_xfb_only = 1;
3006 matches.record(matched_candidate->toplevel_var, NULL);
3007 }
3008 }
3009
3010 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
3011 const unsigned slots_used = matches.assign_locations(
3012 prog, components, reserved_slots);
3013 matches.store_locations();
3014
3015 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
3016 if (tfeedback_decls[i].is_varying()) {
3017 if (!tfeedback_decls[i].assign_location(ctx, prog)) {
3018 ralloc_free(hash_table_ctx);
3019 return false;
3020 }
3021 }
3022 }
3023 ralloc_free(hash_table_ctx);
3024
3025 if (consumer && producer) {
3026 foreach_in_list(ir_instruction, node, consumer->ir) {
3027 ir_variable *const var = node->as_variable();
3028
3029 if (var && var->data.mode == ir_var_shader_in &&
3030 var->data.is_unmatched_generic_inout) {
3031 if (!prog->IsES && prog->data->Version <= 120) {
3032 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
3033 *
3034 * Only those varying variables used (i.e. read) in
3035 * the fragment shader executable must be written to
3036 * by the vertex shader executable; declaring
3037 * superfluous varying variables in a vertex shader is
3038 * permissible.
3039 *
3040 * We interpret this text as meaning that the VS must
3041 * write the variable for the FS to read it. See
3042 * "glsl1-varying read but not written" in piglit.
3043 */
3044 linker_error(prog, "%s shader varying %s not written "
3045 "by %s shader\n.",
3046 _mesa_shader_stage_to_string(consumer->Stage),
3047 var->name,
3048 _mesa_shader_stage_to_string(producer->Stage));
3049 } else {
3050 linker_warning(prog, "%s shader varying %s not written "
3051 "by %s shader\n.",
3052 _mesa_shader_stage_to_string(consumer->Stage),
3053 var->name,
3054 _mesa_shader_stage_to_string(producer->Stage));
3055 }
3056 }
3057 }
3058
3059 /* Now that validation is done it's safe to remove unused varyings. As
3060 * we have both a producer and consumer it's safe to remove unused
3061 * varyings even if the program is an SSO because the stages are being
3062 * linked together, i.e. we have a multi-stage SSO.
3063 */
3064 remove_unused_shader_inputs_and_outputs(false, producer,
3065 ir_var_shader_out);
3066 remove_unused_shader_inputs_and_outputs(false, consumer,
3067 ir_var_shader_in);
3068 }
3069
3070 if (producer) {
3071 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
3072 0, producer, disable_varying_packing,
3073 disable_xfb_packing, xfb_enabled);
3074 }
3075
3076 if (consumer) {
3077 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
3078 consumer_vertices, consumer, disable_varying_packing,
3079 disable_xfb_packing, xfb_enabled);
3080 }
3081
3082 return true;
3083 }
3084
3085 static bool
3086 check_against_output_limit(struct gl_context *ctx,
3087 struct gl_shader_program *prog,
3088 gl_linked_shader *producer,
3089 unsigned num_explicit_locations)
3090 {
3091 unsigned output_vectors = num_explicit_locations;
3092
3093 foreach_in_list(ir_instruction, node, producer->ir) {
3094 ir_variable *const var = node->as_variable();
3095
3096 if (var && !var->data.explicit_location &&
3097 var->data.mode == ir_var_shader_out &&
3098 var_counts_against_varying_limit(producer->Stage, var)) {
3099 /* outputs for fragment shader can't be doubles */
3100 output_vectors += var->type->count_attribute_slots(false);
3101 }
3102 }
3103
3104 assert(producer->Stage != MESA_SHADER_FRAGMENT);
3105 unsigned max_output_components =
3106 ctx->Const.Program[producer->Stage].MaxOutputComponents;
3107
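/* Worked example (hypothetical limit value, for illustration only): with
 * MaxOutputComponents = 64 the producer gets 64 / 4 = 16 output vectors,
 * so e.g. seventeen implicitly-located vec4 outputs trip the check below.
 */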
3108 const unsigned output_components = output_vectors * 4;
3109 if (output_components > max_output_components) {
3110 if (ctx->API == API_OPENGLES2 || prog->IsES)
3111 linker_error(prog, "%s shader uses too many output vectors "
3112 "(%u > %u)\n",
3113 _mesa_shader_stage_to_string(producer->Stage),
3114 output_vectors,
3115 max_output_components / 4);
3116 else
3117 linker_error(prog, "%s shader uses too many output components "
3118 "(%u > %u)\n",
3119 _mesa_shader_stage_to_string(producer->Stage),
3120 output_components,
3121 max_output_components);
3122
3123 return false;
3124 }
3125
3126 return true;
3127 }
3128
3129 static bool
3130 check_against_input_limit(struct gl_context *ctx,
3131 struct gl_shader_program *prog,
3132 gl_linked_shader *consumer,
3133 unsigned num_explicit_locations)
3134 {
3135 unsigned input_vectors = num_explicit_locations;
3136
3137 foreach_in_list(ir_instruction, node, consumer->ir) {
3138 ir_variable *const var = node->as_variable();
3139
3140 if (var && !var->data.explicit_location &&
3141 var->data.mode == ir_var_shader_in &&
3142 var_counts_against_varying_limit(consumer->Stage, var)) {
3143 /* vertex inputs aren't counted as varyings */
3144 input_vectors += var->type->count_attribute_slots(false);
3145 }
3146 }
3147
3148 assert(consumer->Stage != MESA_SHADER_VERTEX);
3149 unsigned max_input_components =
3150 ctx->Const.Program[consumer->Stage].MaxInputComponents;
3151
3152 const unsigned input_components = input_vectors * 4;
3153 if (input_components > max_input_components) {
3154 if (ctx->API == API_OPENGLES2 || prog->IsES)
3155 linker_error(prog, "%s shader uses too many input vectors "
3156 "(%u > %u)\n",
3157 _mesa_shader_stage_to_string(consumer->Stage),
3158 input_vectors,
3159 max_input_components / 4);
3160 else
3161 linker_error(prog, "%s shader uses too many input components "
3162 "(%u > %u)\n",
3163 _mesa_shader_stage_to_string(consumer->Stage),
3164 input_components,
3165 max_input_components);
3166
3167 return false;
3168 }
3169
3170 return true;
3171 }
3172
3173 bool
3174 link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
3175 struct gl_context *ctx, void *mem_ctx)
3176 {
3177 bool has_xfb_qualifiers = false;
3178 unsigned num_tfeedback_decls = 0;
3179 char **varying_names = NULL;
3180 tfeedback_decl *tfeedback_decls = NULL;
3181
3182 /* From the ARB_enhanced_layouts spec:
3183 *
3184 * "If the shader used to record output variables for transform feedback
3185 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
3186 * qualifiers, the values specified by TransformFeedbackVaryings are
3187 * ignored, and the set of variables captured for transform feedback is
3188 * instead derived from the specified layout qualifiers."
3189 */
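/* For illustration (hypothetical shader, not from this file): declaring
 *
 *    layout(xfb_buffer = 0, xfb_offset = 0) out vec4 pos_out;
 *
 * in the last vertex-processing stage causes any names passed through
 * TransformFeedbackVaryings to be ignored; the captured set is derived
 * from the xfb_* layout qualifiers instead.
 */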
3190 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
3191 /* Find last stage before fragment shader */
3192 if (prog->_LinkedShaders[i]) {
3193 has_xfb_qualifiers =
3194 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
3195 prog, &num_tfeedback_decls,
3196 &varying_names);
3197 break;
3198 }
3199 }
3200
3201 if (!has_xfb_qualifiers) {
3202 num_tfeedback_decls = prog->TransformFeedback.NumVarying;
3203 varying_names = prog->TransformFeedback.VaryingNames;
3204 }
3205
3206 if (num_tfeedback_decls != 0) {
3207 /* From GL_EXT_transform_feedback:
3208 * A program will fail to link if:
3209 *
3210 * * the <count> specified by TransformFeedbackVaryingsEXT is
3211 * non-zero, but the program object has no vertex or geometry
3212 * shader;
3213 */
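/* E.g. a program whose only stage is a fragment shader, combined with a
 * non-zero TransformFeedbackVaryings count, must be rejected; the check
 * below enforces that.
 */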
3214 if (first >= MESA_SHADER_FRAGMENT) {
3215 linker_error(prog, "Transform feedback varyings specified, but "
3216 "no vertex, tessellation, or geometry shader is "
3217 "present.\n");
3218 return false;
3219 }
3220
3221 tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
3222 num_tfeedback_decls);
3223 if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
3224 varying_names, tfeedback_decls))
3225 return false;
3226 }
3227
3228 /* If there is no fragment shader we still need to set up transform feedback.
3229 *
3230 * For SSO we also need to assign output locations. We assign them here
3231 * because we need to do it for both single-stage and multi-stage
3232 * programs.
3233 */
3234 if (last < MESA_SHADER_FRAGMENT &&
3235 (num_tfeedback_decls != 0 || prog->SeparateShader)) {
3236 const uint64_t reserved_out_slots =
3237 reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
3238 if (!assign_varying_locations(ctx, mem_ctx, prog,
3239 prog->_LinkedShaders[last], NULL,
3240 num_tfeedback_decls, tfeedback_decls,
3241 reserved_out_slots))
3242 return false;
3243 }
3244
3245 if (last <= MESA_SHADER_FRAGMENT) {
3246 /* Remove unused varyings from the first/last stage unless SSO */
3247 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
3248 prog->_LinkedShaders[first],
3249 ir_var_shader_in);
3250 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
3251 prog->_LinkedShaders[last],
3252 ir_var_shader_out);
3253
3254 /* If the program is made up of only a single stage */
3255 if (first == last) {
3256 gl_linked_shader *const sh = prog->_LinkedShaders[last];
3257
3258 do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
3259 do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
3260 tfeedback_decls);
3261
3262 if (prog->SeparateShader) {
3263 const uint64_t reserved_slots =
3264 reserved_varying_slot(sh, ir_var_shader_in);
3265
3266 /* Assign input locations for SSO; output locations are already
3267 * assigned.
3268 */
3269 if (!assign_varying_locations(ctx, mem_ctx, prog,
3270 NULL /* producer */,
3271 sh /* consumer */,
3272 0 /* num_tfeedback_decls */,
3273 NULL /* tfeedback_decls */,
3274 reserved_slots))
3275 return false;
3276 }
3277 } else {
3278 /* Linking the stages in the opposite order (from fragment to vertex)
3279 * ensures that inter-shader outputs written to in an earlier stage
3280 * are eliminated if they are (transitively) not used in a later
3281 * stage.
3282 */
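/* Illustration (hypothetical varying "foo"): if the vertex shader writes
 * foo, the geometry shader only passes it through and the fragment shader
 * never reads it, handling the GS->FS interface first drops the unused GS
 * output, and foo then becomes (transitively) unused when the VS->GS
 * interface is handled.
 */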
3283 int next = last;
3284 for (int i = next - 1; i >= 0; i--) {
3285 if (prog->_LinkedShaders[i] == NULL && i != 0)
3286 continue;
3287
3288 gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
3289 gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
3290
3291 const uint64_t reserved_out_slots =
3292 reserved_varying_slot(sh_i, ir_var_shader_out);
3293 const uint64_t reserved_in_slots =
3294 reserved_varying_slot(sh_next, ir_var_shader_in);
3295
3296 do_dead_builtin_varyings(ctx, sh_i, sh_next,
3297 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
3298 tfeedback_decls);
3299
3300 if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
3301 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
3302 tfeedback_decls,
3303 reserved_out_slots | reserved_in_slots))
3304 return false;
3305
3306 /* This must be done after all dead varyings are eliminated. */
3307 if (sh_i != NULL) {
3308 unsigned slots_used = util_bitcount64(reserved_out_slots);
3309 if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
3310 return false;
3311 }
3312 }
3313
3314 unsigned slots_used = util_bitcount64(reserved_in_slots);
3315 if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
3316 return false;
3317
3318 next = i;
3319 }
3320 }
3321 }
3322
3323 if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
3324 has_xfb_qualifiers, mem_ctx))
3325 return false;
3326
3327 return true;
3328 }
3329