/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * Linker functions related specifically to linking varyings between shader
 * stages.
 */

#include "main/errors.h"
#include "main/macros.h"
#include "main/menums.h"
#include "main/mtypes.h"
#include "program/symbol_table.h"
#include "util/hash_table.h"
#include "util/u_math.h"
#include "util/perf/cpu_trace.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "gl_nir.h"
#include "gl_nir_link_varyings.h"
#include "gl_nir_linker.h"
#include "linker_util.h"
#include "string_to_uint_map.h"

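/* Note: shifting by the full bit width of the operand is undefined behavior
 * in C, so an index of 32 or more falls back to an all-ones mask here.
 */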
#define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0 : ((1 << (i)) - 1))

/* Temporary storage for the set of attributes that need locations assigned. */
struct temp_attr {
   unsigned slots;
   nir_variable *var;
};

/* Used below in the call to qsort. */
static int
compare_attr(const void *a, const void *b)
{
   const struct temp_attr *const l = (const struct temp_attr *) a;
   const struct temp_attr *const r = (const struct temp_attr *) b;

   /* Reversed because we want a descending order sort below. */
   return r->slots - l->slots;
}

/**
 * Get the varying type stripped of the outermost array if we're processing
 * a stage whose varyings are arrays indexed by a vertex number (such as
 * geometry shader inputs).
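 * For example, a geometry shader input declared `in vec4 foo[]` has the
 * type vec4[N] here; this function returns vec4.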
 */
static const struct glsl_type *
get_varying_type(const nir_variable *var, gl_shader_stage stage)
{
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return type;
}

/**
 * Find a contiguous set of available bits in a bitmask.
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
static int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   unsigned needed_mask = (1 << needed_count) - 1;
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

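   /* Slide the window of needed_count contiguous bits across the mask until
    * it lands on a run of unused locations.
    */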
   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}

/* Find deref based on variable name.
 * Note: This function does not support arrays.
 */
static bool
find_deref(nir_shader *shader, const char *name)
{
   nir_foreach_function(func, shader) {
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               nir_deref_instr *deref = nir_instr_as_deref(instr);
               if (deref->deref_type == nir_deref_type_var &&
                   strcmp(deref->var->name, name) == 0)
                  return true;
            }
         }
      }
   }

   return false;
}

/**
 * Validate the types and qualifiers of an output from one stage against the
 * matching input to another stage.
 */
static void
cross_validate_types_and_qualifiers(const struct gl_constants *consts,
                                    struct gl_shader_program *prog,
                                    const nir_variable *input,
                                    const nir_variable *output,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   /* Check that the types match between stages.
    */
   const struct glsl_type *type_to_match = input->type;

   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
                                  consumer_stage == MESA_SHADER_GEOMETRY;
   if (extra_array_level) {
      assert(glsl_type_is_array(type_to_match));
      type_to_match = glsl_get_array_element(type_to_match);
   }

   if (type_to_match != output->type) {
      if (glsl_type_is_struct(output->type)) {
         /* Structures across shader stages can have different names and are
          * considered to match in type if and only if their members match
          * in name, type, qualification, and declaration order. The
          * precision does not need to match.
          */
         if (!glsl_record_compare(output->type, type_to_match,
                                  false, /* match_name */
                                  true, /* match_locations */
                                  false /* match_precision */)) {
            linker_error(prog,
                         "%s shader output `%s' declared as struct `%s', "
                         "doesn't match in type with %s shader input "
                         "declared as struct `%s'\n",
                         _mesa_shader_stage_to_string(producer_stage),
                         output->name,
                         glsl_get_type_name(output->type),
                         _mesa_shader_stage_to_string(consumer_stage),
                         glsl_get_type_name(input->type));
         }
      } else if (!glsl_type_is_array(output->type) ||
                 !is_gl_identifier(output->name)) {
         /* There is a bit of a special case for gl_TexCoord. This built-in
          * is unsized by default. Applications that access it must
          * redeclare it with a size. There is some language in the GLSL
          * spec that implies the fragment shader and vertex shader do not
          * have to agree on this size. Other drivers behave this way, and
          * one or two applications seem to rely on it.
          *
          * Neither declaration needs to be modified here because the array
          * sizes are fixed later when update_array_sizes is called.
          *
          * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
          *
          *     "Unlike user-defined varying variables, the built-in
          *     varying variables don't have a strict one-to-one
          *     correspondence between the vertex language and the
          *     fragment language."
          */
         linker_error(prog,
                      "%s shader output `%s' declared as type `%s', "
                      "but %s shader input declared as type `%s'\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      glsl_get_type_name(output->type),
                      _mesa_shader_stage_to_string(consumer_stage),
                      glsl_get_type_name(input->type));
         return;
      }
   }

   /* Check that all of the qualifiers match between stages.
    */

   /* According to the OpenGL and OpenGL ES GLSL specs, the centroid
    * qualifier is required to match until OpenGL 4.3 and OpenGL ES 3.1.
    * However, the OpenGL ES 3.0 conformance test suite does not verify
    * that the qualifiers match, and the deqp test suite expects the
    * opposite (OpenGL ES 3.1) behavior even for OpenGL ES 3.0 drivers, so
    * we relax the checking in all cases.
    */
   if (false /* always skip the centroid check */ &&
       prog->GLSL_Version < (prog->IsES ? 310 : 430) &&
       input->data.centroid != output->data.centroid) {
      linker_error(prog,
                   "%s shader output `%s' %s centroid qualifier, "
                   "but %s shader input %s centroid qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.centroid) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.centroid) ? "has" : "lacks");
      return;
   }

   if (input->data.sample != output->data.sample) {
      linker_error(prog,
                   "%s shader output `%s' %s sample qualifier, "
                   "but %s shader input %s sample qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.sample) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.sample) ? "has" : "lacks");
      return;
   }

   if (input->data.patch != output->data.patch) {
      linker_error(prog,
                   "%s shader output `%s' %s patch qualifier, "
                   "but %s shader input %s patch qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.patch) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.patch) ? "has" : "lacks");
      return;
   }

   /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
    *
    *     "As only outputs need be declared with invariant, an output from
    *     one shader stage will still match an input of a subsequent stage
    *     without the input being declared as invariant."
    *
    * while GLSL 4.10 says:
    *
    *     "For variables leaving one shader and coming into another shader,
    *     the invariant keyword has to be used in both shaders, or a link
    *     error will result."
    *
    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
    *
    *     "The invariance of varyings that are declared in both the vertex
    *     and fragment shaders must match."
    */
   if (input->data.explicit_invariant != output->data.explicit_invariant &&
       prog->GLSL_Version < (prog->IsES ? 300 : 420)) {
      linker_error(prog,
                   "%s shader output `%s' %s invariant qualifier, "
                   "but %s shader input %s invariant qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.explicit_invariant) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.explicit_invariant) ? "has" : "lacks");
      return;
   }

   /* GLSL >= 4.40 removes the text requiring interpolation qualifiers to
    * match across stages; they must only match within the same stage.
    *
    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
    *
    *     "It is a link-time error if, within the same stage, the
    *     interpolation qualifiers of variables of the same name do not
    *     match."
    *
    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
    *
    *     "When no interpolation qualifier is present, smooth interpolation
    *     is used."
    *
    * So we match variables where one is smooth and the other has no
    * explicit qualifier.
    */
   unsigned input_interpolation = input->data.interpolation;
   unsigned output_interpolation = output->data.interpolation;
   if (prog->IsES) {
      if (input_interpolation == INTERP_MODE_NONE)
         input_interpolation = INTERP_MODE_SMOOTH;
      if (output_interpolation == INTERP_MODE_NONE)
         output_interpolation = INTERP_MODE_SMOOTH;
   }
   if (input_interpolation != output_interpolation &&
       prog->GLSL_Version < 440) {
      if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
         linker_error(prog,
                      "%s shader output `%s' specifies %s "
                      "interpolation qualifier, "
                      "but %s shader input specifies %s "
                      "interpolation qualifier\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      interpolation_string(output->data.interpolation),
                      _mesa_shader_stage_to_string(consumer_stage),
                      interpolation_string(input->data.interpolation));
         return;
      } else {
         linker_warning(prog,
                        "%s shader output `%s' specifies %s "
                        "interpolation qualifier, "
                        "but %s shader input specifies %s "
                        "interpolation qualifier\n",
                        _mesa_shader_stage_to_string(producer_stage),
                        output->name,
                        interpolation_string(output->data.interpolation),
                        _mesa_shader_stage_to_string(consumer_stage),
                        interpolation_string(input->data.interpolation));
      }
   }
}

/**
 * Validate front and back color outputs against a single color input.
 */
static void
cross_validate_front_and_back_color(const struct gl_constants *consts,
                                    struct gl_shader_program *prog,
                                    const nir_variable *input,
                                    const nir_variable *front_color,
                                    const nir_variable *back_color,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   if (front_color != NULL && front_color->data.assigned)
      cross_validate_types_and_qualifiers(consts, prog, input, front_color,
                                          consumer_stage, producer_stage);

   if (back_color != NULL && back_color->data.assigned)
      cross_validate_types_and_qualifiers(consts, prog, input, back_color,
                                          consumer_stage, producer_stage);
}

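/* Map a variable's absolute location (e.g. VARYING_SLOT_VAR3) to a
 * zero-based slot index relative to the start of the generic range for the
 * variable's mode and stage.
 */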
static unsigned
compute_variable_location_slot(nir_variable *var, gl_shader_stage stage)
{
   unsigned location_start = VARYING_SLOT_VAR0;

   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (var->data.mode == nir_var_shader_in)
         location_start = VERT_ATTRIB_GENERIC0;
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
      if (var->data.patch)
         location_start = VARYING_SLOT_PATCH0;
      break;
   case MESA_SHADER_FRAGMENT:
      if (var->data.mode == nir_var_shader_out)
         location_start = FRAG_RESULT_DATA0;
      break;
   default:
      break;
   }

   return var->data.location - location_start;
}


struct explicit_location_info {
   nir_variable *var;
   bool base_type_is_integer;
   unsigned base_type_bit_size;
   unsigned interpolation;
   bool centroid;
   bool sample;
   bool patch;
};

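/* Check that a variable with an explicit location and component does not
 * alias another variable in a way the spec forbids, recording the
 * components it occupies in explicit_locations as a side effect.
 */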
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        nir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const struct glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        struct gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const struct glsl_type *type_without_array = glsl_without_array(type);
   const bool base_type_is_integer =
      glsl_base_type_is_integer(glsl_get_base_type(type_without_array));
   const bool is_struct = glsl_type_is_struct(type_without_array);
   if (is_struct) {
      /* Structs don't have a defined underlying base type so just treat all
       * component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail anyway later.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
      unsigned dmul = glsl_type_is_64bit(type_without_array) ? 2 : 1;
      last_comp = component + glsl_get_vector_elements(type_without_array) * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(glsl_get_base_type(type_without_array));
   }

   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            if (glsl_type_is_struct(glsl_without_array(info->var->type)) ||
                is_struct) {
               /* Structs cannot share a location since they are incompatible
                * with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *     "Further, when location aliasing, the aliases sharing
                *     the location must have the same underlying numerical
                *     type and bit width (floating-point or integer, 32-bit
                *     versus 64-bit, etc.) and the same auxiliary storage and
                *     interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, implicitly
                * it will be float or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            last_comp = last_comp - 4;
            /* Bump the location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}

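/* Validate a single explicitly-located varying against the per-stage
 * component limits and against the variables already recorded in
 * explicit_locations.
 */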
static bool
validate_explicit_variable_location(const struct gl_constants *consts,
                                    struct explicit_location_info explicit_locations[][4],
                                    nir_variable *var,
                                    struct gl_shader_program *prog,
                                    struct gl_linked_shader *sh)
{
   const struct glsl_type *type = get_varying_type(var, sh->Stage);
   unsigned num_elements = glsl_count_attribute_slots(type, false);
   unsigned idx = compute_variable_location_slot(var, sh->Stage);
   unsigned slot_limit = idx + num_elements;

   /* Vertex shader inputs and fragment shader outputs are validated in
    * assign_attribute_or_color_locations() so we should not attempt to
    * validate them again here.
    */
   unsigned slot_max;
   if (var->data.mode == nir_var_shader_out) {
      assert(sh->Stage != MESA_SHADER_FRAGMENT);
      slot_max = consts->Program[sh->Stage].MaxOutputComponents / 4;
   } else {
      assert(var->data.mode == nir_var_shader_in);
      assert(sh->Stage != MESA_SHADER_VERTEX);
      slot_max = consts->Program[sh->Stage].MaxInputComponents / 4;
   }

   if (slot_limit > slot_max) {
      linker_error(prog,
                   "Invalid location %u in %s shader\n",
                   idx, _mesa_shader_stage_to_string(sh->Stage));
      return false;
   }

   const struct glsl_type *type_without_array = glsl_without_array(type);
   if (glsl_type_is_interface(type_without_array)) {
      for (unsigned i = 0; i < glsl_get_length(type_without_array); i++) {
         const struct glsl_struct_field *field =
            glsl_get_struct_field_data(type_without_array, i);
         unsigned field_location = field->location -
            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
         unsigned field_slots = glsl_count_attribute_slots(field->type, false);
         if (!check_location_aliasing(explicit_locations, var,
                                      field_location,
                                      0,
                                      field_location + field_slots,
                                      field->type,
                                      field->interpolation,
                                      field->centroid,
                                      field->sample,
                                      field->patch,
                                      prog, sh->Stage)) {
            return false;
         }
      }
   } else if (!check_location_aliasing(explicit_locations, var,
                                       idx, var->data.location_frac,
                                       slot_limit, type,
                                       var->data.interpolation,
                                       var->data.centroid,
                                       var->data.sample,
                                       var->data.patch,
                                       prog, sh->Stage)) {
      return false;
   }

   return true;
}

/**
 * Validate explicit locations for the inputs to the first stage and the
 * outputs of the last stage in a program, if those are not the VS and FS
 * shaders.
 */
void
gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
                                                            struct gl_shader_program *prog,
                                                            gl_shader_stage first_stage,
                                                            gl_shader_stage last_stage)
{
   /* VS inputs and FS outputs are validated in
    * assign_attribute_or_color_locations()
    */
   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
   if (!validate_first_stage && !validate_last_stage)
      return;

   struct explicit_location_info explicit_locations[MAX_VARYING][4];

   gl_shader_stage stages[2] = { first_stage, last_stage };
   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
   nir_variable_mode var_mode[2] = { nir_var_shader_in, nir_var_shader_out };

   for (unsigned i = 0; i < 2; i++) {
      if (!validate_stage[i])
         continue;

      gl_shader_stage stage = stages[i];

      struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
      assert(sh);

      memset(explicit_locations, 0, sizeof(explicit_locations));

      nir_foreach_variable_with_modes(var, sh->Program->nir, var_mode[i]) {
         if (!var->data.explicit_location ||
             var->data.location < VARYING_SLOT_VAR0)
            continue;

         if (!validate_explicit_variable_location(consts, explicit_locations,
                                                  var, prog, sh)) {
            return;
         }
      }
   }
}

/**
 * Check if we should force input / output matching between shader
 * interfaces.
 *
 * Section 4.3.4 (Inputs) of the GLSL 4.10 specification says:
 *
 *     "Only the input variables that are actually read need to be
 *     written by the previous stage; it is allowed to have
 *     superfluous declarations of input variables."
 *
 * However, it is not defined anywhere how we should handle inputs that are
 * not written in the previous stage, and it is not clear what "actually
 * read" means.
 *
 * The GLSL 4.20 spec, however, is much clearer:
 *
 *     "Only the input variables that are statically read need to
 *     be written by the previous stage; it is allowed to have
 *     superfluous declarations of input variables."
 *
 * It also has a table that states it is an error to statically read an
 * input that is not defined in the previous stage, while it is not an
 * error to not statically write to an output (it just needs to be defined
 * to not be an error).
 *
 * The text in the GLSL 4.20 spec was an attempt to clarify the previous
 * spec iterations. However, given the difference in spec text, and given
 * that some applications seem to depend on not erroring when the input is
 * not actually read in control flow, we only apply this rule to GLSL 4.20
 * and higher. GLSL 4.10 shaders have been seen in the wild that depend on
 * the less strict interpretation.
 */
static bool
static_input_output_matching(struct gl_shader_program *prog)
{
   return prog->GLSL_Version >= (prog->IsES ? 0 : 420);
}

/**
 * Validate that outputs from one stage match inputs of another.
 */
void
gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
                                        struct gl_shader_program *prog,
                                        struct gl_linked_shader *producer,
                                        struct gl_linked_shader *consumer)
{
   struct _mesa_symbol_table *table = _mesa_symbol_table_ctor();
   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {0};
   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {0};

   /* Find all shader outputs in the "producer" stage.
    */
   nir_foreach_variable_with_modes(var, producer->Program->nir, nir_var_shader_out) {
      if (!var->data.explicit_location
          || var->data.location < VARYING_SLOT_VAR0) {
         /* Interface block validation is handled elsewhere */
         if (!var->interface_type || is_gl_identifier(var->name))
            _mesa_symbol_table_add_symbol(table, var->name, var);

      } else {
         /* User-defined varyings with explicit locations are handled
          * differently because they do not need to have matching names.
          */
         if (!validate_explicit_variable_location(consts,
                                                  output_explicit_locations,
                                                  var, prog, producer)) {
            goto out;
         }
      }
   }

   /* Find all shader inputs in the "consumer" stage. Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    *
    * Exception: if the consumer is the geometry shader, then the inputs
    * should be arrays and the type of the array element should match the type
    * of the corresponding producer output.
    */
   nir_foreach_variable_with_modes(input, consumer->Program->nir, nir_var_shader_in) {
      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontSecondaryColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackSecondaryColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else {
         /* The rules for connecting inputs and outputs change in the presence
          * of explicit locations. In this case, we no longer care about the
          * names of the variables. Instead, we care only about the
          * explicitly assigned location.
          */
         nir_variable *output = NULL;
         if (input->data.explicit_location
             && input->data.location >= VARYING_SLOT_VAR0) {

            const struct glsl_type *type =
               get_varying_type(input, consumer->Stage);
            unsigned num_elements = glsl_count_attribute_slots(type, false);
            unsigned idx =
               compute_variable_location_slot(input, consumer->Stage);
            unsigned slot_limit = idx + num_elements;

            if (!validate_explicit_variable_location(consts,
                                                     input_explicit_locations,
                                                     input, prog, consumer)) {
               goto out;
            }

            while (idx < slot_limit) {
               if (idx >= MAX_VARYING) {
                  linker_error(prog,
                               "Invalid location %u in %s shader\n", idx,
                               _mesa_shader_stage_to_string(consumer->Stage));
                  goto out;
               }

               output = output_explicit_locations[idx][input->data.location_frac].var;

               if (output == NULL) {
                  /* A linker failure should only happen when there is no
                   * output declaration and there is Static Use of the
                   * declared input.
                   */
                  if (input->data.used && static_input_output_matching(prog)) {
                     linker_error(prog,
                                  "%s shader input `%s' with explicit location "
                                  "has no matching output\n",
                                  _mesa_shader_stage_to_string(consumer->Stage),
                                  input->name);
                     break;
                  }
               } else if (input->data.location != output->data.location) {
                  linker_error(prog,
                               "%s shader input `%s' with explicit location "
                               "has no matching output\n",
                               _mesa_shader_stage_to_string(consumer->Stage),
                               input->name);
                  break;
               }
               idx++;
            }
         } else {
            /* Interface block validation is handled elsewhere */
            if (input->interface_type)
               continue;

            output = (nir_variable *)
               _mesa_symbol_table_find_symbol(table, input->name);
         }

         if (output != NULL) {
            /* Interface blocks have their own validation elsewhere so don't
             * try validating them here.
             */
            if (!(input->interface_type && output->interface_type))
               cross_validate_types_and_qualifiers(consts, prog, input, output,
                                                   consumer->Stage,
                                                   producer->Stage);
         } else {
            /* Check for input variables with unmatched output variables in
             * the previous stage, taking into account that interface blocks
             * could have a matching output but with a different name, so we
             * ignore them.
             */
            assert(!input->data.assigned);
            if (input->data.used && !input->interface_type &&
                !input->data.explicit_location &&
                static_input_output_matching(prog))
               linker_error(prog,
                            "%s shader input `%s' "
                            "has no matching output in the previous stage\n",
                            _mesa_shader_stage_to_string(consumer->Stage),
                            input->name);
         }
      }
   }

out:
   _mesa_symbol_table_dtor(table);
}

/**
 * Assign locations for either VS inputs or FS outputs.
 *
 * \param mem_ctx        Temporary ralloc context used for linking.
 * \param prog           Shader program whose variables need locations
 *                       assigned.
 * \param constants      Driver specific constant values for the program.
 * \param target_index   Selector for the program target to receive location
 *                       assignments. Must be either \c MESA_SHADER_VERTEX or
 *                       \c MESA_SHADER_FRAGMENT.
 * \param do_assignment  Whether we are actually marking the assignment or we
 *                       are just doing a dry run.
 *
 * \return
 * If locations are (or can be, in case of a dry run) successfully assigned,
 * true is returned. Otherwise an error is emitted to the shader link log and
 * false is returned.
 */
static bool
assign_attribute_or_color_locations(void *mem_ctx,
                                    struct gl_shader_program *prog,
                                    const struct gl_constants *constants,
                                    unsigned target_index,
                                    bool do_assignment)
{
   /* Maximum number of generic locations. This corresponds to either the
    * maximum number of draw buffers or the maximum number of generic
    * attributes.
    */
   unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
      constants->Program[target_index].MaxAttribs :
      MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);

   assert(max_index <= 32);
   struct temp_attr to_assign[32];

   /* Mark invalid locations as being used.
    */
   unsigned used_locations = ~SAFE_MASK_FROM_INDEX(max_index);
   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
          || (target_index == MESA_SHADER_FRAGMENT));

   if (prog->_LinkedShaders[target_index] == NULL)
      return true;

   /* Operate in a total of four passes.
    *
    * 1. Invalidate the location assignments for all vertex shader inputs.
    *
    * 2. Assign locations for inputs that have user-defined (via
    *    glBindVertexAttribLocation) locations and outputs that have
    *    user-defined locations (via glBindFragDataLocation).
    *
    * 3. Sort the attributes without assigned locations by number of slots
    *    required in decreasing order. Fragmentation caused by attribute
    *    locations assigned by the application may prevent large attributes
    *    from having enough contiguous space.
    *
    * 4. Assign locations to any inputs without assigned locations.
    */

   const int generic_base = (target_index == MESA_SHADER_VERTEX)
      ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;

   nir_variable_mode io_mode =
      (target_index == MESA_SHADER_VERTEX)
      ? nir_var_shader_in : nir_var_shader_out;

   /* Temporary array for the set of attributes that have locations assigned,
    * for the purpose of checking overlapping slots/components of (non-ES)
    * fragment shader outputs.
    */
   nir_variable *assigned[FRAG_RESULT_MAX * 4]; /* (max # of FS outputs) * # components */
   unsigned assigned_attr = 0;

   unsigned num_attr = 0;

   nir_shader *shader = prog->_LinkedShaders[target_index]->Program->nir;
   nir_foreach_variable_with_modes(var, shader, io_mode) {

      if (var->data.explicit_location) {
         if ((var->data.location >= (int)(max_index + generic_base))
             || (var->data.location < 0)) {
            linker_error(prog,
                         "invalid explicit location %d specified for `%s'\n",
                         (var->data.location < 0)
                         ? var->data.location
                         : var->data.location - generic_base,
                         var->name);
            return false;
         }
      } else if (target_index == MESA_SHADER_VERTEX) {
         unsigned binding;

         if (string_to_uint_map_get(prog->AttributeBindings, &binding, var->name)) {
            assert(binding >= VERT_ATTRIB_GENERIC0);
            var->data.location = binding;
         }
      } else if (target_index == MESA_SHADER_FRAGMENT) {
         unsigned binding;
         unsigned index;
         const char *name = var->name;
         const struct glsl_type *type = var->type;

         while (type) {
            /* Check if there's a binding for the variable name */
            if (string_to_uint_map_get(prog->FragDataBindings, &binding, name)) {
               assert(binding >= FRAG_RESULT_DATA0);
               var->data.location = binding;

               if (string_to_uint_map_get(prog->FragDataIndexBindings, &index, name)) {
                  var->data.index = index;
               }
               break;
            }

            /* If not, but it's an array type, look for name[0] */
            if (glsl_type_is_array(type)) {
               name = ralloc_asprintf(mem_ctx, "%s[0]", name);
               type = glsl_get_array_element(type);
               continue;
            }

            break;
         }
      }

      if (strcmp(var->name, "gl_LastFragData") == 0)
         continue;

      /* From the GL 4.5 core spec, section 15.2 (Shader Execution):
       *
       *     "Output binding assignments will cause LinkProgram to fail:
       *     ...
       *     If the program has an active output assigned to a location greater
       *     than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
       *     an active output assigned an index greater than or equal to one;"
       */
      if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
          var->data.location - generic_base >=
          (int) constants->MaxDualSourceDrawBuffers) {
         linker_error(prog,
                      "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
                      "with index %u for %s\n",
                      var->data.location - generic_base, var->data.index,
                      var->name);
         return false;
      }

      const unsigned slots =
         glsl_count_attribute_slots(var->type,
                                    target_index == MESA_SHADER_VERTEX);

      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations. Otherwise,
       * add it to the list of variables that need linker-assigned locations.
       */
      if (var->data.location != -1) {
         if (var->data.location >= generic_base && var->data.index < 1) {
            /* From page 61 of the OpenGL 4.0 spec:
             *
             *     "LinkProgram will fail if the attribute bindings assigned
             *     by BindAttribLocation do not leave not enough space to
             *     assign a location for an active matrix attribute or an
             *     active attribute array, both of which require multiple
             *     contiguous generic attributes."
             *
             * I think the above text prohibits aliasing between explicit
             * and automatic assignments, but aliasing is allowed in manual
             * assignments of attribute locations. See the comments below
             * for the details.
             *
             * From OpenGL 4.0 spec, page 61:
             *
             *     "It is possible for an application to bind more than one
             *     attribute name to the same location. This is referred to as
             *     aliasing. This will only work if only one of the aliased
             *     attributes is active in the executable program, or if no
             *     path through the shader consumes more than one attribute of
             *     a set of attributes aliased to the same location. A link
             *     error can occur if the linker determines that every path
             *     through the shader consumes multiple aliased attributes,
             *     but implementations are not required to generate an error
             *     in this case."
             *
             * From GLSL 4.30 spec, page 54:
             *
             *     "A program will fail to link if any two non-vertex shader
             *     input variables are assigned to the same location. For
             *     vertex shaders, multiple input variables may be assigned
             *     to the same location using either layout qualifiers or via
             *     the OpenGL API. However, such aliasing is intended only to
             *     support vertex shaders where each execution path accesses
             *     at most one input per each location. Implementations are
             *     permitted, but not required, to generate link-time errors
             *     if they detect that every path through the vertex shader
             *     executable accesses multiple inputs assigned to any single
             *     location. For all shader types, a program will fail to link
             *     if explicit location assignments leave the linker unable
             *     to find space for other variables without explicit
             *     assignments."
             *
             * From OpenGL ES 3.0 spec, page 56:
             *
             *     "Binding more than one attribute name to the same location
             *     is referred to as aliasing, and is not permitted in OpenGL
             *     ES Shading Language 3.00 vertex shaders. LinkProgram will
             *     fail when this condition exists. However, aliasing is
             *     possible in OpenGL ES Shading Language 1.00 vertex shaders.
             *     This will only work if only one of the aliased attributes
             *     is active in the executable program, or if no path through
             *     the shader consumes more than one attribute of a set of
             *     attributes aliased to the same location. A link error can
             *     occur if the linker determines that every path through the
             *     shader consumes multiple aliased attributes, but
             *     implementations are not required to generate an error in
             *     this case."
             *
             * After looking at the above references from OpenGL, OpenGL ES
             * and the GLSL specifications, we allow aliasing of vertex input
             * variables in: OpenGL 2.0 (and above) and OpenGL ES 2.0.
             *
             * NOTE: This is not required by the spec, but it's worth
             * mentioning here that we're not doing anything to make sure
             * that no path through the vertex shader executable accesses
             * multiple inputs assigned to any single location.
             */

            /* Mask representing the contiguous slots that will be used by
             * this attribute.
             */
            const unsigned attr = var->data.location - generic_base;
            const unsigned use_mask = (1 << slots) - 1;
            const char *const string = (target_index == MESA_SHADER_VERTEX)
               ? "vertex shader input" : "fragment shader output";

            /* Generate a link error if the requested locations for this
             * attribute exceed the maximum allowed attribute location.
             */
            if (attr + slots > max_index) {
               linker_error(prog,
                            "insufficient contiguous locations "
                            "available for %s `%s' %d %d %d\n", string,
                            var->name, used_locations, use_mask, attr);
               return false;
            }

            /* Generate a link error if the set of bits requested for this
             * attribute overlaps any previously allocated bits.
             */
            if ((~(use_mask << attr) & used_locations) != used_locations) {
               if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
                  /* From section 4.4.2 (Output Layout Qualifiers) of the GLSL
                   * 4.40 spec:
                   *
                   *     "Additionally, for fragment shader outputs, if two
                   *     variables are placed within the same location, they
                   *     must have the same underlying type (floating-point or
                   *     integer). No component aliasing of output variables
                   *     or members is allowed."
                   */
                  for (unsigned i = 0; i < assigned_attr; i++) {
                     unsigned assigned_slots =
                        glsl_count_attribute_slots(assigned[i]->type, false);
                     unsigned assig_attr =
                        assigned[i]->data.location - generic_base;
                     unsigned assigned_use_mask = (1 << assigned_slots) - 1;

                     if ((assigned_use_mask << assig_attr) &
                         (use_mask << attr)) {

                        const struct glsl_type *assigned_type =
                           glsl_without_array(assigned[i]->type);
                        const struct glsl_type *type =
                           glsl_without_array(var->type);
                        if (glsl_get_base_type(assigned_type) !=
                            glsl_get_base_type(type)) {
                           linker_error(prog, "types do not match for aliased"
                                        " %ss %s and %s\n", string,
                                        assigned[i]->name, var->name);
                           return false;
                        }

                        unsigned assigned_component_mask =
                           ((1 << glsl_get_vector_elements(assigned_type)) - 1) <<
                           assigned[i]->data.location_frac;
                        unsigned component_mask =
                           ((1 << glsl_get_vector_elements(type)) - 1) <<
                           var->data.location_frac;
                        if (assigned_component_mask & component_mask) {
                           linker_error(prog, "overlapping component is "
                                        "assigned to %ss %s and %s "
                                        "(component=%d)\n",
                                        string, assigned[i]->name, var->name,
                                        var->data.location_frac);
                           return false;
                        }
                     }
                  }
               } else if (target_index == MESA_SHADER_FRAGMENT ||
                          (prog->IsES && prog->GLSL_Version >= 300)) {
                  linker_error(prog, "overlapping location is assigned "
                               "to %s `%s' %d %d %d\n", string, var->name,
                               used_locations, use_mask, attr);
                  return false;
               } else {
                  linker_warning(prog, "overlapping location is assigned "
                                 "to %s `%s' %d %d %d\n", string, var->name,
                                 used_locations, use_mask, attr);
               }
            }

            if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
               /* Only track assigned variables for non-ES fragment shaders
                * to avoid overflowing the array.
                *
                * At most one variable per fragment output component should
                * reach this.
                */
               assert(assigned_attr < ARRAY_SIZE(assigned));
               assigned[assigned_attr] = var;
               assigned_attr++;
            }

            used_locations |= (use_mask << attr);

            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
             *
             *     "A program with more than the value of MAX_VERTEX_ATTRIBS
             *     active attribute variables may fail to link, unless
             *     device-dependent optimizations are able to make the program
             *     fit within available hardware resources. For the purposes
             *     of this test, attribute variables of the type dvec3, dvec4,
             *     dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
             *     count as consuming twice as many attributes as equivalent
             *     single-precision types. While these types use the same
             *     number of generic attributes as their single-precision
             *     equivalents, implementations are permitted to consume two
             *     single-precision vectors of internal storage for each
             *     three- or four-component double-precision vector."
             *
             * Mark this attribute slot as taking up twice as much space
             * so we can count it properly against limits. According to
             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
             * is optional behavior, but it seems preferable.
             */
            if (glsl_type_is_dual_slot(glsl_without_array(var->type)))
               double_storage_locations |= (use_mask << attr);
         }

         continue;
      }

      if (num_attr >= max_index) {
         linker_error(prog, "too many %s (max %u)",
                      target_index == MESA_SHADER_VERTEX ?
                      "vertex shader inputs" : "fragment shader outputs",
                      max_index);
         return false;
      }
      to_assign[num_attr].slots = slots;
      to_assign[num_attr].var = var;
      num_attr++;
   }

   if (!do_assignment)
      return true;

   if (target_index == MESA_SHADER_VERTEX) {
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots only %d available ",
                      total_attribs_size, max_index);
         return false;
      }
   }

   /* If all of the attributes were assigned locations by the application (or
    * are built-in attributes with fixed locations), return early. This should
    * be the common case.
    */
   if (num_attr == 0)
      return true;

   qsort(to_assign, num_attr, sizeof(to_assign[0]), &compare_attr);

   if (target_index == MESA_SHADER_VERTEX) {
      /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can
       * only be explicitly assigned via glBindAttribLocation. Mark it as
       * reserved to prevent it from being automatically allocated below.
       */
      if (find_deref(shader, "gl_Vertex"))
         used_locations |= (1 << 0);
   }

   for (unsigned i = 0; i < num_attr; i++) {
      /* Mask representing the contiguous slots that will be used by this
       * attribute.
       */
      const unsigned use_mask = (1 << to_assign[i].slots) - 1;

      int location = find_available_slots(used_locations, to_assign[i].slots);

      if (location < 0) {
         const char *const string = (target_index == MESA_SHADER_VERTEX)
            ? "vertex shader input" : "fragment shader output";

         linker_error(prog,
                      "insufficient contiguous locations "
                      "available for %s `%s'\n",
                      string, to_assign[i].var->name);
         return false;
      }

      to_assign[i].var->data.location = generic_base + location;
      used_locations |= (use_mask << location);

      if (glsl_type_is_dual_slot(glsl_without_array(to_assign[i].var->type)))
         double_storage_locations |= (use_mask << location);
   }

   /* Now that we have all the locations, from the GL 4.5 core spec, section
    * 11.1.1 (Vertex Attributes), dvec3, dvec4, dmat2x3, dmat2x4, dmat3,
    * dmat3x4, dmat4x3, and dmat4 count as consuming twice as many attributes
    * as equivalent single-precision types.
    */
   if (target_index == MESA_SHADER_VERTEX) {
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots only %d available ",
                      total_attribs_size, max_index);
         return false;
      }
   }

   return true;
}

static bool
varying_has_user_specified_location(const nir_variable *var)
{
   return var->data.explicit_location &&
          var->data.location >= VARYING_SLOT_VAR0;
}

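/* Recursively build the transform feedback name strings for a varying.
 * For example, an array of structs "s[2]" with float members "a" and "b"
 * yields the names "s[0].a", "s[0].b", "s[1].a" and "s[1].b".
 */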
static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}

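/* Scan the stage's outputs for transform feedback layout qualifiers, count
 * the xfb declarations they imply, and build the corresponding varying name
 * strings.
 */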
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_xfb_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also, we don't bother to propagate
    * xfb_stride to interface block members, so this will catch that case
    * as well.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      /* From the ARB_enhanced_layouts spec:
       *
       *     "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup. This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_offset) {
         *num_xfb_decls += glsl_varying_count(var->type);
         has_xfb_qualifiers = true;
      }
   }

   if (*num_xfb_decls == 0)
      return has_xfb_qualifiers;

   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      if (var->data.explicit_offset) {
         char *name;
         const struct glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            type = var->interface_type;

            /* Find the member type before it was altered by lowering */
            const struct glsl_type *type_wa = glsl_without_array(type);
            member_type =
               glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
            name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         ralloc_free(name);
      }
   }

   assert(i == *num_xfb_decls);
   return has_xfb_qualifiers;
}

/**
 * Initialize this struct based on a string that was passed to
 * glTransformFeedbackVaryings.
 *
 * If the input is mal-formed, this call still succeeds, but it sets
 * this->var_name to a mal-formed input, so xfb_decl_find_output_var()
 * will fail to find any matching variable.
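 *
 * Example inputs: "foo", "foo[2]", "gl_NextBuffer", "gl_SkipComponents3".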
1453 */
1454 static void
xfb_decl_init(struct xfb_decl * xfb_decl,const struct gl_constants * consts,const struct gl_extensions * exts,const void * mem_ctx,const char * input)1455 xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
1456 const struct gl_extensions *exts, const void *mem_ctx,
1457 const char *input)
1458 {
1459 /* We don't have to be pedantic about what is a valid GLSL variable name,
1460 * because any variable with an invalid name can't exist in the IR anyway.
1461 */
1462 xfb_decl->location = -1;
1463 xfb_decl->orig_name = input;
1464 xfb_decl->lowered_builtin_array_variable = none;
1465 xfb_decl->skip_components = 0;
1466 xfb_decl->next_buffer_separator = false;
1467 xfb_decl->matched_candidate = NULL;
1468 xfb_decl->stream_id = 0;
1469 xfb_decl->buffer = 0;
1470 xfb_decl->offset = 0;
1471
1472 if (exts->ARB_transform_feedback3) {
1473 /* Parse gl_NextBuffer. */
1474 if (strcmp(input, "gl_NextBuffer") == 0) {
1475 xfb_decl->next_buffer_separator = true;
1476 return;
1477 }
1478
1479 /* Parse gl_SkipComponents. */
1480 if (strcmp(input, "gl_SkipComponents1") == 0)
1481 xfb_decl->skip_components = 1;
1482 else if (strcmp(input, "gl_SkipComponents2") == 0)
1483 xfb_decl->skip_components = 2;
1484 else if (strcmp(input, "gl_SkipComponents3") == 0)
1485 xfb_decl->skip_components = 3;
1486 else if (strcmp(input, "gl_SkipComponents4") == 0)
1487 xfb_decl->skip_components = 4;
1488
1489 if (xfb_decl->skip_components)
1490 return;
1491 }
1492
1493 /* Parse a declaration. */
1494 const char *base_name_end;
1495 long subscript = link_util_parse_program_resource_name(input, strlen(input),
1496 &base_name_end);
1497 xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
1498 if (xfb_decl->var_name == NULL) {
1499 _mesa_error_no_memory(__func__);
1500 return;
1501 }
1502
1503 if (subscript >= 0) {
1504 xfb_decl->array_subscript = subscript;
1505 xfb_decl->is_subscripted = true;
1506 } else {
1507 xfb_decl->is_subscripted = false;
1508 }
1509
1510 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
1511 * class must behave specially to account for the fact that gl_ClipDistance
1512 * is converted from a float[8] to a vec4[2].
1513 */
1514 if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1515 strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
1516 xfb_decl->lowered_builtin_array_variable = clip_distance;
1517 }
1518 if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1519 strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
1520 xfb_decl->lowered_builtin_array_variable = cull_distance;
1521 }
1522 }
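/* For illustration (not part of the original sources), a few example inputs
 * and the fields xfb_decl_init() would set for them, assuming
 * ARB_transform_feedback3 is supported:
 *
 *   "gl_NextBuffer"      -> next_buffer_separator = true
 *   "gl_SkipComponents2" -> skip_components = 2
 *   "foo"                -> var_name = "foo", is_subscripted = false
 *   "foo[3]"             -> var_name = "foo", is_subscripted = true,
 *                           array_subscript = 3
 */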
1523
1524 /**
1525 * Determine whether two xfb_decl structs refer to the same variable and
1526 * array index (if applicable).
1527 */
1528 static bool
1529 xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
1530 {
1531 assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));
1532
1533 if (strcmp(x->var_name, y->var_name) != 0)
1534 return false;
1535 if (x->is_subscripted != y->is_subscripted)
1536 return false;
1537 if (x->is_subscripted && x->array_subscript != y->array_subscript)
1538 return false;
1539 return true;
1540 }
1541
1542 /**
1543 * The total number of varying components taken up by this variable. Only
1544 * valid if xfb_decl_assign_location() has been called.
1545 */
1546 static unsigned
1547 xfb_decl_num_components(struct xfb_decl *xfb_decl)
1548 {
1549 if (xfb_decl->lowered_builtin_array_variable)
1550 return xfb_decl->size;
1551 else
1552 return xfb_decl->vector_elements * xfb_decl->matrix_columns *
1553 xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
1554 }
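/* Worked example (illustrative only): for a captured dmat3x2[2] array,
 * vector_elements = 2, matrix_columns = 3, size = 2 and the type is 64-bit,
 * so the declaration occupies 2 * 3 * 2 * 2 = 24 float components in the
 * transform feedback buffer.
 */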
1555
1556 /**
1557 * Assign a location and stream ID for this xfb_decl object based on the
1558 * transform feedback candidate found by xfb_decl_find_candidate().
1559 *
1560 * If an error occurs, the error is reported through linker_error() and false
1561 * is returned.
1562 */
1563 static bool
1564 xfb_decl_assign_location(struct xfb_decl *xfb_decl,
1565 const struct gl_constants *consts,
1566 struct gl_shader_program *prog,
1567 bool disable_varying_packing, bool xfb_enabled)
1568 {
1569 assert(xfb_decl_is_varying(xfb_decl));
1570
1571 unsigned fine_location
1572 = xfb_decl->matched_candidate->toplevel_var->data.location * 4
1573 + xfb_decl->matched_candidate->toplevel_var->data.location_frac
1574 + xfb_decl->matched_candidate->struct_offset_floats;
1575 const unsigned dmul =
1576 glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;
1577
1578 if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
1579 /* Array variable */
1580 const struct glsl_type *element_type =
1581 glsl_get_array_element(xfb_decl->matched_candidate->type);
1582 const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
1583 const unsigned vector_elements = glsl_get_vector_elements(element_type);
1584 unsigned actual_array_size;
1585 switch (xfb_decl->lowered_builtin_array_variable) {
1586 case clip_distance:
1587 actual_array_size = prog->last_vert_prog ?
1588 prog->last_vert_prog->info.clip_distance_array_size : 0;
1589 break;
1590 case cull_distance:
1591 actual_array_size = prog->last_vert_prog ?
1592 prog->last_vert_prog->info.cull_distance_array_size : 0;
1593 break;
1594 case none:
1595 default:
1596 actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
1597 break;
1598 }
1599
1600 if (xfb_decl->is_subscripted) {
1601 /* Check array bounds. */
1602 if (xfb_decl->array_subscript >= actual_array_size) {
1603 linker_error(prog, "Transform feedback varying %s has index "
1604 "%i, but the array size is %u.",
1605 xfb_decl->orig_name, xfb_decl->array_subscript,
1606 actual_array_size);
1607 return false;
1608 }
1609
1610 bool array_will_be_lowered =
1611 lower_packed_varying_needs_lowering(prog->last_vert_prog->nir,
1612 xfb_decl->matched_candidate->toplevel_var,
1613 nir_var_shader_out,
1614 disable_varying_packing,
1615 xfb_enabled) ||
1616 strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_ClipDistance") == 0 ||
1617 strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_CullDistance") == 0 ||
1618 strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelInner") == 0 ||
1619 strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelOuter") == 0;
1620
1621 unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
1622 1 : (array_will_be_lowered ? vector_elements : 4) * matrix_cols * dmul;
1623 fine_location += array_elem_size * xfb_decl->array_subscript;
1624 xfb_decl->size = 1;
1625 } else {
1626 xfb_decl->size = actual_array_size;
1627 }
1628 xfb_decl->vector_elements = vector_elements;
1629 xfb_decl->matrix_columns = matrix_cols;
1630 if (xfb_decl->lowered_builtin_array_variable)
1631 xfb_decl->type = GL_FLOAT;
1632 else
1633 xfb_decl->type = glsl_get_gl_type(element_type);
1634 } else {
1635 /* Regular variable (scalar, vector, or matrix) */
1636 if (xfb_decl->is_subscripted) {
1637 linker_error(prog, "Transform feedback varying %s requested, "
1638 "but %s is not an array.",
1639 xfb_decl->orig_name, xfb_decl->var_name);
1640 return false;
1641 }
1642 xfb_decl->size = 1;
1643 xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
1644 xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
1645 xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
1646 }
1647 xfb_decl->location = fine_location / 4;
1648 xfb_decl->location_frac = fine_location % 4;
1649
1650 /* From GL_EXT_transform_feedback:
1651 * A program will fail to link if:
1652 *
1653 * * the total number of components to capture in any varying
1654 * variable in <varyings> is greater than the constant
1655 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1656 * buffer mode is SEPARATE_ATTRIBS_EXT;
1657 */
1658 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1659 xfb_decl_num_components(xfb_decl) >
1660 consts->MaxTransformFeedbackSeparateComponents) {
1661 linker_error(prog, "Transform feedback varying %s exceeds "
1662 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1663 xfb_decl->orig_name);
1664 return false;
1665 }
1666
1667 /* Only transform feedback varyings can be assigned to non-zero streams,
1668 * so assign the stream id here.
1669 */
1670 xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;
1671
1672 unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
1673 unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
1674 xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
1675 xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
1676 array_offset + struct_offset;
1677
1678 return true;
1679 }
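/* Worked example (illustrative only, assuming the array will be
 * lowered/packed): capturing "v[1]" where v is a vec3[2] output at
 * data.location L with location_frac 0 gives fine_location = L * 4; the
 * array element size is vector_elements * matrix_cols * dmul = 3 * 1 * 1,
 * so the subscript adds 3, yielding location = L and location_frac = 3.
 */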
1680
1681 static unsigned
1682 xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
1683 {
1684 if (!xfb_decl_is_varying(xfb_decl)) {
1685 return 0;
1686 }
1687
1688 if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
1689 unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
1690 unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
1691 return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
1692 } else {
1693 return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
1694 }
1695 }
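/* Worked example (illustrative only): with a user-specified location, a
 * captured dvec3[2] has vector_elements = 3 and dmul = 2, so
 * rows_per_element = DIV_ROUND_UP(6, 4) = 2 and the declaration needs
 * size * matrix_columns * rows_per_element = 2 * 1 * 2 = 4 outputs.
 */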
1696
1697 static bool
1698 xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
1699 {
1700 if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
1701 return false;
1702
1703 return xfb_decl->matched_candidate->toplevel_var->data.assigned;
1704 }
1705
1706 /**
1707 * Update gl_transform_feedback_info to reflect this xfb_decl.
1708 *
1709 * If an error occurs, the error is reported through linker_error() and false
1710 * is returned.
1711 */
1712 static bool
1713 xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
1714 struct gl_shader_program *prog,
1715 struct gl_transform_feedback_info *info,
1716 unsigned buffer, unsigned buffer_index,
1717 const unsigned max_outputs,
1718 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
1719 bool *explicit_stride, unsigned *max_member_alignment,
1720 bool has_xfb_qualifiers, const void* mem_ctx)
1721 {
1722 unsigned xfb_offset = 0;
1723 unsigned size = xfb_decl->size;
1724 /* Handle gl_SkipComponents. */
1725 if (xfb_decl->skip_components) {
1726 info->Buffers[buffer].Stride += xfb_decl->skip_components;
1727 size = xfb_decl->skip_components;
1728 goto store_varying;
1729 }
1730
1731 if (xfb_decl->next_buffer_separator) {
1732 size = 0;
1733 goto store_varying;
1734 }
1735
1736 if (has_xfb_qualifiers) {
1737 xfb_offset = xfb_decl->offset / 4;
1738 } else {
1739 xfb_offset = info->Buffers[buffer].Stride;
1740 }
1741 info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1742
1743 {
1744 unsigned location = xfb_decl->location;
1745 unsigned location_frac = xfb_decl->location_frac;
1746 unsigned num_components = xfb_decl_num_components(xfb_decl);
1747
1748 /* From GL_EXT_transform_feedback:
1749 *
1750 * " A program will fail to link if:
1751 *
1752 * * the total number of components to capture is greater than the
1753 * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1754 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
1755 *
1756 * From GL_ARB_enhanced_layouts:
1757 *
1758 * " The resulting stride (implicit or explicit) must be less than or
1759 * equal to the implementation-dependent constant
1760 * gl_MaxTransformFeedbackInterleavedComponents."
1761 */
1762 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1763 has_xfb_qualifiers) &&
1764 xfb_offset + num_components >
1765 consts->MaxTransformFeedbackInterleavedComponents) {
1766 linker_error(prog,
1767 "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1768 "limit has been exceeded.");
1769 return false;
1770 }
1771
1772 /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
1773 * Page 76, (Transform Feedback Layout Qualifiers):
1774 *
1775 * " No aliasing in output buffers is allowed: It is a compile-time or
1776 * link-time error to specify variables with overlapping transform
1777 * feedback offsets."
1778 */
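      /* Worked example (illustrative only): with 32-bit BITSET words, a
       * varying at xfb_offset 30 floats covering 4 components occupies
       * components 30..33, i.e. bits 30..31 of word 0 and bits 0..1 of
       * word 1; any bit already set in those ranges indicates overlap.
       */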
1779 const unsigned max_components =
1780 consts->MaxTransformFeedbackInterleavedComponents;
1781 const unsigned first_component = xfb_offset;
1782 const unsigned last_component = xfb_offset + num_components - 1;
1783 const unsigned start_word = BITSET_BITWORD(first_component);
1784 const unsigned end_word = BITSET_BITWORD(last_component);
1785 BITSET_WORD *used;
1786 assert(last_component < max_components);
1787
1788 if (!used_components[buffer]) {
1789 used_components[buffer] =
1790 rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
1791 }
1792 used = used_components[buffer];
1793
1794 for (unsigned word = start_word; word <= end_word; word++) {
1795 unsigned start_range = 0;
1796 unsigned end_range = BITSET_WORDBITS - 1;
1797
1798 if (word == start_word)
1799 start_range = first_component % BITSET_WORDBITS;
1800
1801 if (word == end_word)
1802 end_range = last_component % BITSET_WORDBITS;
1803
1804 if (used[word] & BITSET_RANGE(start_range, end_range)) {
1805 linker_error(prog,
1806 "variable '%s', xfb_offset (%d) is causing aliasing.",
1807 xfb_decl->orig_name, xfb_offset * 4);
1808 return false;
1809 }
1810 used[word] |= BITSET_RANGE(start_range, end_range);
1811 }
1812
1813 const unsigned type_num_components =
1814 xfb_decl->vector_elements *
1815 (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
1816 unsigned current_type_components_left = type_num_components;
1817
1818 while (num_components > 0) {
1819 unsigned output_size = 0;
1820
1821 /* From GL_ARB_enhanced_layouts:
1822 *
1823 * "When an attribute variable declared using an array type is bound to
1824 * generic attribute index <i>, the active array elements are assigned to
1825 * consecutive generic attributes beginning with generic attribute <i>. The
1826 * number of attributes and components assigned to each element are
1827 * determined according to the data type of array elements and "component"
1828 * layout qualifier (if any) specified in the declaration of the array."
1829 *
1830 * "When an attribute variable declared using a matrix type is bound to a
1831 * generic attribute index <i>, its values are taken from consecutive generic
1832 * attributes beginning with generic attribute <i>. Such matrices are
1833 * treated as an array of column vectors with values taken from the generic
1834 * attributes.
1835 * This means there may be gaps in the varyings we are taking values from."
1836 *
1837 * Examples:
1838 *
1839 * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
1840 * | | |
1841 * | 32b 32b 32b 32b | 32b 32b 32b 32b |
1842 * | 0 X X Y Y | 4 X Y 0 0 |
1843 * | 1 Z Z 0 0 | 5 X Y 0 0 |
1844 * | 2 X X Y Y | 6 X Y 0 0 |
1845 * | 3 Z Z 0 0 | 7 X Y 0 0 |
1846 *
1847 */
1848 if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
1849 output_size = MIN3(num_components, current_type_components_left, 4);
1850 current_type_components_left -= output_size;
1851 if (current_type_components_left == 0) {
1852 current_type_components_left = type_num_components;
1853 }
1854 } else {
1855 output_size = MIN2(num_components, 4 - location_frac);
1856 }
1857
1858 assert((info->NumOutputs == 0 && max_outputs == 0) ||
1859 info->NumOutputs < max_outputs);
1860
1861 /* From the ARB_enhanced_layouts spec:
1862 *
1863 * "If such a block member or variable is not written during a shader
1864 * invocation, the buffer contents at the assigned offset will be
1865 * undefined. Even if there are no static writes to a variable or
1866 * member that is assigned a transform feedback offset, the space is
1867 * still allocated in the buffer and still affects the stride."
1868 */
1869 if (xfb_decl_is_varying_written(xfb_decl)) {
1870 info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1871 info->Outputs[info->NumOutputs].OutputRegister = location;
1872 info->Outputs[info->NumOutputs].NumComponents = output_size;
1873 info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
1874 info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1875 info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1876 ++info->NumOutputs;
1877 }
1878 info->Buffers[buffer].Stream = xfb_decl->stream_id;
1879 xfb_offset += output_size;
1880
1881 num_components -= output_size;
1882 location++;
1883 location_frac = 0;
1884 }
1885 }
1886
1887 if (explicit_stride && explicit_stride[buffer]) {
1888 if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
1889 info->Buffers[buffer].Stride % 2) {
1890 linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1891 "multiple of 8 as its applied to a type that is or "
1892 "contains a double.",
1893 info->Buffers[buffer].Stride * 4);
1894 return false;
1895 }
1896
1897 if (xfb_offset > info->Buffers[buffer].Stride) {
1898 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1899 "buffer (%d)", xfb_offset * 4,
1900 info->Buffers[buffer].Stride * 4, buffer);
1901 return false;
1902 }
1903 } else {
1904 if (max_member_alignment && has_xfb_qualifiers) {
1905 max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
1906 _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
1907 info->Buffers[buffer].Stride = ALIGN(xfb_offset,
1908 max_member_alignment[buffer]);
1909 } else {
1910 info->Buffers[buffer].Stride = xfb_offset;
1911 }
1912 }
1913
1914 store_varying:
1915 info->Varyings[info->NumVarying].name.string =
1916 ralloc_strdup(prog, xfb_decl->orig_name);
1917 resource_name_updated(&info->Varyings[info->NumVarying].name);
1918 info->Varyings[info->NumVarying].Type = xfb_decl->type;
1919 info->Varyings[info->NumVarying].Size = size;
1920 info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1921 info->NumVarying++;
1922 info->Buffers[buffer].NumVaryings++;
1923
1924 return true;
1925 }
1926
1927 static const struct tfeedback_candidate *
1928 xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
1929 struct gl_shader_program *prog,
1930 struct hash_table *tfeedback_candidates)
1931 {
1932 const char *name = xfb_decl->var_name;
1933 switch (xfb_decl->lowered_builtin_array_variable) {
1934 case none:
1935 name = xfb_decl->var_name;
1936 break;
1937 case clip_distance:
1938 case cull_distance:
1939 name = "gl_ClipDistanceMESA";
1940 break;
1941 }
1942 struct hash_entry *entry =
1943 _mesa_hash_table_search(tfeedback_candidates, name);
1944
1945 xfb_decl->matched_candidate = entry ?
1946 (struct tfeedback_candidate *) entry->data : NULL;
1947
1948 if (!xfb_decl->matched_candidate) {
1949 /* From GL_EXT_transform_feedback:
1950 * A program will fail to link if:
1951 *
1952 * * any variable name specified in the <varyings> array is not
1953 * declared as an output in the geometry shader (if present) or
1954 * the vertex shader (if no geometry shader is present);
1955 */
1956 linker_error(prog, "Transform feedback varying %s undeclared.",
1957 xfb_decl->orig_name);
1958 }
1959
1960 return xfb_decl->matched_candidate;
1961 }
1962
1963 /**
1964 * Force a candidate over the previously matched one. This happens when a
1965 * new varying needs to be created to match the xfb declaration, for
1966 * example, to fulfill an alignment criterion.
1967 */
1968 static void
1969 xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
1970 struct tfeedback_candidate *candidate)
1971 {
1972 xfb_decl->matched_candidate = candidate;
1973
1974 /* The subscript part is no longer relevant */
1975 xfb_decl->is_subscripted = false;
1976 xfb_decl->array_subscript = 0;
1977 }
1978
1979 /**
1980 * Parse all the transform feedback declarations that were passed to
1981 * glTransformFeedbackVaryings() and store them in xfb_decl objects.
1982 *
1983 * If an error occurs, the error is reported through linker_error() and false
1984 * is returned.
1985 */
1986 static bool
1987 parse_xfb_decls(const struct gl_constants *consts,
1988 const struct gl_extensions *exts,
1989 struct gl_shader_program *prog,
1990 const void *mem_ctx, unsigned num_names,
1991 char **varying_names, struct xfb_decl *decls)
1992 {
1993 for (unsigned i = 0; i < num_names; ++i) {
1994 xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i]);
1995
1996 if (!xfb_decl_is_varying(&decls[i]))
1997 continue;
1998
1999 /* From GL_EXT_transform_feedback:
2000 * A program will fail to link if:
2001 *
2002 * * any two entries in the <varyings> array specify the same varying
2003 * variable;
2004 *
2005 * We interpret this to mean "any two entries in the <varyings> array
2006 * specify the same varying variable and array index", since transform
2007 * feedback of arrays would be useless otherwise.
2008 */
2009 for (unsigned j = 0; j < i; ++j) {
2010 if (xfb_decl_is_varying(&decls[j])) {
2011 if (xfb_decl_is_same(&decls[i], &decls[j])) {
2012 linker_error(prog, "Transform feedback varying %s specified "
2013 "more than once.", varying_names[i]);
2014 return false;
2015 }
2016 }
2017 }
2018 }
2019 return true;
2020 }
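/* For illustration (hypothetical call, not from the original sources): with
 * varying_names = { "foo", "gl_NextBuffer", "foo[0]" }, each entry is parsed
 * by xfb_decl_init(), "gl_NextBuffer" is skipped by the duplicate check (it
 * is not a varying), and a second plain "foo" entry would be rejected as
 * specified more than once.
 */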
2021
2022 static int
2023 cmp_xfb_offset(const void * x_generic, const void * y_generic)
2024 {
2025 struct xfb_decl *x = (struct xfb_decl *) x_generic;
2026 struct xfb_decl *y = (struct xfb_decl *) y_generic;
2027
2028 if (x->buffer != y->buffer)
2029 return x->buffer - y->buffer;
2030 return x->offset - y->offset;
2031 }
2032
2033 /**
2034 * Store transform feedback location assignments into
2035 * prog->sh.LinkedTransformFeedback based on the data stored in
2036 * xfb_decls.
2037 *
2038 * If an error occurs, the error is reported through linker_error() and false
2039 * is returned.
2040 */
2041 static bool
2042 store_tfeedback_info(const struct gl_constants *consts,
2043 struct gl_shader_program *prog, unsigned num_xfb_decls,
2044 struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
2045 const void *mem_ctx)
2046 {
2047 if (!prog->last_vert_prog)
2048 return true;
2049
2050 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
2051 * tracking the number of buffers doesn't overflow.
2052 */
2053 assert(consts->MaxTransformFeedbackBuffers < 32);
2054
2055 bool separate_attribs_mode =
2056 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
2057
2058 struct gl_program *xfb_prog = prog->last_vert_prog;
2059 xfb_prog->sh.LinkedTransformFeedback =
2060 rzalloc(xfb_prog, struct gl_transform_feedback_info);
2061
2062 /* The xfb_offset qualifier does not have to be used in increasing order;
2063 * however, some drivers expect to receive the list of transform feedback
2064 * declarations in order, so sort it now for convenience.
2065 */
2066 if (has_xfb_qualifiers) {
2067 qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
2068 cmp_xfb_offset);
2069 }
2070
2071 xfb_prog->sh.LinkedTransformFeedback->Varyings =
2072 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
2073 num_xfb_decls);
2074
2075 unsigned num_outputs = 0;
2076 for (unsigned i = 0; i < num_xfb_decls; ++i) {
2077 if (xfb_decl_is_varying_written(&xfb_decls[i]))
2078 num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
2079 }
2080
2081 xfb_prog->sh.LinkedTransformFeedback->Outputs =
2082 rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
2083 num_outputs);
2084
2085 unsigned num_buffers = 0;
2086 unsigned buffers = 0;
2087 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};
2088
2089 if (!has_xfb_qualifiers && separate_attribs_mode) {
2090 /* GL_SEPARATE_ATTRIBS */
2091 for (unsigned i = 0; i < num_xfb_decls; ++i) {
2092 if (!xfb_decl_store(&xfb_decls[i], consts, prog,
2093 xfb_prog->sh.LinkedTransformFeedback,
2094 num_buffers, num_buffers, num_outputs,
2095 used_components, NULL, NULL, has_xfb_qualifiers,
2096 mem_ctx))
2097 return false;
2098
2099 buffers |= 1 << num_buffers;
2100 num_buffers++;
2101 }
2102 }
2103 else {
2104 /* GL_INTERLEAVED_ATTRIBS */
2105 int buffer_stream_id = -1;
2106 unsigned buffer =
2107 num_xfb_decls ? xfb_decls[0].buffer : 0;
2108 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
2109 unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
2110 /* Apply any xfb_stride global qualifiers */
2111 if (has_xfb_qualifiers) {
2112 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
2113 if (prog->TransformFeedback.BufferStride[j]) {
2114 explicit_stride[j] = true;
2115 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
2116 prog->TransformFeedback.BufferStride[j] / 4;
2117 }
2118 }
2119 }
2120
2121 for (unsigned i = 0; i < num_xfb_decls; ++i) {
2122 if (has_xfb_qualifiers &&
2123 buffer != xfb_decls[i].buffer) {
2124 /* we have moved to the next buffer so reset stream id */
2125 buffer_stream_id = -1;
2126 num_buffers++;
2127 }
2128
2129 if (xfb_decls[i].next_buffer_separator) {
2130 if (!xfb_decl_store(&xfb_decls[i], consts, prog,
2131 xfb_prog->sh.LinkedTransformFeedback,
2132 buffer, num_buffers, num_outputs,
2133 used_components, explicit_stride,
2134 max_member_alignment, has_xfb_qualifiers,
2135 mem_ctx))
2136 return false;
2137 num_buffers++;
2138 buffer_stream_id = -1;
2139 continue;
2140 }
2141
2142 if (has_xfb_qualifiers) {
2143 buffer = xfb_decls[i].buffer;
2144 } else {
2145 buffer = num_buffers;
2146 }
2147
2148 if (xfb_decl_is_varying(&xfb_decls[i])) {
2149 if (buffer_stream_id == -1) {
2150 /* First varying writing to this buffer: remember its stream */
2151 buffer_stream_id = (int) xfb_decls[i].stream_id;
2152
2153 /* Only mark a buffer as active when there is a varying
2154 * attached to it. This behaviour is based on a revised version
2155 * of section 13.2.2 of the GL 4.6 spec.
2156 */
2157 buffers |= 1 << buffer;
2158 } else if (buffer_stream_id !=
2159 (int) xfb_decls[i].stream_id) {
2160 /* Varying writes to the same buffer from a different stream */
2161 linker_error(prog,
2162 "Transform feedback can't capture varyings belonging "
2163 "to different vertex streams in a single buffer. "
2164 "Varying %s writes to buffer from stream %u, other "
2165 "varyings in the same buffer write from stream %u.",
2166 xfb_decls[i].orig_name,
2167 xfb_decls[i].stream_id,
2168 buffer_stream_id);
2169 return false;
2170 }
2171 }
2172
2173 if (!xfb_decl_store(&xfb_decls[i], consts, prog,
2174 xfb_prog->sh.LinkedTransformFeedback,
2175 buffer, num_buffers, num_outputs, used_components,
2176 explicit_stride, max_member_alignment,
2177 has_xfb_qualifiers, mem_ctx))
2178 return false;
2179 }
2180 }
2181 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
2182
2183 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
2184 return true;
2185 }
2186
2187 /**
2188 * Enum representing the order in which varyings are packed within a
2189 * packing class.
2190 *
2191 * Currently we pack vec4's first, then vec2's, then scalar values, then
2192 * vec3's. This order ensures that the only vectors that are at risk of
2193 * having to be "double parked" (split between two adjacent varying slots)
2194 * are the vec3's.
2195 */
2196 enum packing_order_enum {
2197 PACKING_ORDER_VEC4,
2198 PACKING_ORDER_VEC2,
2199 PACKING_ORDER_SCALAR,
2200 PACKING_ORDER_VEC3,
2201 };
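/* For illustration (not part of the original sources), packing one vec2 and
 * two vec3s in this order fills slots as follows: the vec2 takes components
 * 0..1 of slot 0, the first vec3 takes components 2..3 of slot 0 plus
 * component 0 of slot 1, and the second vec3 fits in slot 1; only vec3s end
 * up split across a slot boundary.
 */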
2202
2203 /**
2204 * Structure recording the relationship between a single producer output
2205 * and a single consumer input.
2206 */
2207 struct match {
2208 /**
2209 * Packing class for this varying, computed by compute_packing_class().
2210 */
2211 unsigned packing_class;
2212
2213 /**
2214 * Packing order for this varying, computed by compute_packing_order().
2215 */
2216 enum packing_order_enum packing_order;
2217
2218 /**
2219 * The output variable in the producer stage.
2220 */
2221 nir_variable *producer_var;
2222
2223 /**
2224 * The input variable in the consumer stage.
2225 */
2226 nir_variable *consumer_var;
2227
2228 /**
2229 * The location which has been assigned for this varying. This is
2230 * expressed in multiples of a float, with the first generic varying
2231 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
2232 * value 0.
2233 */
2234 unsigned generic_location;
2235 };
2236
2237 /**
2238 * Data structure recording the relationship between outputs of one shader
2239 * stage (the "producer") and inputs of another (the "consumer").
2240 */
2241 struct varying_matches
2242 {
2243 /**
2244 * If true, this driver disables varying packing, so all varyings need to
2245 * be aligned on slot boundaries, and take up a number of slots equal to
2246 * their number of matrix columns times their array size.
2247 *
2248 * Packing may also be disabled because our current packing method is not
2249 * safe in SSO or versions of OpenGL where interpolation qualifiers are not
2250 * guaranteed to match across stages.
2251 */
2252 bool disable_varying_packing;
2253
2254 /**
2255 * If true, this driver disables packing for varyings used by transform
2256 * feedback.
2257 */
2258 bool disable_xfb_packing;
2259
2260 /**
2261 * If true, this driver has transform feedback enabled. The transform
2262 * feedback code usually requires at least some packing be done even
2263 * when varying packing is disabled; fortunately, where transform feedback
2264 * requires packing it's safe to override the disabled setting. See
2265 * is_varying_packing_safe().
2266 */
2267 bool xfb_enabled;
2268
2269 bool enhanced_layouts_enabled;
2270
2271 /**
2272 * If true, this driver prefers varyings to be aligned to power of two
2273 * in a slot.
2274 */
2275 bool prefer_pot_aligned_varyings;
2276
2277 struct match *matches;
2278
2279 /**
2280 * The number of elements in the \c matches array that are currently in
2281 * use.
2282 */
2283 unsigned num_matches;
2284
2285 /**
2286 * The number of elements that were set aside for the \c matches array when
2287 * it was allocated.
2288 */
2289 unsigned matches_capacity;
2290
2291 gl_shader_stage producer_stage;
2292 gl_shader_stage consumer_stage;
2293 };
2294
2295 /**
2296 * Comparison function passed to qsort() to sort varyings by packing_class and
2297 * then by packing_order.
2298 */
2299 static int
2300 varying_matches_match_comparator(const void *x_generic, const void *y_generic)
2301 {
2302 const struct match *x = (const struct match *) x_generic;
2303 const struct match *y = (const struct match *) y_generic;
2304
2305 if (x->packing_class != y->packing_class)
2306 return x->packing_class - y->packing_class;
2307 return x->packing_order - y->packing_order;
2308 }
2309
2310 /**
2311 * Comparison function passed to qsort() to sort varyings used only by
2312 * transform feedback when packing of other varyings is disabled.
2313 */
2314 static int
2315 varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
2316 {
2317 const struct match *x = (const struct match *) x_generic;
2318
2319 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2320 return varying_matches_match_comparator(x_generic, y_generic);
2321
2322 /* FIXME: When the comparator returns 0 it means the elements being
2323 * compared are equivalent. However the qsort documentation says:
2324 *
2325 * "The order of equivalent elements is undefined."
2326 *
2327 * In practice the sort ends up reversing the order of the varyings, which
2328 * means locations are also assigned in this reversed order, and that
2329 * happens to be what we want. This is also what happens in
2330 * varying_matches_match_comparator().
2331 */
2332 return 0;
2333 }
2334
2335 /**
2336 * Comparison function passed to qsort() to sort varyings NOT used by
2337 * transform feedback when packing of xfb varyings is disabled.
2338 */
2339 static int
2340 varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
2341 {
2342 const struct match *x = (const struct match *) x_generic;
2343
2344 if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
2345 return varying_matches_match_comparator(x_generic, y_generic);
2346
2347 /* FIXME: When the comparator returns 0 it means the elements being
2348 * compared are equivalent. However the qsort documentation says:
2349 *
2350 * "The order of equivalent elements is undefined."
2351 *
2352 * In practice the sort ends up reversing the order of the varyings, which
2353 * means locations are also assigned in this reversed order, and that
2354 * happens to be what we want. This is also what happens in
2355 * varying_matches_match_comparator().
2356 */
2357 return 0;
2358 }
2359
2360 static bool
2361 is_unpackable_tess(gl_shader_stage producer_stage,
2362 gl_shader_stage consumer_stage)
2363 {
2364 if (consumer_stage == MESA_SHADER_TESS_EVAL ||
2365 consumer_stage == MESA_SHADER_TESS_CTRL ||
2366 producer_stage == MESA_SHADER_TESS_CTRL)
2367 return true;
2368
2369 return false;
2370 }
2371
2372 static void
2373 init_varying_matches(void *mem_ctx, struct varying_matches *vm,
2374 const struct gl_constants *consts,
2375 const struct gl_extensions *exts,
2376 gl_shader_stage producer_stage,
2377 gl_shader_stage consumer_stage,
2378 bool sso)
2379 {
2380 /* Tessellation shaders treat inputs and outputs as shared memory and can
2381 * access inputs and outputs of other invocations.
2382 * Therefore, they can't be lowered to temps easily (and definitely not
2383 * efficiently).
2384 */
2385 bool unpackable_tess =
2386 is_unpackable_tess(producer_stage, consumer_stage);
2387
2388 /* Transform feedback code assumes varying arrays are packed, so if the
2389 * driver has disabled varying packing, make sure to at least enable
2390 * packing required by transform feedback. See below for exception.
2391 */
2392 bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;
2393
2394 /* Some drivers actually require packing to be explicitly disabled
2395 * for varyings used by transform feedback.
2396 */
2397 bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;
2398
2399 /* Disable packing on outward facing interfaces for SSO because in ES we
2400 * need to retain the unpacked varying information for draw time
2401 * validation.
2402 *
2403 * Packing is still enabled on individual arrays, structs, and matrices as
2404 * these are required by the transform feedback code and it is still safe
2405 * to do so. We also enable packing when a varying is only used for
2406 * transform feedback and the program is not SSO.
2407 */
2408 bool disable_varying_packing =
2409 consts->DisableVaryingPacking || unpackable_tess;
2410 if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
2411 disable_varying_packing = true;
2412
2413 /* Note: this initial capacity is rather arbitrarily chosen to be large
2414 * enough for many cases without wasting an unreasonable amount of space.
2415 * varying_matches_record() will resize the array if there are more than
2416 * this number of varyings.
2417 */
2418 vm->matches_capacity = 8;
2419 vm->matches = (struct match *)
2420 ralloc_array(mem_ctx, struct match, vm->matches_capacity);
2421 vm->num_matches = 0;
2422
2423 vm->disable_varying_packing = disable_varying_packing;
2424 vm->disable_xfb_packing = disable_xfb_packing;
2425 vm->xfb_enabled = xfb_enabled;
2426 vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
2427 vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
2428 vm->producer_stage = producer_stage;
2429 vm->consumer_stage = consumer_stage;
2430 }
2431
2432 /**
2433 * Packing is always safe on individual arrays, structures, and matrices. It
2434 * is also safe if the varying is only used for transform feedback.
2435 */
2436 static bool
2437 is_varying_packing_safe(struct varying_matches *vm,
2438 const struct glsl_type *type, const nir_variable *var)
2439 {
2440 if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
2441 return false;
2442
2443 return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
2444 glsl_type_is_struct(type) ||
2445 var->data.is_xfb_only);
2446 }
2447
2448 static bool
2449 is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
2450 const nir_variable *var)
2451 {
2452 return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
2453 (vm->disable_xfb_packing && var->data.is_xfb &&
2454 !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
2455 glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
2456 }
2457
2458 /**
2459 * Compute the "packing class" of the given varying. This is an unsigned
2460 * integer with the property that two variables in the same packing class can
2461 * be safely packed into the same vec4.
2462 */
2463 static unsigned
2464 varying_matches_compute_packing_class(const nir_variable *var)
2465 {
2466 /* Without help from the back-end, there is no way to pack together
2467 * variables with different interpolation types, because
2468 * lower_packed_varyings must choose exactly one interpolation type for
2469 * each packed varying it creates.
2470 *
2471 * However, we can safely pack together floats, ints, and uints, because:
2472 *
2473 * - varyings of base type "int" and "uint" must use the "flat"
2474 * interpolation type, which can only occur in GLSL 1.30 and above.
2475 *
2476 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2477 * can store flat floats as ints without losing any information (using
2478 * the ir_unop_bitcast_* opcodes).
2479 *
2480 * Therefore, the packing class depends only on the interpolation type.
2481 */
2482 bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
2483 glsl_contains_integer(var->type) || glsl_contains_double(var->type);
2484
2485 const unsigned interp = is_interpolation_flat
2486 ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;
2487
2488 assert(interp < (1 << 3));
2489
2490 const unsigned packing_class = (interp << 0) |
2491 (var->data.centroid << 3) |
2492 (var->data.sample << 4) |
2493 (var->data.patch << 5) |
2494 (var->data.must_be_shader_input << 6);
2495
2496 return packing_class;
2497 }
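/* For illustration (not part of the original sources): a centroid-qualified
 * smooth float and a centroid-qualified smooth vec3 both get packing class
 * (INTERP_MODE_SMOOTH << 0) | (1 << 3) and may therefore share a vec4 slot,
 * while a flat int lands in a different class because its interpolation
 * collapses to INTERP_MODE_FLAT.
 */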
2498
2499 /**
2500 * Compute the "packing order" of the given varying. This is a sort key we
2501 * use to determine when to attempt to pack the given varying relative to
2502 * other varyings in the same packing class.
2503 */
2504 static enum packing_order_enum
2505 varying_matches_compute_packing_order(const nir_variable *var)
2506 {
2507 const struct glsl_type *element_type = glsl_without_array(var->type);
2508
2509 switch (glsl_get_component_slots(element_type) % 4) {
2510 case 1: return PACKING_ORDER_SCALAR;
2511 case 2: return PACKING_ORDER_VEC2;
2512 case 3: return PACKING_ORDER_VEC3;
2513 case 0: return PACKING_ORDER_VEC4;
2514 default:
2515 assert(!"Unexpected value of vector_elements");
2516 return PACKING_ORDER_VEC4;
2517 }
2518 }
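/* For illustration (not part of the original sources): a vec3[4] strips to
 * vec3 (3 component slots, 3 % 4 = 3) -> PACKING_ORDER_VEC3, while a mat2
 * counts 4 component slots (4 % 4 = 0) -> PACKING_ORDER_VEC4.
 */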
2519
2520 /**
2521 * Record the given producer/consumer variable pair in the list of variables
2522 * that should later be assigned locations.
2523 *
2524 * It is permissible for \c consumer_var to be NULL (this happens if a
2525 * variable is output by the producer and consumed by transform feedback, but
2526 * not consumed by the consumer).
2527 *
2528 * If \c producer_var has already been paired up with a consumer_var, or
2529 * producer_var is part of fixed pipeline functionality (and hence already has
2530 * a location assigned), this function has no effect.
2531 *
2532 * Note: as a side effect this function may change the interpolation type of
2533 * \c producer_var, but only when the change couldn't possibly affect
2534 * rendering.
2535 */
2536 static void
2537 varying_matches_record(void *mem_ctx, struct varying_matches *vm,
2538 nir_variable *producer_var, nir_variable *consumer_var)
2539 {
2540 assert(producer_var != NULL || consumer_var != NULL);
2541
2542 if ((producer_var &&
2543 (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
2544 (consumer_var &&
2545 (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
2546 /* Either a location already exists for this variable (since it is part
2547 * of fixed functionality), or it has already been assigned explicitly.
2548 */
2549 return;
2550 }
2551
2552 /* The varyings should not have been matched and assigned previously. */
2553 assert((producer_var == NULL || producer_var->data.location == -1) &&
2554 (consumer_var == NULL || consumer_var->data.location == -1));
2555
2556 bool needs_flat_qualifier = consumer_var == NULL &&
2557 (glsl_contains_integer(producer_var->type) ||
2558 glsl_contains_double(producer_var->type));
2559
2560 if (!vm->disable_varying_packing &&
2561 (!vm->disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
2562 (needs_flat_qualifier ||
2563 (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
2564 /* Since this varying is not being consumed by the fragment shader, its
2565 * interpolation type cannot possibly affect rendering.
2566 * Also, this variable is non-flat and is (or contains) an integer
2567 * or a double.
2568 * If the consumer stage is unknown, don't modify the interpolation
2569 * type as it could affect rendering later with separate shaders.
2570 *
2571 * lower_packed_varyings requires all integer varyings to be flat,
2572 * regardless of where they appear. We can trivially satisfy that
2573 * requirement by changing the interpolation type to flat here.
2574 */
2575 if (producer_var) {
2576 producer_var->data.centroid = false;
2577 producer_var->data.sample = false;
2578 producer_var->data.interpolation = INTERP_MODE_FLAT;
2579 }
2580
2581 if (consumer_var) {
2582 consumer_var->data.centroid = false;
2583 consumer_var->data.sample = false;
2584 consumer_var->data.interpolation = INTERP_MODE_FLAT;
2585 }
2586 }
2587
2588 if (vm->num_matches == vm->matches_capacity) {
2589 vm->matches_capacity *= 2;
2590 vm->matches = (struct match *)
2591 reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
2592 }
2593
2594 /* We must use the consumer to compute the packing class because in GL4.4+
2595 * there is no guarantee interpolation qualifiers will match across stages.
2596 *
2597 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
2598 *
2599 * "The type and presence of interpolation qualifiers of variables with
2600 * the same name declared in all linked shaders for the same cross-stage
2601 * interface must match, otherwise the link command will fail.
2602 *
2603 * When comparing an output from one stage to an input of a subsequent
2604 * stage, the input and output don't match if their interpolation
2605 * qualifiers (or lack thereof) are not the same."
2606 *
2607 * This text was also in at least revision 7 of the 4.40 spec but is no
2608 * longer in revision 9 and not in the 4.50 spec.
2609 */
2610 const nir_variable *const var = (consumer_var != NULL)
2611 ? consumer_var : producer_var;
2612
2613 if (producer_var && consumer_var &&
2614 consumer_var->data.must_be_shader_input) {
2615 producer_var->data.must_be_shader_input = 1;
2616 }
2617
2618 vm->matches[vm->num_matches].packing_class
2619 = varying_matches_compute_packing_class(var);
2620 vm->matches[vm->num_matches].packing_order
2621 = varying_matches_compute_packing_order(var);
2622
2623 vm->matches[vm->num_matches].producer_var = producer_var;
2624 vm->matches[vm->num_matches].consumer_var = consumer_var;
2625 vm->num_matches++;
2626 }
2627
2628 /**
2629 * Choose locations for all of the variable matches that were previously
2630 * passed to varying_matches_record().
2631 * \param components returns array[slot] of number of components used
2632 * per slot (1, 2, 3 or 4)
2633 * \param reserved_slots bitmask indicating which varying slots are already
2634 * allocated
2635 * \return number of slots (4-element vectors) allocated
2636 */
2637 static unsigned
2638 varying_matches_assign_locations(struct varying_matches *vm,
2639 struct gl_shader_program *prog,
2640 uint8_t components[], uint64_t reserved_slots)
2641 {
2642 /* If packing has been disabled then we cannot safely sort the varyings by
2643 * class as it may mean we are using a version of OpenGL where
2644 * interpolation qualifiers are not guaranteed to be matching across
2645 * shaders, sorting in this case could result in mismatching shader
2646 * interfaces.
2647 * When packing is disabled the sort orders varyings used by transform
2648 * feedback first, but also depends on *undefined behaviour* of qsort to
2649 * reverse the order of the varyings. See varying_matches_xfb_comparator().
2650 *
2651 * If packing is only disabled for xfb varyings (mutually exclusive with
2652 * disable_varying_packing), we then group varyings depending on if they
2653 * are captured for transform feedback. The same *undefined behaviour* is
2654 * taken advantage of.
2655 */
2656 if (vm->disable_varying_packing) {
2657 /* Only sort varyings that are only used by transform feedback. */
2658 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2659 &varying_matches_xfb_comparator);
2660 } else if (vm->disable_xfb_packing) {
2661 /* Only sort varyings that are NOT used by transform feedback. */
2662 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2663 &varying_matches_not_xfb_comparator);
2664 } else {
2665 /* Sort varying matches into an order that makes them easy to pack. */
2666 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
2667 &varying_matches_match_comparator);
2668 }
2669
2670 unsigned generic_location = 0;
2671 unsigned generic_patch_location = MAX_VARYING*4;
2672 bool previous_var_xfb = false;
2673 bool previous_var_xfb_only = false;
2674 unsigned previous_packing_class = ~0u;
2675
2676 /* For transform feedback separate mode, we know the number of attributes
2677 * is <= the number of buffers. So packing isn't critical. In fact,
2678 * packing vec3 attributes can cause trouble because splitting a vec3
2679 * effectively creates an additional transform feedback output. The
2680 * extra TFB output may exceed device driver limits.
2681 *
2682 * Also don't pack vec3 if the driver prefers power of two aligned
2683 * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
2684 * pot-aligned; we only need to take care of vec3s.
2685 */
2686 const bool dont_pack_vec3 =
2687 (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
2688 prog->TransformFeedback.NumVarying > 0) ||
2689 vm->prefer_pot_aligned_varyings;
2690
2691 for (unsigned i = 0; i < vm->num_matches; i++) {
2692 unsigned *location = &generic_location;
2693 const nir_variable *var;
2694 const struct glsl_type *type;
2695 bool is_vertex_input = false;
2696
2697 if (vm->matches[i].consumer_var) {
2698 var = vm->matches[i].consumer_var;
2699 type = get_varying_type(var, vm->consumer_stage);
2700 if (vm->consumer_stage == MESA_SHADER_VERTEX)
2701 is_vertex_input = true;
2702 } else {
2703 if (!vm->matches[i].producer_var)
2704 continue; /* The varying was optimised away */
2705
2706 var = vm->matches[i].producer_var;
2707 type = get_varying_type(var, vm->producer_stage);
2708 }
2709
2710 if (var->data.patch)
2711 location = &generic_patch_location;
2712
2713 /* Advance to the next slot if this varying has a different packing
2714 * class than the previous one, and we're not already on a slot
2715 * boundary.
2716 *
2717 * Also advance if varying packing is disabled for transform feedback,
2718 * and previous or current varying is used for transform feedback.
2719 *
2720 * Also advance to the next slot if packing is disabled. This makes sure
2721 * we don't assign varyings the same locations which is possible
2722 * because we still pack individual arrays, records and matrices even
2723 * when packing is disabled. Note we don't advance to the next slot if
2724 * we can pack varyings together that are only used for transform
2725 * feedback.
2726 */
2727 if (var->data.must_be_shader_input ||
2728 (vm->disable_xfb_packing &&
2729 (previous_var_xfb || var->data.is_xfb)) ||
2730 (vm->disable_varying_packing &&
2731 !(previous_var_xfb_only && var->data.is_xfb_only)) ||
2732 (previous_packing_class != vm->matches[i].packing_class) ||
2733 (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
2734 dont_pack_vec3)) {
2735 *location = ALIGN(*location, 4);
2736 }
2737
2738 previous_var_xfb = var->data.is_xfb;
2739 previous_var_xfb_only = var->data.is_xfb_only;
2740 previous_packing_class = vm->matches[i].packing_class;
2741
2742 /* The number of components taken up by this variable. For vertex shader
2743 * inputs, we use the number of slots * 4, as they have different
2744 * counting rules.
2745 */
2746 unsigned num_components = 0;
2747 if (is_vertex_input) {
2748 num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
2749 } else {
2750 if (is_packing_disabled(vm, type, var)) {
2751 num_components = glsl_count_attribute_slots(type, false) * 4;
2752 } else {
2753 num_components = glsl_get_component_slots_aligned(type, *location);
2754 }
2755 }
2756
2757 /* The last slot for this variable, inclusive. */
2758 unsigned slot_end = *location + num_components - 1;
2759
2760 /* FIXME: We could be smarter in the below code and loop back over
2761 * trying to fill any locations that we skipped because we couldn't pack
2762 * the varying between explicit locations. For now just let the user
2763 * hit the linking error if we run out of room and suggest they use
2764 * explicit locations.
2765 */
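      /* Worked example (illustrative only): location = 6 (component 2 of
       * slot 1) with num_components = 6 gives slot_end = 11, so the varying
       * spans slots 1..2 and slot_mask = 0x6; if either bit is reserved the
       * varying is moved to the next slot boundary and retried.
       */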
2766 while (slot_end < MAX_VARYING * 4u) {
2767 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
2768 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
2769
2770 assert(slots > 0);
2771
2772 if ((reserved_slots & slot_mask) == 0) {
2773 break;
2774 }
2775
2776 *location = ALIGN(*location + 1, 4);
2777 slot_end = *location + num_components - 1;
2778 }
2779
2780 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
2781 linker_error(prog, "insufficient contiguous locations available for "
2782 "%s it is possible an array or struct could not be "
2783 "packed between varyings with explicit locations. Try "
2784 "using an explicit location for arrays and structs.",
2785 var->name);
2786 }
2787
2788 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
2789 for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
2790 components[j] = 4;
2791 components[slot_end / 4u] = (slot_end & 3) + 1;
2792 }
2793
2794 vm->matches[i].generic_location = *location;
2795
2796 *location = slot_end + 1;
2797 }
2798
2799 return (generic_location + 3) / 4;
2800 }
2801
2802 static void
2803 varying_matches_assign_temp_locations(struct varying_matches *vm,
2804 struct gl_shader_program *prog,
2805 uint64_t reserved_slots)
2806 {
2807 unsigned tmp_loc = 0;
2808 for (unsigned i = 0; i < vm->num_matches; i++) {
2809 nir_variable *producer_var = vm->matches[i].producer_var;
2810 nir_variable *consumer_var = vm->matches[i].consumer_var;
2811
2812 while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
2813 if (reserved_slots & (UINT64_C(1) << tmp_loc))
2814 tmp_loc++;
2815 else
2816 break;
2817 }
2818
2819 if (producer_var) {
2820 assert(producer_var->data.location == -1);
2821 producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2822 }
2823
2824 if (consumer_var) {
2825 assert(consumer_var->data.location == -1);
2826 consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2827 }
2828
2829 tmp_loc++;
2830 }
2831 }
2832
2833 /**
2834 * Update the producer and consumer shaders to reflect the locations
2835 * assignments that were made by varying_matches_assign_locations().
2836 */
2837 static void
2838 varying_matches_store_locations(struct varying_matches *vm)
2839 {
2840 /* Check if a location needs to be packed with lower_packed_varyings() or if
2841 * we can just use ARB_enhanced_layouts packing.
2842 */
2843 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
2844 const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
2845
2846 for (unsigned i = 0; i < vm->num_matches; i++) {
2847 nir_variable *producer_var = vm->matches[i].producer_var;
2848 nir_variable *consumer_var = vm->matches[i].consumer_var;
2849 unsigned generic_location = vm->matches[i].generic_location;
2850 unsigned slot = generic_location / 4;
2851 unsigned offset = generic_location % 4;
2852
2853 if (producer_var) {
2854 producer_var->data.location = VARYING_SLOT_VAR0 + slot;
2855 producer_var->data.location_frac = offset;
2856 }
2857
2858 if (consumer_var) {
2859 consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
2860 consumer_var->data.location_frac = offset;
2861 }
2862
2863 /* Find locations suitable for native packing via
2864 * ARB_enhanced_layouts.
2865 */
2866 if (vm->enhanced_layouts_enabled) {
2867 nir_variable *var = producer_var ? producer_var : consumer_var;
2868 unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
2869 const struct glsl_type *type =
2870 get_varying_type(var, stage);
2871 unsigned comp_slots = glsl_get_component_slots(type) + offset;
2872 unsigned slots = comp_slots / 4;
2873 if (comp_slots % 4)
2874 slots += 1;
2875
2876 if (producer_var && consumer_var) {
2877 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
2878 glsl_type_is_64bit(type)) {
2879 for (unsigned j = 0; j < slots; j++) {
2880 pack_loc[slot + j] = true;
2881 }
2882 } else if (offset + glsl_get_vector_elements(type) > 4) {
2883 pack_loc[slot] = true;
2884 pack_loc[slot + 1] = true;
2885 } else {
2886 loc_type[slot][offset] = type;
2887 }
2888 } else {
2889 for (unsigned j = 0; j < slots; j++) {
2890 pack_loc[slot + j] = true;
2891 }
2892 }
2893 }
2894 }
2895
2896 /* Attempt to use ARB_enhanced_layouts for more efficient packing if
2897 * suitable.
2898 */
2899 if (vm->enhanced_layouts_enabled) {
2900 for (unsigned i = 0; i < vm->num_matches; i++) {
2901 nir_variable *producer_var = vm->matches[i].producer_var;
2902 nir_variable *consumer_var = vm->matches[i].consumer_var;
2903 if (!producer_var || !consumer_var)
2904 continue;
2905
2906 unsigned generic_location = vm->matches[i].generic_location;
2907 unsigned slot = generic_location / 4;
2908 if (pack_loc[slot])
2909 continue;
2910
2911 const struct glsl_type *type =
2912 get_varying_type(producer_var, vm->producer_stage);
2913 bool type_match = true;
2914 for (unsigned j = 0; j < 4; j++) {
2915 if (loc_type[slot][j]) {
2916 if (glsl_get_base_type(type) !=
2917 glsl_get_base_type(loc_type[slot][j]))
2918 type_match = false;
2919 }
2920 }
2921
2922 if (type_match) {
2923 producer_var->data.explicit_location = 1;
2924 consumer_var->data.explicit_location = 1;
2925 }
2926 }
2927 }
2928 }
2929
2930 /**
2931 * Is the given variable a varying variable to be counted against the
2932 * limit in ctx->Const.MaxVarying?
2933 * This includes variables such as texcoords, colors and generic
2934 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2935 */
2936 static bool
2937 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
2938 {
2939 /* Only fragment shaders will take a varying variable as an input */
2940 if (stage == MESA_SHADER_FRAGMENT &&
2941 var->data.mode == nir_var_shader_in) {
2942 switch (var->data.location) {
2943 case VARYING_SLOT_POS:
2944 case VARYING_SLOT_FACE:
2945 case VARYING_SLOT_PNTC:
2946 return false;
2947 default:
2948 return true;
2949 }
2950 }
2951 return false;
2952 }
2953
2954 struct tfeedback_candidate_generator_state {
2955 /**
2956 * Memory context used to allocate hash table keys and values.
2957 */
2958 void *mem_ctx;
2959
2960 /**
2961 * Hash table in which tfeedback_candidate objects should be stored.
2962 */
2963 struct hash_table *tfeedback_candidates;
2964
2965 gl_shader_stage stage;
2966
2967 /**
2968 * Pointer to the toplevel variable that is being traversed.
2969 */
2970 nir_variable *toplevel_var;
2971
2972 /**
2973 * Total number of varying floats that have been visited so far. This is
2974 * used to determine the offset to each varying within the toplevel
2975 * variable.
2976 */
2977 unsigned varying_floats;
2978
2979 /**
2980 * Offset within the xfb. Counted in floats.
2981 */
2982 unsigned xfb_offset_floats;
2983 };
2984
2985 /**
2986 * Generates tfeedback_candidate structs describing all possible targets of
2987 * transform feedback.
2988 *
2989 * tfeedback_candidate structs are stored in the hash table
2990 * tfeedback_candidates. This hash table maps varying names to instances of the
2991 * tfeedback_candidate struct.
2992 */
2993 static void
2994 tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
2995 char **name, size_t name_length,
2996 const struct glsl_type *type,
2997 const struct glsl_struct_field *named_ifc_member)
2998 {
2999 switch (glsl_get_base_type(type)) {
3000 case GLSL_TYPE_INTERFACE:
3001 if (named_ifc_member) {
3002 ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
3003 named_ifc_member->name);
3004 tfeedback_candidate_generator(state, name, name_length,
3005 named_ifc_member->type, NULL);
3006 return;
3007 }
3008 FALLTHROUGH;
3009 case GLSL_TYPE_STRUCT:
3010 for (unsigned i = 0; i < glsl_get_length(type); i++) {
3011 size_t new_length = name_length;
3012
3013 /* Append '.field' to the current variable name. */
3014 if (name) {
3015 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
3016 glsl_get_struct_elem_name(type, i));
3017 }
3018
3019 tfeedback_candidate_generator(state, name, new_length,
3020 glsl_get_struct_field(type, i), NULL);
3021 }
3022
3023 return;
3024 case GLSL_TYPE_ARRAY:
3025 if (glsl_type_is_struct(glsl_without_array(type)) ||
3026 glsl_type_is_interface(glsl_without_array(type)) ||
3027 glsl_type_is_array(glsl_get_array_element(type))) {
3028
3029 for (unsigned i = 0; i < glsl_get_length(type); i++) {
3030 size_t new_length = name_length;
3031
3032 /* Append the subscript to the current variable name */
3033 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
3034
3035 tfeedback_candidate_generator(state, name, new_length,
3036 glsl_get_array_element(type),
3037 named_ifc_member);
3038 }
3039
3040 return;
3041 }
3042 FALLTHROUGH;
3043 default:
3044 assert(!glsl_type_is_struct(glsl_without_array(type)));
3045 assert(!glsl_type_is_interface(glsl_without_array(type)));
3046
3047 struct tfeedback_candidate *candidate
3048 = rzalloc(state->mem_ctx, struct tfeedback_candidate);
3049 candidate->toplevel_var = state->toplevel_var;
3050 candidate->type = type;
3051
3052 if (glsl_type_is_64bit(glsl_without_array(type))) {
3053 /* From ARB_gpu_shader_fp64:
3054 *
3055 * If any variable captured in transform feedback has double-precision
3056 * components, the practical requirements for defined behavior are:
3057 * ...
3058 * (c) each double-precision variable captured must be aligned to a
3059 * multiple of eight bytes relative to the beginning of a vertex.
3060 */
3061 state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
3062 /* 64-bit members of structs are also aligned. */
3063 state->varying_floats = ALIGN(state->varying_floats, 2);
3064 }
3065
3066 candidate->xfb_offset_floats = state->xfb_offset_floats;
3067 candidate->struct_offset_floats = state->varying_floats;
3068
3069 _mesa_hash_table_insert(state->tfeedback_candidates,
3070 ralloc_strdup(state->mem_ctx, *name),
3071 candidate);
3072
3073 const unsigned component_slots = glsl_get_component_slots(type);
3074
3075 if (varying_has_user_specified_location(state->toplevel_var)) {
3076 state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
3077 } else {
3078 state->varying_floats += component_slots;
3079 }
3080
3081 state->xfb_offset_floats += component_slots;
3082 }
3083 }
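
/* A worked sketch of the float-offset bookkeeping above, assuming the
 * flattened struct { float f; double d; }: 'f' lands at float offset 0, the
 * double is then aligned up to offset 2 (ALIGN(1, 2) == 2) and occupies two
 * floats, so a following member would start at offset 4. The helper below is
 * an illustrative stand-in for the ALIGN() macro used above.
 */
static unsigned
example_align_to_two_floats(unsigned offset_floats)
{
   /* Round up to the next multiple of 2, matching the fp64 rule quoted
    * above (8-byte alignment == 2 floats).
    */
   return (offset_floats + 1) & ~1u;
}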
3084
3085 static void
3086 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
3087 struct hash_table *consumer_inputs,
3088 struct hash_table *consumer_interface_inputs,
3089 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3090 {
3091 memset(consumer_inputs_with_locations, 0,
3092 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
3093
3094 nir_foreach_shader_in_variable(input_var, nir) {
3095 /* All interface blocks should have been lowered by this point */
3096 assert(!glsl_type_is_interface(input_var->type));
3097
3098 if (input_var->data.explicit_location) {
3099 /* assign_varying_locations only cares about finding the
3100 * nir_variable at the start of a contiguous location block.
3101 *
3102 * - For !producer, consumer_inputs_with_locations isn't used.
3103 *
3104 * - For !consumer, consumer_inputs_with_locations is empty.
3105 *
3106 * For consumer && producer, if you were trying to set some
3107 * nir_variable to the middle of a location block on the other side
3108 * of producer/consumer, cross_validate_outputs_to_inputs() should
3109 * be link-erroring due to either type mismatch or location
3110 * overlaps. If the variables do match up, then they've got a
3111 * matching data.location and you only looked at
3112 * consumer_inputs_with_locations[var->data.location], not any
3113 * following entries for the array/structure.
3114 */
3115 consumer_inputs_with_locations[input_var->data.location] =
3116 input_var;
3117 } else if (input_var->interface_type != NULL) {
3118 char *const iface_field_name =
3119 ralloc_asprintf(mem_ctx, "%s.%s",
3120 glsl_get_type_name(glsl_without_array(input_var->interface_type)),
3121 input_var->name);
3122 _mesa_hash_table_insert(consumer_interface_inputs,
3123 iface_field_name, input_var);
3124 } else {
3125 _mesa_hash_table_insert(consumer_inputs,
3126 ralloc_strdup(mem_ctx, input_var->name),
3127 input_var);
3128 }
3129 }
3130 }
3131
3132 /**
3133 * Find a variable from the consumer that "matches" the specified variable
3134 *
3135 * This function only finds inputs with names that match. There is no
3136 * validation (here) that the types, etc. are compatible.
3137 */
3138 static nir_variable *
3139 get_matching_input(void *mem_ctx,
3140 const nir_variable *output_var,
3141 struct hash_table *consumer_inputs,
3142 struct hash_table *consumer_interface_inputs,
3143 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3144 {
3145 nir_variable *input_var;
3146
3147 if (output_var->data.explicit_location) {
3148 input_var = consumer_inputs_with_locations[output_var->data.location];
3149 } else if (output_var->interface_type != NULL) {
3150 char *const iface_field_name =
3151 ralloc_asprintf(mem_ctx, "%s.%s",
3152 glsl_get_type_name(glsl_without_array(output_var->interface_type)),
3153 output_var->name);
3154 struct hash_entry *entry =
3155 _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
3156 input_var = entry ? (nir_variable *) entry->data : NULL;
3157 } else {
3158 struct hash_entry *entry =
3159 _mesa_hash_table_search(consumer_inputs, output_var->name);
3160 input_var = entry ? (nir_variable *) entry->data : NULL;
3161 }
3162
3163 return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
3164 ? NULL : input_var;
3165 }
3166
3167 static int
3168 io_variable_cmp(const void *_a, const void *_b)
3169 {
3170 const nir_variable *const a = *(const nir_variable **) _a;
3171 const nir_variable *const b = *(const nir_variable **) _b;
3172
3173 if (a->data.explicit_location && b->data.explicit_location)
3174 return b->data.location - a->data.location;
3175
3176 if (a->data.explicit_location && !b->data.explicit_location)
3177 return 1;
3178
3179 if (!a->data.explicit_location && b->data.explicit_location)
3180 return -1;
3181
3182 return -strcmp(a->name, b->name);
3183 }
3184
3185 /**
3186 * Sort the shader IO variables into canonical order
3187 */
3188 static void
3189 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
3190 {
3191 nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
3192 unsigned num_variables = 0;
3193
3194 nir_foreach_variable_with_modes(var, nir, io_mode) {
3195       /* If we have already encountered more I/O variables than could
3196 * successfully link, bail.
3197 */
3198 if (num_variables == ARRAY_SIZE(var_table))
3199 return;
3200
3201 var_table[num_variables++] = var;
3202 }
3203
3204 if (num_variables == 0)
3205 return;
3206
3207 /* Sort the list in reverse order (io_variable_cmp handles this). Later
3208 * we're going to push the variables on to the IR list as a stack, so we
3209 * want the last variable (in canonical order) to be first in the list.
3210 */
3211 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
3212
3213    /* Remove the variable from its current location in the variable list, and
3214 * put it at the front.
3215 */
3216 for (unsigned i = 0; i < num_variables; i++) {
3217 exec_node_remove(&var_table[i]->node);
3218 exec_list_push_head(&nir->variables, &var_table[i]->node);
3219 }
3220 }
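
/* A self-contained sketch of the resulting canonical order, using a stand-in
 * struct instead of nir_variable. io_variable_cmp() sorts the array in
 * *reverse* canonical order, and pushing each element onto the head of the
 * variable list reverses it again, so the final list reads: explicit
 * locations in ascending order, then unlocated variables in name order.
 * Types and names below are illustrative.
 */
struct example_io_var {
   bool explicit_location;
   int location;
   const char *name;
};

static int
example_io_cmp(const void *_a, const void *_b)
{
   const struct example_io_var *a = (const struct example_io_var *) _a;
   const struct example_io_var *b = (const struct example_io_var *) _b;

   if (a->explicit_location && b->explicit_location)
      return b->location - a->location;       /* descending location */
   if (a->explicit_location != b->explicit_location)
      return a->explicit_location ? 1 : -1;   /* unlocated vars sort first */
   return -strcmp(a->name, b->name);          /* reverse name order */
}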
3221
3222 /**
3223 * Generate a bitfield map of the explicit locations for shader varyings.
3224 *
3225 * Note: For Tessellation shaders we are sitting right on the limits of the
3226 * 64 bit map. Per-vertex and per-patch both have separate location domains
3227 * with a max of MAX_VARYING.
3228 */
3229 static uint64_t
3230 reserved_varying_slot(struct gl_linked_shader *sh,
3231 nir_variable_mode io_mode)
3232 {
3233 assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
3234 /* Avoid an overflow of the returned value */
3235 assert(MAX_VARYINGS_INCL_PATCH <= 64);
3236
3237 uint64_t slots = 0;
3238 int var_slot;
3239
3240 if (!sh)
3241 return slots;
3242
3243 nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
3244 if (!var->data.explicit_location ||
3245 var->data.location < VARYING_SLOT_VAR0)
3246 continue;
3247
3248 var_slot = var->data.location - VARYING_SLOT_VAR0;
3249
3250 bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
3251 sh->Stage == MESA_SHADER_VERTEX;
3252 unsigned num_elements =
3253 glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
3254 is_gl_vertex_input);
3255 for (unsigned i = 0; i < num_elements; i++) {
3256 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
3257 slots |= UINT64_C(1) << var_slot;
3258 var_slot += 1;
3259 }
3260 }
3261
3262 return slots;
3263 }
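
/* A minimal sketch of the bit arithmetic above, assuming <stdint.h> is in
 * scope via the existing includes: a variable based at VARYING_SLOT_VAR3
 * that occupies two slots reserves bits 3 and 4 of the 64-bit map, i.e.
 * example_reserve_slots(3, 2) == 0x18. Purely illustrative; bounds checks
 * are omitted.
 */
static uint64_t
example_reserve_slots(unsigned base_slot, unsigned num_slots)
{
   uint64_t mask = 0;
   for (unsigned i = 0; i < num_slots; i++)
      mask |= UINT64_C(1) << (base_slot + i);
   return mask;
}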
3264
3265 /**
3266 * Sets the bits in the inputs_read, or outputs_written
3267 * bitfield corresponding to this variable.
3268 */
3269 static void
3270 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
3271 {
3272 assert(var->data.mode == nir_var_shader_in ||
3273 var->data.mode == nir_var_shader_out);
3274 assert(var->data.location >= VARYING_SLOT_VAR0);
3275
3276 const struct glsl_type *type = var->type;
3277 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
3278 assert(glsl_type_is_array(type));
3279 type = glsl_get_array_element(type);
3280 }
3281
3282 unsigned location = var->data.location - VARYING_SLOT_VAR0;
3283 unsigned slots = glsl_count_attribute_slots(type, false);
3284 for (unsigned i = 0; i < slots; i++) {
3285 BITSET_SET(bits, location + i);
3286 }
3287 }
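
/* Sketch of how the callers below combine this with get_num_components():
 * a vec3 with location_frac == 1 based at VARYING_SLOT_VAR2 sets bit 2 in
 * the masks for components 1, 2 and 3 (its y, z and w channels). Plain
 * uint64_t masks stand in for the BITSET_WORD arrays here.
 */
static void
example_mark_components(uint64_t masks[4], unsigned slot,
                        unsigned location_frac, unsigned num_components)
{
   /* Assumes location_frac + num_components <= 4, as GLSL guarantees. */
   for (unsigned i = 0; i < num_components; i++)
      masks[location_frac + i] |= UINT64_C(1) << slot;
}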
3288
3289 static uint8_t
3290 get_num_components(nir_variable *var)
3291 {
3292 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
3293 return 4;
3294
3295 return glsl_get_vector_elements(glsl_without_array(var->type));
3296 }
3297
3298 static void
3299 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
3300 {
3301 nir_foreach_function_impl(impl, shader) {
3302 nir_foreach_block(block, impl) {
3303 nir_foreach_instr(instr, block) {
3304 if (instr->type != nir_instr_type_intrinsic)
3305 continue;
3306
3307 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3308 if (intrin->intrinsic != nir_intrinsic_load_deref)
3309 continue;
3310
3311 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3312 if (!nir_deref_mode_is(deref, nir_var_shader_out))
3313 continue;
3314
3315 nir_variable *var = nir_deref_instr_get_variable(deref);
3316 for (unsigned i = 0; i < get_num_components(var); i++) {
3317 if (var->data.location < VARYING_SLOT_VAR0)
3318 continue;
3319
3320 unsigned comp = var->data.location_frac;
3321 set_variable_io_mask(read[comp + i], var, shader->info.stage);
3322 }
3323 }
3324 }
3325 }
3326 }
3327
3328 /* We need to replace any interp intrinsics that reference undefined
3329  * (shader_temp) inputs, as no later NIR pass expects to see them.
3330 */
3331 static bool
3332 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
3333 void *data)
3334 {
3335 if (instr->type == nir_instr_type_intrinsic) {
3336 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3337
3338 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
3339 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
3340 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
3341 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
3342 if (var->data.mode == nir_var_shader_temp) {
3343 /* Create undef and rewrite the interp uses */
3344 nir_def *undef =
3345 nir_undef(b, intrin->def.num_components,
3346 intrin->def.bit_size);
3347 nir_def_rewrite_uses(&intrin->def, undef);
3348
3349 nir_instr_remove(&intrin->instr);
3350 return true;
3351 }
3352 }
3353 }
3354
3355 return false;
3356 }
3357
3358 static void
3359 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
3360 {
3361 /* Remove all interpolate uses of the unset varying and replace with undef. */
3362 if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
3363 (void) nir_shader_instructions_pass(shader,
3364 replace_unused_interpolate_at_with_undef,
3365 nir_metadata_block_index |
3366 nir_metadata_dominance,
3367 NULL);
3368 }
3369
3370 nir_lower_global_vars_to_local(shader);
3371 nir_fixup_deref_modes(shader);
3372 }
3373
3374 /**
3375 * Helper for removing unused shader I/O variables, by demoting them to global
3376 * variables (which may then be dead code eliminated).
3377 *
3378 * Example usage is:
3379 *
3380  *    progress = remove_unused_io_vars(producer, consumer, prog,
3381  *                                     nir_var_shader_out, read) || progress;
3382 * progress;
3383 *
3384  * The "used_by_other_stage" parameter should be an array of 4 bitsets, one
3385  * per .location_frac. Note that for vector variables, only the first channel
3386 * (.location_frac) is examined for deciding if the variable is used!
3387 */
3388 static bool
3389 remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
3390 struct gl_shader_program *prog,
3391 nir_variable_mode mode,
3392 BITSET_WORD **used_by_other_stage)
3393 {
3394 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
3395
3396 bool progress = false;
3397 nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;
3398
3399 BITSET_WORD **used;
3400 nir_foreach_variable_with_modes_safe(var, shader, mode) {
3401 used = used_by_other_stage;
3402
3403       /* Skip builtins; dead builtins are removed elsewhere */
3404 if (is_gl_identifier(var->name))
3405 continue;
3406
3407 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
3408 continue;
3409
3410 /* Skip xfb varyings and any other type we cannot remove */
3411 if (var->data.always_active_io)
3412 continue;
3413
3414 if (var->data.explicit_xfb_buffer)
3415 continue;
3416
3417 BITSET_WORD *other_stage = used[var->data.location_frac];
3418
3419       /* If location == -1, lower the varying to a global, as it has no match
3420        * and is not an xfb varying. This must be done after skipping builtins,
3421        * as builtins could be assigned a location of -1.
3422        * We also lower unused varyings with explicit locations.
3423 */
3424 bool use_found = false;
3425 if (var->data.location >= 0) {
3426 unsigned location = var->data.location - VARYING_SLOT_VAR0;
3427
3428 const struct glsl_type *type = var->type;
3429 if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
3430 assert(glsl_type_is_array(type));
3431 type = glsl_get_array_element(type);
3432 }
3433
3434 unsigned slots = glsl_count_attribute_slots(type, false);
3435 for (unsigned i = 0; i < slots; i++) {
3436 if (BITSET_TEST(other_stage, location + i)) {
3437 use_found = true;
3438 break;
3439 }
3440 }
3441 }
3442
3443 if (!use_found) {
3444 /* This one is invalid, make it a global variable instead */
3445 var->data.location = 0;
3446 var->data.mode = nir_var_shader_temp;
3447
3448 progress = true;
3449
3450 if (mode == nir_var_shader_in) {
3451 if (!prog->IsES && prog->GLSL_Version <= 120) {
3452 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
3453 *
3454 * Only those varying variables used (i.e. read) in
3455 * the fragment shader executable must be written to
3456 * by the vertex shader executable; declaring
3457 * superfluous varying variables in a vertex shader is
3458 * permissible.
3459 *
3460 * We interpret this text as meaning that the VS must
3461 * write the variable for the FS to read it. See
3462 * "glsl1-varying read but not written" in piglit.
3463 */
3464 linker_error(prog, "%s shader varying %s not written "
3465                          "by %s shader.\n",
3466 _mesa_shader_stage_to_string(consumer->info.stage),
3467 var->name,
3468 _mesa_shader_stage_to_string(producer->info.stage));
3469 } else {
3470 linker_warning(prog, "%s shader varying %s not written "
3471                            "by %s shader.\n",
3472 _mesa_shader_stage_to_string(consumer->info.stage),
3473 var->name,
3474 _mesa_shader_stage_to_string(producer->info.stage));
3475 }
3476 }
3477 }
3478 }
3479
3480 if (progress)
3481 fixup_vars_lowered_to_temp(shader, mode);
3482
3483 return progress;
3484 }
3485
3486 static bool
3487 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
3488 struct gl_shader_program *prog, void *mem_ctx)
3489 {
3490 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
3491 assert(consumer->info.stage != MESA_SHADER_VERTEX);
3492
3493 int max_loc_out = 0;
3494 nir_foreach_shader_out_variable(var, producer) {
3495 if (var->data.location < VARYING_SLOT_VAR0)
3496 continue;
3497
3498 const struct glsl_type *type = var->type;
3499 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
3500 assert(glsl_type_is_array(type));
3501 type = glsl_get_array_element(type);
3502 }
3503 unsigned slots = glsl_count_attribute_slots(type, false);
3504
3505 max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3506 (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out;
3507 }
3508
3509 int max_loc_in = 0;
3510 nir_foreach_shader_in_variable(var, consumer) {
3511 if (var->data.location < VARYING_SLOT_VAR0)
3512 continue;
3513
3514 const struct glsl_type *type = var->type;
3515 if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
3516 assert(glsl_type_is_array(type));
3517 type = glsl_get_array_element(type);
3518 }
3519 unsigned slots = glsl_count_attribute_slots(type, false);
3520
3521 max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3522 (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in;
3523 }
3524
3525    /* Old GLSL shaders that don't use explicit locations can contain more
3526     * than 64 varyings before unused varyings are removed, so we must count
3527     * them and make use of the BITSET macros to keep track of used slots.
3528     * Once we have removed these excess varyings we can make use of further
3529     * NIR varying linking optimisation passes.
3530 */
3531 BITSET_WORD *read[4];
3532 BITSET_WORD *written[4];
3533 int max_loc = MAX2(max_loc_in, max_loc_out);
3534 for (unsigned i = 0; i < 4; i++) {
3535 read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3536 written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3537 }
3538
3539 nir_foreach_shader_out_variable(var, producer) {
3540 if (var->data.location < VARYING_SLOT_VAR0)
3541 continue;
3542
3543 for (unsigned i = 0; i < get_num_components(var); i++) {
3544 unsigned comp = var->data.location_frac;
3545 set_variable_io_mask(written[comp + i], var, producer->info.stage);
3546 }
3547 }
3548
3549 nir_foreach_shader_in_variable(var, consumer) {
3550 if (var->data.location < VARYING_SLOT_VAR0)
3551 continue;
3552
3553 for (unsigned i = 0; i < get_num_components(var); i++) {
3554 unsigned comp = var->data.location_frac;
3555 set_variable_io_mask(read[comp + i], var, consumer->info.stage);
3556 }
3557 }
3558
3559 /* Each TCS invocation can read data written by other TCS invocations,
3560 * so even if the outputs are not used by the TES we must also make
3561 * sure they are not read by the TCS before demoting them to globals.
3562 */
3563 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
3564 tcs_add_output_reads(producer, read);
3565
3566 bool progress = false;
3567 progress =
3568 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
3569 progress =
3570 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;
3571
3572 return progress;
3573 }
3574
3575 static bool
3576 should_add_varying_match_record(nir_variable *const input_var,
3577 struct gl_shader_program *prog,
3578 struct gl_linked_shader *producer,
3579                                 struct gl_linked_shader *consumer)
3580 {
3581 /* If a matching input variable was found, add this output (and the input) to
3582 * the set. If this is a separable program and there is no consumer stage,
3583 * add the output.
3584 *
3585 * Always add TCS outputs. They are shared by all invocations
3586 * within a patch and can be used as shared memory.
3587 */
3588 return input_var || (prog->SeparateShader && consumer == NULL) ||
3589 producer->Stage == MESA_SHADER_TESS_CTRL;
3590 }
3591
3592 /* This assigns some initial unoptimised varying locations so that our NIR
3593  * optimisations can do their initial matching, and also does initial
3594  * processing of the transform feedback declarations.
3595 */
3596 static bool
3597 assign_initial_varying_locations(const struct gl_constants *consts,
3598 const struct gl_extensions *exts,
3599 void *mem_ctx,
3600 struct gl_shader_program *prog,
3601 struct gl_linked_shader *producer,
3602 struct gl_linked_shader *consumer,
3603 unsigned num_xfb_decls,
3604 struct xfb_decl *xfb_decls,
3605 struct varying_matches *vm)
3606 {
3607 init_varying_matches(mem_ctx, vm, consts, exts,
3608 producer ? producer->Stage : MESA_SHADER_NONE,
3609 consumer ? consumer->Stage : MESA_SHADER_NONE,
3610 prog->SeparateShader);
3611
3612 struct hash_table *tfeedback_candidates =
3613 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3614 _mesa_key_string_equal);
3615 struct hash_table *consumer_inputs =
3616 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3617 _mesa_key_string_equal);
3618 struct hash_table *consumer_interface_inputs =
3619 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3620 _mesa_key_string_equal);
3621 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
3622 NULL,
3623 };
3624
3625 if (consumer)
3626 populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
3627 consumer_inputs, consumer_interface_inputs,
3628 consumer_inputs_with_locations);
3629
3630 if (producer) {
3631 nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
3632 /* Only geometry shaders can use non-zero streams */
3633 assert(output_var->data.stream == 0 ||
3634 (output_var->data.stream < MAX_VERTEX_STREAMS &&
3635 producer->Stage == MESA_SHADER_GEOMETRY));
3636
3637 if (num_xfb_decls > 0) {
3638 /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
3639 * ("Vertex Shader Variables / Output Variables")
3640 *
3641 * "Each program object can specify a set of output variables from
3642 * one shader to be recorded in transform feedback mode (see
3643 * section 13.3). The variables that can be recorded are those
3644 * emitted by the first active shader, in order, from the
3645 * following list:
3646 *
3647 * * geometry shader
3648 * * tessellation evaluation shader
3649 * * tessellation control shader
3650 * * vertex shader"
3651 *
3652 * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
3653 * Variables / Output Variables") tessellation control shader is
3654 * not included in the stages list.
3655 */
3656 if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
3657
3658 const struct glsl_type *type = output_var->data.from_named_ifc_block ?
3659 output_var->interface_type : output_var->type;
3660 if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
3661 assert(glsl_type_is_array(type));
3662 type = glsl_get_array_element(type);
3663 }
3664
3665 const struct glsl_struct_field *ifc_member = NULL;
3666 if (output_var->data.from_named_ifc_block) {
3667 ifc_member =
3668 glsl_get_struct_field_data(glsl_without_array(type),
3669 glsl_get_field_index(glsl_without_array(type), output_var->name));
3670 }
3671
3672 char *name;
3673 if (glsl_type_is_struct(glsl_without_array(type)) ||
3674 (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
3675 type = output_var->type;
3676 name = ralloc_strdup(NULL, output_var->name);
3677 } else if (glsl_type_is_interface(glsl_without_array(type))) {
3678 name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
3679 } else {
3680 name = ralloc_strdup(NULL, output_var->name);
3681 }
3682
3683 struct tfeedback_candidate_generator_state state;
3684 state.mem_ctx = mem_ctx;
3685 state.tfeedback_candidates = tfeedback_candidates;
3686 state.stage = producer->Stage;
3687 state.toplevel_var = output_var;
3688 state.varying_floats = 0;
3689 state.xfb_offset_floats = 0;
3690
3691 tfeedback_candidate_generator(&state, &name, strlen(name), type,
3692 ifc_member);
3693 ralloc_free(name);
3694 }
3695 }
3696
3697 nir_variable *const input_var =
3698 get_matching_input(mem_ctx, output_var, consumer_inputs,
3699 consumer_interface_inputs,
3700 consumer_inputs_with_locations);
3701
3702 if (should_add_varying_match_record(input_var, prog, producer,
3703 consumer)) {
3704 varying_matches_record(mem_ctx, vm, output_var, input_var);
3705 }
3706
3707 /* Only stream 0 outputs can be consumed in the next stage */
3708 if (input_var && output_var->data.stream != 0) {
3709 linker_error(prog, "output %s is assigned to stream=%d but "
3710 "is linked to an input, which requires stream=0",
3711 output_var->name, output_var->data.stream);
3712 return false;
3713 }
3714 }
3715 } else {
3716 /* If there's no producer stage, then this must be a separable program.
3717 * For example, we may have a program that has just a fragment shader.
3718 * Later this program will be used with some arbitrary vertex (or
3719 * geometry) shader program. This means that locations must be assigned
3720 * for all the inputs.
3721 */
3722 nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
3723 varying_matches_record(mem_ctx, vm, NULL, input_var);
3724 }
3725 }
3726
3727 for (unsigned i = 0; i < num_xfb_decls; ++i) {
3728 if (!xfb_decl_is_varying(&xfb_decls[i]))
3729 continue;
3730
3731 const struct tfeedback_candidate *matched_candidate
3732 = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);
3733
3734 if (matched_candidate == NULL)
3735 return false;
3736
3737 /* There are two situations where a new output varying is needed:
3738 *
3739 * - If varying packing is disabled for xfb and the current declaration
3740     *   is subscripting an array (whether the subscript is aligned or not),
3741     *   to preserve the rest of the array for the consumer.
3742 *
3743 * - If a builtin variable needs to be copied to a new variable
3744 * before its content is modified by another lowering pass (e.g.
3745 * \c gl_Position is transformed by \c nir_lower_viewport_transform).
3746 */
3747 const bool lowered =
3748 (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
3749 (matched_candidate->toplevel_var->data.explicit_location &&
3750 matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
3751 (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
3752 (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
3753 BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
3754
3755 if (lowered) {
3756 nir_variable *new_var;
3757 struct tfeedback_candidate *new_candidate = NULL;
3758
3759 new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
3760 xfb_decls[i].orig_name,
3761 matched_candidate->toplevel_var);
3762 if (new_var == NULL)
3763 return false;
3764
3765 /* Create new candidate and replace matched_candidate */
3766 new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
3767 new_candidate->toplevel_var = new_var;
3768 new_candidate->type = new_var->type;
3769 new_candidate->struct_offset_floats = 0;
3770 new_candidate->xfb_offset_floats = 0;
3771 _mesa_hash_table_insert(tfeedback_candidates,
3772 ralloc_strdup(mem_ctx, new_var->name),
3773 new_candidate);
3774
3775 xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
3776 matched_candidate = new_candidate;
3777 }
3778
3779 /* Mark as xfb varying */
3780 matched_candidate->toplevel_var->data.is_xfb = 1;
3781
3782 /* Mark xfb varyings as always active */
3783 matched_candidate->toplevel_var->data.always_active_io = 1;
3784
3785 /* Mark any corresponding inputs as always active also. We must do this
3786 * because we have a NIR pass that lowers vectors to scalars and another
3787 * that removes unused varyings.
3788 * We don't split varyings marked as always active because there is no
3789 * point in doing so. This means we need to mark both sides of the
3790 * interface as always active otherwise we will have a mismatch and
3791 * start removing things we shouldn't.
3792 */
3793 nir_variable *const input_var =
3794 get_matching_input(mem_ctx, matched_candidate->toplevel_var,
3795 consumer_inputs, consumer_interface_inputs,
3796 consumer_inputs_with_locations);
3797 if (input_var) {
3798 input_var->data.is_xfb = 1;
3799 input_var->data.always_active_io = 1;
3800 }
3801
3802 /* Add the xfb varying to varying matches if it wasn't already added */
3803 if ((!should_add_varying_match_record(input_var, prog, producer,
3804 consumer) &&
3805 !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
3806 matched_candidate->toplevel_var->data.is_xfb_only = 1;
3807 varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
3808 NULL);
3809 }
3810 }
3811
3812 uint64_t reserved_out_slots = 0;
3813 if (producer)
3814 reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);
3815
3816 uint64_t reserved_in_slots = 0;
3817 if (consumer)
3818 reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);
3819
3820 /* Assign temporary user varying locations. This is required for our NIR
3821 * varying optimisations to do their matching.
3822 */
3823 const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
3824 varying_matches_assign_temp_locations(vm, prog, reserved_slots);
3825
3826 for (unsigned i = 0; i < num_xfb_decls; ++i) {
3827 if (!xfb_decl_is_varying(&xfb_decls[i]))
3828 continue;
3829
3830 xfb_decls[i].matched_candidate->initial_location =
3831 xfb_decls[i].matched_candidate->toplevel_var->data.location;
3832 xfb_decls[i].matched_candidate->initial_location_frac =
3833 xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
3834 }
3835
3836 return true;
3837 }
3838
3839 static void
3840 link_shader_opts(struct varying_matches *vm,
3841 nir_shader *producer, nir_shader *consumer,
3842 struct gl_shader_program *prog, void *mem_ctx)
3843 {
3844 /* If we can't pack the stage using this pass then we can't lower io to
3845 * scalar just yet. Instead we leave it to a later NIR linking pass that uses
3846     * ARB_enhanced_layouts-style packing to pack things further.
3847 *
3848 * Otherwise we might end up causing linking errors and perf regressions
3849 * because the new scalars will be assigned individual slots and can overflow
3850 * the available slots.
3851 */
3852 if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
3853 !vm->disable_xfb_packing) {
3854 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
3855 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
3856 }
3857
3858 gl_nir_opts(producer);
3859 gl_nir_opts(consumer);
3860
3861 if (nir_link_opt_varyings(producer, consumer))
3862 gl_nir_opts(consumer);
3863
3864 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
3865 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
3866
3867 if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
3868 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
3869 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
3870
3871 gl_nir_opts(producer);
3872 gl_nir_opts(consumer);
3873
3874 /* Optimizations can cause varyings to become unused.
3875 * nir_compact_varyings() depends on all dead varyings being removed so
3876 * we need to call nir_remove_dead_variables() again here.
3877 */
3878 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
3879 NULL);
3880 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
3881 NULL);
3882 }
3883
3884 nir_link_varying_precision(producer, consumer);
3885 }
3886
3887 /**
3888 * Assign locations for all variables that are produced in one pipeline stage
3889 * (the "producer") and consumed in the next stage (the "consumer").
3890 *
3891 * Variables produced by the producer may also be consumed by transform
3892 * feedback.
3893 *
3894 * \param num_xfb_decls is the number of declarations indicating
3895 * variables that may be consumed by transform feedback.
3896 *
3897 * \param xfb_decls is a pointer to an array of xfb_decl objects
3898 * representing the result of parsing the strings passed to
3899 * glTransformFeedbackVaryings(). assign_location() will be called for
3900 * each of these objects that matches one of the outputs of the
3901 * producer.
3902 *
3903 * When num_xfb_decls is nonzero, it is permissible for the consumer to
3904 * be NULL. In this case, varying locations are assigned solely based on the
3905 * requirements of transform feedback.
3906 */
3907 static bool
3908 assign_final_varying_locations(const struct gl_constants *consts,
3909 const struct gl_extensions *exts,
3910 void *mem_ctx,
3911 struct gl_shader_program *prog,
3912 struct gl_linked_shader *producer,
3913 struct gl_linked_shader *consumer,
3914 unsigned num_xfb_decls,
3915 struct xfb_decl *xfb_decls,
3916 const uint64_t reserved_slots,
3917 struct varying_matches *vm)
3918 {
3919 init_varying_matches(mem_ctx, vm, consts, exts,
3920 producer ? producer->Stage : MESA_SHADER_NONE,
3921 consumer ? consumer->Stage : MESA_SHADER_NONE,
3922 prog->SeparateShader);
3923
3924 /* Regather varying matches as we ran optimisations and the previous pointers
3925 * are no longer valid.
3926 */
3927 if (producer) {
3928 nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
3929 if (var_out->data.location < VARYING_SLOT_VAR0 ||
3930 var_out->data.explicit_location)
3931 continue;
3932
3933 if (vm->num_matches == vm->matches_capacity) {
3934 vm->matches_capacity *= 2;
3935 vm->matches = (struct match *)
3936 reralloc(mem_ctx, vm->matches, struct match,
3937 vm->matches_capacity);
3938 }
3939
3940 vm->matches[vm->num_matches].packing_class
3941 = varying_matches_compute_packing_class(var_out);
3942 vm->matches[vm->num_matches].packing_order
3943 = varying_matches_compute_packing_order(var_out);
3944
3945 vm->matches[vm->num_matches].producer_var = var_out;
3946 vm->matches[vm->num_matches].consumer_var = NULL;
3947 vm->num_matches++;
3948 }
3949
3950 /* Regather xfb varyings too */
3951 for (unsigned i = 0; i < num_xfb_decls; i++) {
3952 if (!xfb_decl_is_varying(&xfb_decls[i]))
3953 continue;
3954
3955 /* Varying pointer was already reset */
3956 if (xfb_decls[i].matched_candidate->initial_location == -1)
3957 continue;
3958
3959 bool UNUSED is_reset = false;
3960 bool UNUSED no_outputs = true;
3961 nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
3962 no_outputs = false;
3963 assert(var_out->data.location != -1);
3964 if (var_out->data.location ==
3965 xfb_decls[i].matched_candidate->initial_location &&
3966 var_out->data.location_frac ==
3967 xfb_decls[i].matched_candidate->initial_location_frac) {
3968 xfb_decls[i].matched_candidate->toplevel_var = var_out;
3969 xfb_decls[i].matched_candidate->initial_location = -1;
3970 is_reset = true;
3971 break;
3972 }
3973 }
3974 assert(is_reset || no_outputs);
3975 }
3976 }
3977
3978 bool found_match = false;
3979 if (consumer) {
3980 nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
3981 if (var_in->data.location < VARYING_SLOT_VAR0 ||
3982 var_in->data.explicit_location)
3983 continue;
3984
3985 found_match = false;
3986 for (unsigned i = 0; i < vm->num_matches; i++) {
3987 if (vm->matches[i].producer_var &&
3988 (vm->matches[i].producer_var->data.location == var_in->data.location &&
3989 vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {
3990
3991 vm->matches[i].consumer_var = var_in;
3992 found_match = true;
3993 break;
3994 }
3995 }
3996 if (!found_match) {
3997 if (vm->num_matches == vm->matches_capacity) {
3998 vm->matches_capacity *= 2;
3999 vm->matches = (struct match *)
4000 reralloc(mem_ctx, vm->matches, struct match,
4001 vm->matches_capacity);
4002 }
4003
4004 vm->matches[vm->num_matches].packing_class
4005 = varying_matches_compute_packing_class(var_in);
4006 vm->matches[vm->num_matches].packing_order
4007 = varying_matches_compute_packing_order(var_in);
4008
4009 vm->matches[vm->num_matches].producer_var = NULL;
4010 vm->matches[vm->num_matches].consumer_var = var_in;
4011 vm->num_matches++;
4012 }
4013 }
4014 }
4015
4016 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
4017 const unsigned slots_used =
4018 varying_matches_assign_locations(vm, prog, components, reserved_slots);
4019 varying_matches_store_locations(vm);
4020
4021 for (unsigned i = 0; i < num_xfb_decls; ++i) {
4022 if (xfb_decl_is_varying(&xfb_decls[i])) {
4023 if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog,
4024 vm->disable_varying_packing, vm->xfb_enabled))
4025 return false;
4026 }
4027 }
4028
4029 if (producer) {
4030 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4031 nir_var_shader_out, 0, producer,
4032 vm->disable_varying_packing,
4033 vm->disable_xfb_packing, vm->xfb_enabled);
4034 nir_lower_pack(producer->Program->nir);
4035 }
4036
4037 if (consumer) {
4038 unsigned consumer_vertices = 0;
4039       if (consumer->Stage == MESA_SHADER_GEOMETRY)
4040 consumer_vertices = prog->Geom.VerticesIn;
4041
4042 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4043 nir_var_shader_in, consumer_vertices,
4044 consumer, vm->disable_varying_packing,
4045 vm->disable_xfb_packing, vm->xfb_enabled);
4046 nir_lower_pack(consumer->Program->nir);
4047 }
4048
4049 return true;
4050 }
4051
4052 static bool
4053 check_against_output_limit(const struct gl_constants *consts, gl_api api,
4054 struct gl_shader_program *prog,
4055 struct gl_linked_shader *producer,
4056 unsigned num_explicit_locations)
4057 {
4058 unsigned output_vectors = num_explicit_locations;
4059 nir_foreach_shader_out_variable(var, producer->Program->nir) {
4060 if (!var->data.explicit_location &&
4061 var_counts_against_varying_limit(producer->Stage, var)) {
4062 /* outputs for fragment shader can't be doubles */
4063 output_vectors += glsl_count_attribute_slots(var->type, false);
4064 }
4065 }
4066
4067 assert(producer->Stage != MESA_SHADER_FRAGMENT);
4068 unsigned max_output_components =
4069 consts->Program[producer->Stage].MaxOutputComponents;
4070
4071 const unsigned output_components = output_vectors * 4;
4072 if (output_components > max_output_components) {
4073 if (api == API_OPENGLES2 || prog->IsES)
4074 linker_error(prog, "%s shader uses too many output vectors "
4075 "(%u > %u)\n",
4076 _mesa_shader_stage_to_string(producer->Stage),
4077 output_vectors,
4078 max_output_components / 4);
4079 else
4080 linker_error(prog, "%s shader uses too many output components "
4081 "(%u > %u)\n",
4082 _mesa_shader_stage_to_string(producer->Stage),
4083 output_components,
4084 max_output_components);
4085
4086 return false;
4087 }
4088
4089 return true;
4090 }
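
/* A worked example of the arithmetic above, with illustrative numbers: if
 * MaxOutputComponents is 64, a stage may use at most 64 / 4 == 16 vec4
 * output vectors, so 17 vectors (68 components) would fail to link.
 */
static bool
example_output_fits(unsigned output_vectors, unsigned max_output_components)
{
   return output_vectors * 4 <= max_output_components;
}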
4091
4092 static bool
4093 check_against_input_limit(const struct gl_constants *consts, gl_api api,
4094 struct gl_shader_program *prog,
4095 struct gl_linked_shader *consumer,
4096 unsigned num_explicit_locations)
4097 {
4098 unsigned input_vectors = num_explicit_locations;
4099
4100 nir_foreach_shader_in_variable(var, consumer->Program->nir) {
4101 if (!var->data.explicit_location &&
4102 var_counts_against_varying_limit(consumer->Stage, var)) {
4103          /* these inputs are varyings, not vertex attributes */
4104 input_vectors += glsl_count_attribute_slots(var->type, false);
4105 }
4106 }
4107
4108 assert(consumer->Stage != MESA_SHADER_VERTEX);
4109 unsigned max_input_components =
4110 consts->Program[consumer->Stage].MaxInputComponents;
4111
4112 const unsigned input_components = input_vectors * 4;
4113 if (input_components > max_input_components) {
4114 if (api == API_OPENGLES2 || prog->IsES)
4115 linker_error(prog, "%s shader uses too many input vectors "
4116 "(%u > %u)\n",
4117 _mesa_shader_stage_to_string(consumer->Stage),
4118 input_vectors,
4119 max_input_components / 4);
4120 else
4121 linker_error(prog, "%s shader uses too many input components "
4122 "(%u > %u)\n",
4123 _mesa_shader_stage_to_string(consumer->Stage),
4124 input_components,
4125 max_input_components);
4126
4127 return false;
4128 }
4129
4130 return true;
4131 }
4132
4133 /* Lower unset/unused inputs/outputs */
4134 static void
4135 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
4136 unsigned stage, nir_variable_mode mode)
4137 {
4138 bool progress = false;
4139 nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;
4140
4141 nir_foreach_variable_with_modes_safe(var, shader, mode) {
4142 if (!var->data.is_xfb_only && var->data.location == -1) {
4143 var->data.location = 0;
4144 var->data.mode = nir_var_shader_temp;
4145 progress = true;
4146 }
4147 }
4148
4149 if (progress)
4150 fixup_vars_lowered_to_temp(shader, mode);
4151 }
4152
4153 static bool
4154 link_varyings(struct gl_shader_program *prog, unsigned first,
4155 unsigned last, const struct gl_constants *consts,
4156 const struct gl_extensions *exts, gl_api api, void *mem_ctx)
4157 {
4158 bool has_xfb_qualifiers = false;
4159 unsigned num_xfb_decls = 0;
4160 char **varying_names = NULL;
4161 struct xfb_decl *xfb_decls = NULL;
4162
4163 if (last > MESA_SHADER_FRAGMENT)
4164 return true;
4165
4166 /* From the ARB_enhanced_layouts spec:
4167 *
4168 * "If the shader used to record output variables for transform feedback
4169 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
4170 * qualifiers, the values specified by TransformFeedbackVaryings are
4171 * ignored, and the set of variables captured for transform feedback is
4172 * instead derived from the specified layout qualifiers."
4173 */
4174 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
4175 /* Find last stage before fragment shader */
4176 if (prog->_LinkedShaders[i]) {
4177 has_xfb_qualifiers =
4178 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
4179 prog, &num_xfb_decls,
4180 &varying_names);
4181 break;
4182 }
4183 }
4184
4185 if (!has_xfb_qualifiers) {
4186 num_xfb_decls = prog->TransformFeedback.NumVarying;
4187 varying_names = prog->TransformFeedback.VaryingNames;
4188 }
4189
4190 if (num_xfb_decls != 0) {
4191 /* From GL_EXT_transform_feedback:
4192 * A program will fail to link if:
4193 *
4194 * * the <count> specified by TransformFeedbackVaryingsEXT is
4195 * non-zero, but the program object has no vertex or geometry
4196 * shader;
4197 */
4198 if (first >= MESA_SHADER_FRAGMENT) {
4199 linker_error(prog, "Transform feedback varyings specified, but "
4200 "no vertex, tessellation, or geometry shader is "
4201 "present.\n");
4202 return false;
4203 }
4204
4205 xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
4206 num_xfb_decls);
4207 if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
4208 varying_names, xfb_decls))
4209 return false;
4210 }
4211
4212 struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
4213 unsigned num_shaders = 0;
4214
4215 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4216 if (prog->_LinkedShaders[i])
4217 linked_shader[num_shaders++] = prog->_LinkedShaders[i];
4218 }
4219
4220 struct varying_matches vm;
4221 if (last < MESA_SHADER_FRAGMENT &&
4222 (num_xfb_decls != 0 || prog->SeparateShader)) {
4223 struct gl_linked_shader *producer = prog->_LinkedShaders[last];
4224 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
4225 producer, NULL, num_xfb_decls,
4226 xfb_decls, &vm))
4227 return false;
4228 }
4229
4230 if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
4231 remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
4232 remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
4233 }
4234
4235 if (prog->SeparateShader) {
4236 struct gl_linked_shader *consumer = linked_shader[0];
4237 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
4238 consumer, 0, NULL, &vm))
4239 return false;
4240 }
4241
4242 if (num_shaders == 1) {
4243 /* Linking shaders also optimizes them. Separate shaders, compute shaders
4244 * and shaders with a fixed-func VS or FS that don't need linking are
4245 * optimized here.
4246 */
4247 gl_nir_opts(linked_shader[0]->Program->nir);
4248 } else {
4249 /* Linking the stages in the opposite order (from fragment to vertex)
4250 * ensures that inter-shader outputs written to in an earlier stage
4251 * are eliminated if they are (transitively) not used in a later
4252 * stage.
4253 */
4254 for (int i = num_shaders - 2; i >= 0; i--) {
4255 unsigned stage_num_xfb_decls =
4256 linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
4257 num_xfb_decls : 0;
4258
4259 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
4260 linked_shader[i],
4261 linked_shader[i + 1],
4262 stage_num_xfb_decls, xfb_decls,
4263 &vm))
4264 return false;
4265
4266          /* Now that validation is done it's safe to remove unused varyings. As
4267           * we have both a producer and a consumer it's safe to remove unused
4268           * varyings even if the program is an SSO, because the stages are being
4269           * linked together, i.e. we have a multi-stage SSO.
4270 */
4271 link_shader_opts(&vm, linked_shader[i]->Program->nir,
4272 linked_shader[i + 1]->Program->nir,
4273 prog, mem_ctx);
4274
4275 remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
4276 nir_var_shader_out);
4277 remove_unused_shader_inputs_and_outputs(prog,
4278 linked_shader[i + 1]->Stage,
4279 nir_var_shader_in);
4280 }
4281 }
4282
4283 if (!prog->SeparateShader) {
4284 /* If not SSO remove unused varyings from the first/last stage */
4285 NIR_PASS(_, prog->_LinkedShaders[first]->Program->nir,
4286 nir_remove_dead_variables, nir_var_shader_in, NULL);
4287 NIR_PASS(_, prog->_LinkedShaders[last]->Program->nir,
4288 nir_remove_dead_variables, nir_var_shader_out, NULL);
4289 } else {
4290 /* Sort inputs / outputs into a canonical order. This is necessary so
4291 * that inputs / outputs of separable shaders will be assigned
4292 * predictable locations regardless of the order in which declarations
4293 * appeared in the shader source.
4294 */
4295 if (first != MESA_SHADER_VERTEX) {
4296 canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
4297 nir_var_shader_in);
4298 }
4299
4300 if (last != MESA_SHADER_FRAGMENT) {
4301 canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
4302 nir_var_shader_out);
4303 }
4304 }
4305
4306    /* If there is no fragment shader we need to set up transform feedback.
4307 *
4308 * For SSO we also need to assign output locations. We assign them here
4309 * because we need to do it for both single stage programs and multi stage
4310 * programs.
4311 */
4312 if (last < MESA_SHADER_FRAGMENT &&
4313 (num_xfb_decls != 0 || prog->SeparateShader)) {
4314 const uint64_t reserved_out_slots =
4315 reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
4316 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
4317 prog->_LinkedShaders[last], NULL,
4318 num_xfb_decls, xfb_decls,
4319 reserved_out_slots, &vm))
4320 return false;
4321 }
4322
4323 if (prog->SeparateShader) {
4324 struct gl_linked_shader *const sh = prog->_LinkedShaders[first];
4325
4326 const uint64_t reserved_slots =
4327 reserved_varying_slot(sh, nir_var_shader_in);
4328
4329 /* Assign input locations for SSO, output locations are already
4330 * assigned.
4331 */
4332 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
4333 NULL /* producer */,
4334 sh /* consumer */,
4335 0 /* num_xfb_decls */,
4336 NULL /* xfb_decls */,
4337 reserved_slots, &vm))
4338 return false;
4339 }
4340
4341 if (num_shaders == 1) {
4342 gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
4343 0, NULL);
4344 gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
4345 num_xfb_decls, xfb_decls);
4346 } else {
4347 /* Linking the stages in the opposite order (from fragment to vertex)
4348 * ensures that inter-shader outputs written to in an earlier stage
4349 * are eliminated if they are (transitively) not used in a later
4350 * stage.
4351 */
4352 int next = last;
4353 for (int i = next - 1; i >= 0; i--) {
4354 if (prog->_LinkedShaders[i] == NULL && i != 0)
4355 continue;
4356
4357 struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
4358 struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
4359
4360 gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
4361 next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
4362 xfb_decls);
4363
4364 const uint64_t reserved_out_slots =
4365 reserved_varying_slot(sh_i, nir_var_shader_out);
4366 const uint64_t reserved_in_slots =
4367 reserved_varying_slot(sh_next, nir_var_shader_in);
4368
4369 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
4370 sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
4371 xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
4372 return false;
4373
4374 /* This must be done after all dead varyings are eliminated. */
4375 if (sh_i != NULL) {
4376 unsigned slots_used = util_bitcount64(reserved_out_slots);
4377 if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
4378 return false;
4379 }
4380
4381 unsigned slots_used = util_bitcount64(reserved_in_slots);
4382 if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
4383 return false;
4384
4385 next = i;
4386 }
4387 }
4388
4389 if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
4390 has_xfb_qualifiers, mem_ctx))
4391 return false;
4392
4393 return prog->data->LinkStatus != LINKING_FAILURE;
4394 }
4395
4396 /**
4397 * Store the gl_FragDepth layout in the gl_shader_program struct.
4398 */
4399 static void
4400 store_fragdepth_layout(struct gl_shader_program *prog)
4401 {
4402 if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
4403 return;
4404 }
4405
4406 nir_shader *nir = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->nir;
4407 nir_foreach_shader_out_variable(var, nir) {
4408 if (strcmp(var->name, "gl_FragDepth") == 0) {
4409 switch (var->data.depth_layout) {
4410 case nir_depth_layout_none:
4411 prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
4412 return;
4413 case nir_depth_layout_any:
4414 prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
4415 return;
4416 case nir_depth_layout_greater:
4417 prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
4418 return;
4419 case nir_depth_layout_less:
4420 prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
4421 return;
4422 case nir_depth_layout_unchanged:
4423 prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
4424 return;
4425 default:
4426 assert(0);
4427 return;
4428 }
4429 }
4430 }
4431 }
4432
4433 bool
4434 gl_assign_attribute_or_color_locations(const struct gl_constants *consts,
4435 struct gl_shader_program *prog)
4436 {
4437 void *mem_ctx = ralloc_context(NULL);
4438
4439 if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4440 MESA_SHADER_VERTEX, true)) {
4441 ralloc_free(mem_ctx);
4442 return false;
4443 }
4444
4445 if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4446 MESA_SHADER_FRAGMENT, true)) {
4447 ralloc_free(mem_ctx);
4448 return false;
4449 }
4450
4451 ralloc_free(mem_ctx);
4452 return true;
4453 }
4454
4455 bool
4456 gl_nir_link_varyings(const struct gl_constants *consts,
4457 const struct gl_extensions *exts,
4458 gl_api api, struct gl_shader_program *prog)
4459 {
4460 void *mem_ctx = ralloc_context(NULL);
4461
4462 unsigned first, last;
4463
4464 MESA_TRACE_FUNC();
4465
4466 store_fragdepth_layout(prog);
4467
4468 first = MESA_SHADER_STAGES;
4469 last = 0;
4470
4471 /* We need to initialise the program resource list because the varying
4472     * packing pass may start inserting varyings onto the list.
4473 */
4474 init_program_resource_list(prog);
4475
4476 /* Determine first and last stage. */
4477 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4478 if (!prog->_LinkedShaders[i])
4479 continue;
4480 if (first == MESA_SHADER_STAGES)
4481 first = i;
4482 last = i;
4483 }
4484
4485 bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
4486 if (r) {
4487 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4488 if (!prog->_LinkedShaders[i])
4489 continue;
4490
4491 /* Check for transform feedback varyings specified via the API */
4492 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
4493 prog->TransformFeedback.NumVarying > 0;
4494
4495 /* Check for transform feedback varyings specified in the Shader */
4496 if (prog->last_vert_prog) {
4497 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
4498 prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
4499 }
4500 }
4501
4502 /* Assign NIR XFB info to the last stage before the fragment shader */
4503 for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
4504 struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
4505 if (sh && stage != MESA_SHADER_TESS_CTRL) {
4506 sh->Program->nir->xfb_info =
4507 gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
4508 sh->Program->nir);
4509 break;
4510 }
4511 }
4512 }
4513
4514 ralloc_free(mem_ctx);
4515 return r;
4516 }
4517