1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
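/* As a rough sketch (exact pass ordering and option handling differ between
 * drivers), the helpers below might be combined when linking two adjacent
 * stages along these lines:
 *
 *    nir_link_xfb_varyings(producer, consumer);
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_link_opt_varyings(producer, consumer);
 *    nir_link_varying_precision(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 *
 * with dead-variable and dead-code elimination run on both shaders in
 * between as needed.
 */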
33
34 /**
35 * Returns the bits in the inputs_read or outputs_written
36 * bitfield that correspond to this variable.
37 */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out);
49 assert(var->data.location >= 0);
50
51 const struct glsl_type *type = var->type;
52 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
53 assert(glsl_type_is_array(type));
54 type = glsl_get_array_element(type);
55 }
56
57 unsigned slots = glsl_count_attribute_slots(type, false);
58 return ((1ull << slots) - 1) << location;
59 }
60
61 static bool
62 is_non_generic_patch_var(nir_variable *var)
63 {
64 return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66 var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67 var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69
70 static uint8_t
71 get_num_components(nir_variable *var)
72 {
73 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74 return 4;
75
76 return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
78
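/* Accumulate the slots of any TCS outputs that are read back via load_deref
 * into the "read"/"patches_read" masks (indexed by location_frac), so that
 * outputs read by other TCS invocations are not treated as unused just
 * because the TES ignores them.
 */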
79 static void
80 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82 nir_foreach_function(function, shader) {
83 if (!function->impl)
84 continue;
85
86 nir_foreach_block(block, function->impl) {
87 nir_foreach_instr(instr, block) {
88 if (instr->type != nir_instr_type_intrinsic)
89 continue;
90
91 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
92 if (intrin->intrinsic != nir_intrinsic_load_deref)
93 continue;
94
95 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
96 if (!nir_deref_mode_is(deref, nir_var_shader_out))
97 continue;
98
99 nir_variable *var = nir_deref_instr_get_variable(deref);
100 for (unsigned i = 0; i < get_num_components(var); i++) {
101 if (var->data.patch) {
102 if (is_non_generic_patch_var(var))
103 continue;
104
105 patches_read[var->data.location_frac + i] |=
106 get_variable_io_mask(var, shader->info.stage);
107 } else {
108 read[var->data.location_frac + i] |=
109 get_variable_io_mask(var, shader->info.stage);
110 }
111 }
112 }
113 }
114 }
115 }
116
117 /**
118 * Helper for removing unused shader I/O variables, by demoting them to global
119 * variables (which may then be dead-code eliminated).
120 *
121 * Example usage is:
122 *
123 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
124 * read, patches_read) ||
125 * progress;
126 *
127 * The "used" masks should each be an array of 4 uint64_ts (probably of
128 * VARYING_BIT_*), indexed by .location_frac. Note that for vector variables,
129 * only the first channel (.location_frac) is examined when deciding if the
130 * variable is used!
131 */
132 bool
133 nir_remove_unused_io_vars(nir_shader *shader,
134 nir_variable_mode mode,
135 uint64_t *used_by_other_stage,
136 uint64_t *used_by_other_stage_patches)
137 {
138 bool progress = false;
139 uint64_t *used;
140
141 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
142
143 nir_foreach_variable_with_modes_safe(var, shader, mode) {
144 if (var->data.patch)
145 used = used_by_other_stage_patches;
146 else
147 used = used_by_other_stage;
148
149 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
150 continue;
151
152 if (var->data.always_active_io)
153 continue;
154
155 if (var->data.explicit_xfb_buffer)
156 continue;
157
158 uint64_t other_stage = used[var->data.location_frac];
159
160 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
161 /* Not used by the other stage, so demote it to a global variable instead */
162 var->data.location = 0;
163 var->data.mode = nir_var_shader_temp;
164
165 progress = true;
166 }
167 }
168
169 if (progress)
170 nir_fixup_deref_modes(shader);
171
172 return progress;
173 }
174
175 bool
176 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
177 {
178 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
179 assert(consumer->info.stage != MESA_SHADER_VERTEX);
180
181 uint64_t read[4] = { 0 }, written[4] = { 0 };
182 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
183
184 nir_foreach_shader_out_variable(var, producer) {
185 for (unsigned i = 0; i < get_num_components(var); i++) {
186 if (var->data.patch) {
187 if (is_non_generic_patch_var(var))
188 continue;
189
190 patches_written[var->data.location_frac + i] |=
191 get_variable_io_mask(var, producer->info.stage);
192 } else {
193 written[var->data.location_frac + i] |=
194 get_variable_io_mask(var, producer->info.stage);
195 }
196 }
197 }
198
199 nir_foreach_shader_in_variable(var, consumer) {
200 for (unsigned i = 0; i < get_num_components(var); i++) {
201 if (var->data.patch) {
202 if (is_non_generic_patch_var(var))
203 continue;
204
205 patches_read[var->data.location_frac + i] |=
206 get_variable_io_mask(var, consumer->info.stage);
207 } else {
208 read[var->data.location_frac + i] |=
209 get_variable_io_mask(var, consumer->info.stage);
210 }
211 }
212 }
213
214 /* Each TCS invocation can read data written by other TCS invocations,
215 * so even if the outputs are not used by the TES we must also make
216 * sure they are not read by the TCS before demoting them to globals.
217 */
218 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
219 tcs_add_output_reads(producer, read, patches_read);
220
221 bool progress = false;
222 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
223 patches_read);
224
225 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
226 patches_written) || progress;
227
228 return progress;
229 }
230
231 static uint8_t
232 get_interp_type(nir_variable *var, const struct glsl_type *type,
233 bool default_to_smooth_interp)
234 {
235 if (glsl_type_is_integer(type))
236 return INTERP_MODE_FLAT;
237 else if (var->data.interpolation != INTERP_MODE_NONE)
238 return var->data.interpolation;
239 else if (default_to_smooth_interp)
240 return INTERP_MODE_SMOOTH;
241 else
242 return INTERP_MODE_NONE;
243 }
244
245 #define INTERPOLATE_LOC_SAMPLE 0
246 #define INTERPOLATE_LOC_CENTROID 1
247 #define INTERPOLATE_LOC_CENTER 2
248
249 static uint8_t
250 get_interp_loc(nir_variable *var)
251 {
252 if (var->data.sample)
253 return INTERPOLATE_LOC_SAMPLE;
254 else if (var->data.centroid)
255 return INTERPOLATE_LOC_CENTROID;
256 else
257 return INTERPOLATE_LOC_CENTER;
258 }
259
260 static bool
261 is_packing_supported_for_type(const struct glsl_type *type)
262 {
263 /* We ignore complex types such as arrays, matrices, structs and bit sizes
264 * other than 32-bit. All other vector types should have been split into
265 * scalar variables by the lower_io_to_scalar pass. The only exception
266 * should be OpenGL xfb varyings.
267 * TODO: add support for more complex types?
268 */
269 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
270 }
271
272 struct assigned_comps
273 {
274 uint8_t comps;
275 uint8_t interp_type;
276 uint8_t interp_loc;
277 bool is_32bit;
278 bool is_mediump;
279 };
280
281 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
282 * algorithms this function just assigns them their existing location for now.
283 * TODO: allow better packing of complex types.
284 */
285 static void
286 get_unmoveable_components_masks(nir_shader *shader,
287 nir_variable_mode mode,
288 struct assigned_comps *comps,
289 gl_shader_stage stage,
290 bool default_to_smooth_interp)
291 {
292 nir_foreach_variable_with_modes_safe(var, shader, mode) {
293 assert(var->data.location >= 0);
294
295 /* Only remap things that aren't built-ins. */
296 if (var->data.location >= VARYING_SLOT_VAR0 &&
297 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
298
299 const struct glsl_type *type = var->type;
300 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
301 assert(glsl_type_is_array(type));
302 type = glsl_get_array_element(type);
303 }
304
305 /* If we can pack this varying then don't mark the components as
306 * used.
307 */
308 if (is_packing_supported_for_type(type))
309 continue;
310
311 unsigned location = var->data.location - VARYING_SLOT_VAR0;
312
313 unsigned elements =
314 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
315 glsl_get_vector_elements(glsl_without_array(type)) : 4;
316
317 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
318 unsigned slots = glsl_count_attribute_slots(type, false);
319 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
320 unsigned comps_slot2 = 0;
321 for (unsigned i = 0; i < slots; i++) {
322 if (dual_slot) {
323 if (i & 1) {
324 comps[location + i].comps |= ((1 << comps_slot2) - 1);
325 } else {
326 unsigned num_comps = 4 - var->data.location_frac;
327 comps_slot2 = (elements * dmul) - num_comps;
328
329 /* Assume ARB_enhanced_layouts packing rules for doubles */
330 assert(var->data.location_frac == 0 ||
331 var->data.location_frac == 2);
332 assert(comps_slot2 <= 4);
333
334 comps[location + i].comps |=
335 ((1 << num_comps) - 1) << var->data.location_frac;
336 }
337 } else {
338 comps[location + i].comps |=
339 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
340 }
341
342 comps[location + i].interp_type =
343 get_interp_type(var, type, default_to_smooth_interp);
344 comps[location + i].interp_loc = get_interp_loc(var);
345 comps[location + i].is_32bit =
346 glsl_type_is_32bit(glsl_without_array(type));
347 comps[location + i].is_mediump =
348 var->data.precision == GLSL_PRECISION_MEDIUM ||
349 var->data.precision == GLSL_PRECISION_LOW;
350 }
351 }
352 }
353 }
354
355 struct varying_loc
356 {
357 uint8_t component;
358 uint32_t location;
359 };
360
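/* Helpers for rebuilding the slot bitmasks: index 0 of "slots_used" holds the
 * regular varying mask and index 1 the per-patch mask (with patch bits
 * expressed relative to VARYING_SLOT_PATCH0).
 */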
361 static void
362 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
363 uint64_t slots_used_mask, unsigned num_slots)
364 {
365 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
366
367 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
368 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
369 }
370
371 static void
372 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
373 {
374 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
375
376 slots_used[var->data.patch ? 1 : 0] |=
377 BITFIELD64_BIT(var->data.location - loc_offset + offset);
378 }
379
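/* Apply the computed remap table to every variable of the given mode and
 * rebuild the shader-info slot bitmasks (slots used, outputs read, and their
 * per-patch equivalents). Builtins are left untouched.
 */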
380 static void
381 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
382 struct varying_loc (*remap)[4],
383 uint64_t *slots_used, uint64_t *out_slots_read,
384 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
385 {
386 const gl_shader_stage stage = shader->info.stage;
387 uint64_t out_slots_read_tmp[2] = {0};
388 uint64_t slots_used_tmp[2] = {0};
389
390 /* We don't touch builtins so just copy the bitmask */
391 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
392
393 nir_foreach_variable_with_modes(var, shader, mode) {
394 assert(var->data.location >= 0);
395
396 /* Only remap things that aren't built-ins */
397 if (var->data.location >= VARYING_SLOT_VAR0 &&
398 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
399
400 const struct glsl_type *type = var->type;
401 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
402 assert(glsl_type_is_array(type));
403 type = glsl_get_array_element(type);
404 }
405
406 unsigned num_slots = glsl_count_attribute_slots(type, false);
407 bool used_across_stages = false;
408 bool outputs_read = false;
409
410 unsigned location = var->data.location - VARYING_SLOT_VAR0;
411 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
412
413 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
414 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
415 uint64_t outs_used =
416 var->data.patch ? *p_out_slots_read : *out_slots_read;
417 uint64_t slots =
418 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
419
420 if (slots & used)
421 used_across_stages = true;
422
423 if (slots & outs_used)
424 outputs_read = true;
425
426 if (new_loc->location) {
427 var->data.location = new_loc->location;
428 var->data.location_frac = new_loc->component;
429 }
430
431 if (var->data.always_active_io) {
432 /* We can't apply link time optimisations (specifically array
433 * splitting) to these so we need to copy the existing mask
434 * otherwise we will mess up the mask for things like partially
435 * marked arrays.
436 */
437 if (used_across_stages)
438 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
439
440 if (outputs_read) {
441 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
442 num_slots);
443 }
444 } else {
445 for (unsigned i = 0; i < num_slots; i++) {
446 if (used_across_stages)
447 mark_used_slot(var, slots_used_tmp, i);
448
449 if (outputs_read)
450 mark_used_slot(var, out_slots_read_tmp, i);
451 }
452 }
453 }
454 }
455
456 *slots_used = slots_used_tmp[0];
457 *out_slots_read = out_slots_read_tmp[0];
458 *p_slots_used = slots_used_tmp[1];
459 *p_out_slots_read = out_slots_read_tmp[1];
460 }
461
462 struct varying_component {
463 nir_variable *var;
464 uint8_t interp_type;
465 uint8_t interp_loc;
466 bool is_32bit;
467 bool is_patch;
468 bool is_mediump;
469 bool is_intra_stage_only;
470 bool initialised;
471 };
472
473 static int
474 cmp_varying_component(const void *comp1_v, const void *comp2_v)
475 {
476 struct varying_component *comp1 = (struct varying_component *) comp1_v;
477 struct varying_component *comp2 = (struct varying_component *) comp2_v;
478
479 /* We want patches to be ordered at the end of the array */
480 if (comp1->is_patch != comp2->is_patch)
481 return comp1->is_patch ? 1 : -1;
482
483 /* We want to try to group together TCS outputs that are only read by other
484 * TCS invocations and not consumed by the following stage.
485 */
486 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
487 return comp1->is_intra_stage_only ? 1 : -1;
488
489 /* Group mediump varyings together. */
490 if (comp1->is_mediump != comp2->is_mediump)
491 return comp1->is_mediump ? 1 : -1;
492
493 /* We can only pack varyings with matching interpolation types so group
494 * them together.
495 */
496 if (comp1->interp_type != comp2->interp_type)
497 return comp1->interp_type - comp2->interp_type;
498
499 /* Interpolation loc must match also. */
500 if (comp1->interp_loc != comp2->interp_loc)
501 return comp1->interp_loc - comp2->interp_loc;
502
503 /* If everything else matches just use the original location to sort */
504 const struct nir_variable_data *const data1 = &comp1->var->data;
505 const struct nir_variable_data *const data2 = &comp2->var->data;
506 if (data1->location != data2->location)
507 return data1->location - data2->location;
508 return (int)data1->location_frac - (int)data2->location_frac;
509 }
510
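/* Build an array with one entry per packable producer output component,
 * filled in from the consumer's input loads (and, for a TCS producer, from
 * intra-stage output reads). If the shader interfaces don't line up,
 * *varying_comp_info_size is set to 0 and packing is abandoned.
 */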
511 static void
512 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
513 struct varying_component **varying_comp_info,
514 unsigned *varying_comp_info_size,
515 bool default_to_smooth_interp)
516 {
517 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
518 unsigned num_of_comps_to_pack = 0;
519
520 /* Count the number of varyings that can be packed and create a mapping
521 * of those varyings to the array we will pass to qsort.
522 */
523 nir_foreach_shader_out_variable(var, producer) {
524
525 /* Only remap things that aren't builtins. */
526 if (var->data.location >= VARYING_SLOT_VAR0 &&
527 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
528
529 /* We can't repack xfb varyings. */
530 if (var->data.always_active_io)
531 continue;
532
533 const struct glsl_type *type = var->type;
534 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
535 assert(glsl_type_is_array(type));
536 type = glsl_get_array_element(type);
537 }
538
539 if (!is_packing_supported_for_type(type))
540 continue;
541
542 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
543 store_varying_info_idx[loc][var->data.location_frac] =
544 ++num_of_comps_to_pack;
545 }
546 }
547
548 *varying_comp_info_size = num_of_comps_to_pack;
549 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
550 num_of_comps_to_pack);
551
552 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
553
554 /* Walk over the shader and populate the varying component info array */
555 nir_foreach_block(block, impl) {
556 nir_foreach_instr(instr, block) {
557 if (instr->type != nir_instr_type_intrinsic)
558 continue;
559
560 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
561 if (intr->intrinsic != nir_intrinsic_load_deref &&
562 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
563 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
564 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
565 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
566 continue;
567
568 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
569 if (!nir_deref_mode_is(deref, nir_var_shader_in))
570 continue;
571
572 /* We only remap things that aren't builtins. */
573 nir_variable *in_var = nir_deref_instr_get_variable(deref);
574 if (in_var->data.location < VARYING_SLOT_VAR0)
575 continue;
576
577 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
578 if (location >= MAX_VARYINGS_INCL_PATCH)
579 continue;
580
581 unsigned var_info_idx =
582 store_varying_info_idx[location][in_var->data.location_frac];
583 if (!var_info_idx)
584 continue;
585
586 struct varying_component *vc_info =
587 &(*varying_comp_info)[var_info_idx-1];
588
589 if (!vc_info->initialised) {
590 const struct glsl_type *type = in_var->type;
591 if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
592 in_var->data.per_view) {
593 assert(glsl_type_is_array(type));
594 type = glsl_get_array_element(type);
595 }
596
597 vc_info->var = in_var;
598 vc_info->interp_type =
599 get_interp_type(in_var, type, default_to_smooth_interp);
600 vc_info->interp_loc = get_interp_loc(in_var);
601 vc_info->is_32bit = glsl_type_is_32bit(type);
602 vc_info->is_patch = in_var->data.patch;
603 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
604 (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
605 in_var->data.precision == GLSL_PRECISION_LOW);
606 vc_info->is_intra_stage_only = false;
607 vc_info->initialised = true;
608 }
609 }
610 }
611
612 /* Walk over the shader and populate the varying component info array
613 * for varyings which are read by other TCS instances but are not consumed
614 * by the TES.
615 */
616 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
617 impl = nir_shader_get_entrypoint(producer);
618
619 nir_foreach_block(block, impl) {
620 nir_foreach_instr(instr, block) {
621 if (instr->type != nir_instr_type_intrinsic)
622 continue;
623
624 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
625 if (intr->intrinsic != nir_intrinsic_load_deref)
626 continue;
627
628 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
629 if (!nir_deref_mode_is(deref, nir_var_shader_out))
630 continue;
631
632 /* We only remap things that aren't builtins. */
633 nir_variable *out_var = nir_deref_instr_get_variable(deref);
634 if (out_var->data.location < VARYING_SLOT_VAR0)
635 continue;
636
637 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
638 if (location >= MAX_VARYINGS_INCL_PATCH)
639 continue;
640
641 unsigned var_info_idx =
642 store_varying_info_idx[location][out_var->data.location_frac];
643 if (!var_info_idx) {
644 /* Something went wrong, the shader interfaces didn't match, so
645 * abandon packing. This can happen for example when the
646 * inputs are scalars but the outputs are struct members.
647 */
648 *varying_comp_info_size = 0;
649 break;
650 }
651
652 struct varying_component *vc_info =
653 &(*varying_comp_info)[var_info_idx-1];
654
655 if (!vc_info->initialised) {
656 const struct glsl_type *type = out_var->type;
657 if (nir_is_arrayed_io(out_var, producer->info.stage)) {
658 assert(glsl_type_is_array(type));
659 type = glsl_get_array_element(type);
660 }
661
662 vc_info->var = out_var;
663 vc_info->interp_type =
664 get_interp_type(out_var, type, default_to_smooth_interp);
665 vc_info->interp_loc = get_interp_loc(out_var);
666 vc_info->is_32bit = glsl_type_is_32bit(type);
667 vc_info->is_patch = out_var->data.patch;
668 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
669 (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
670 out_var->data.precision == GLSL_PRECISION_LOW);
671 vc_info->is_intra_stage_only = true;
672 vc_info->initialised = true;
673 }
674 }
675 }
676 }
677
678 for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
679 struct varying_component *vc_info = &(*varying_comp_info)[i];
680 if (!vc_info->initialised) {
681 /* Something went wrong, the shader interfaces didn't match, so
682 * abandon packing. This can happen for example when the outputs are
683 * scalars but the inputs are struct members.
684 */
685 *varying_comp_info_size = 0;
686 break;
687 }
688 }
689 }
690
691 static bool
692 allow_pack_interp_type(nir_pack_varying_options options, int type)
693 {
694 int sel;
695
696 switch (type) {
697 case INTERP_MODE_NONE:
698 sel = nir_pack_varying_interp_mode_none;
699 break;
700 case INTERP_MODE_SMOOTH:
701 sel = nir_pack_varying_interp_mode_smooth;
702 break;
703 case INTERP_MODE_FLAT:
704 sel = nir_pack_varying_interp_mode_flat;
705 break;
706 case INTERP_MODE_NOPERSPECTIVE:
707 sel = nir_pack_varying_interp_mode_noperspective;
708 break;
709 default:
710 return false;
711 }
712
713 return options & sel;
714 }
715
716 static bool
717 allow_pack_interp_loc(nir_pack_varying_options options, int loc)
718 {
719 int sel;
720
721 switch (loc) {
722 case INTERPOLATE_LOC_SAMPLE:
723 sel = nir_pack_varying_interp_loc_sample;
724 break;
725 case INTERPOLATE_LOC_CENTROID:
726 sel = nir_pack_varying_interp_loc_centroid;
727 break;
728 case INTERPOLATE_LOC_CENTER:
729 sel = nir_pack_varying_interp_loc_center;
730 break;
731 default:
732 return false;
733 }
734
735 return options & sel;
736 }
737
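/* Scan forward from *cursor looking for a free component that is compatible
 * with "info" (matching precision and 32-bit-ness, and matching interpolation
 * type/location unless the driver's pack_varying_options allow mixing), then
 * record the new location/component in the remap table and mark the component
 * as used in assigned_comps.
 */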
738 static void
739 assign_remap_locations(struct varying_loc (*remap)[4],
740 struct assigned_comps *assigned_comps,
741 struct varying_component *info,
742 unsigned *cursor, unsigned *comp,
743 unsigned max_location,
744 nir_pack_varying_options options)
745 {
746 unsigned tmp_cursor = *cursor;
747 unsigned tmp_comp = *comp;
748
749 for (; tmp_cursor < max_location; tmp_cursor++) {
750
751 if (assigned_comps[tmp_cursor].comps) {
752 /* We can only pack varyings with matching precision. */
753 if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
754 tmp_comp = 0;
755 continue;
756 }
757
758 /* Varyings with mismatched interpolation types can only be packed
759 * together if the driver supports doing so.
760 */
761 if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
762 (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
763 !allow_pack_interp_type(options, info->interp_type))) {
764 tmp_comp = 0;
765 continue;
766 }
767
768 /* Varyings with mismatched interpolation locations can only be packed
769 * together if the driver supports doing so.
770 */
771 if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
772 (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
773 !allow_pack_interp_loc(options, info->interp_loc))) {
774 tmp_comp = 0;
775 continue;
776 }
777
778 /* We can only pack varyings with matching types, and the current
779 * algorithm only supports packing 32-bit.
780 */
781 if (!assigned_comps[tmp_cursor].is_32bit) {
782 tmp_comp = 0;
783 continue;
784 }
785
786 while (tmp_comp < 4 &&
787 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
788 tmp_comp++;
789 }
790 }
791
792 if (tmp_comp == 4) {
793 tmp_comp = 0;
794 continue;
795 }
796
797 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
798
799 /* Once we have assigned a location mark it as used */
800 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
801 assigned_comps[tmp_cursor].interp_type = info->interp_type;
802 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
803 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
804 assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
805
806 /* Assign remap location */
807 remap[location][info->var->data.location_frac].component = tmp_comp++;
808 remap[location][info->var->data.location_frac].location =
809 tmp_cursor + VARYING_SLOT_VAR0;
810
811 break;
812 }
813
814 *cursor = tmp_cursor;
815 *comp = tmp_comp;
816 }
817
818 /* If there are empty components in the slot compact the remaining components
819 * as close to component 0 as possible. This will make it easier to fill the
820 * empty components with components from a different slot in a following pass.
821 */
822 static void
823 compact_components(nir_shader *producer, nir_shader *consumer,
824 struct assigned_comps *assigned_comps,
825 bool default_to_smooth_interp)
826 {
827 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
828 struct varying_component *varying_comp_info;
829 unsigned varying_comp_info_size;
830
831 /* Gather varying component info */
832 gather_varying_component_info(producer, consumer, &varying_comp_info,
833 &varying_comp_info_size,
834 default_to_smooth_interp);
835
836 /* Sort varying components. */
837 qsort(varying_comp_info, varying_comp_info_size,
838 sizeof(struct varying_component), cmp_varying_component);
839
840 nir_pack_varying_options options = consumer->options->pack_varying_options;
841
842 unsigned cursor = 0;
843 unsigned comp = 0;
844
845 /* Set the remap array based on the sorted components */
846 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
847 struct varying_component *info = &varying_comp_info[i];
848
849 assert(info->is_patch || cursor < MAX_VARYING);
850 if (info->is_patch) {
851 /* The list should be sorted with all non-patch inputs first followed
852 * by patch inputs. When we hit our first patch input, we need to
853 * reset the cursor to MAX_VARYING so we put them in the right slot.
854 */
855 if (cursor < MAX_VARYING) {
856 cursor = MAX_VARYING;
857 comp = 0;
858 }
859
860 assign_remap_locations(remap, assigned_comps, info,
861 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
862 options);
863 } else {
864 assign_remap_locations(remap, assigned_comps, info,
865 &cursor, &comp, MAX_VARYING,
866 options);
867
868 /* Check if we failed to assign a remap location. This can happen if
869 * for example there are a bunch of unmovable components with
870 * mismatching interpolation types causing us to skip over locations
871 * that would have been useful for packing later components.
872 * The solution is to iterate over the locations again (this should
873 * happen very rarely in practice).
874 */
875 if (cursor == MAX_VARYING) {
876 cursor = 0;
877 comp = 0;
878 assign_remap_locations(remap, assigned_comps, info,
879 &cursor, &comp, MAX_VARYING,
880 options);
881 }
882 }
883 }
884
885 ralloc_free(varying_comp_info);
886
887 uint64_t zero = 0;
888 uint32_t zero32 = 0;
889 remap_slots_and_components(consumer, nir_var_shader_in, remap,
890 &consumer->info.inputs_read, &zero,
891 &consumer->info.patch_inputs_read, &zero32);
892 remap_slots_and_components(producer, nir_var_shader_out, remap,
893 &producer->info.outputs_written,
894 &producer->info.outputs_read,
895 &producer->info.patch_outputs_written,
896 &producer->info.patch_outputs_read);
897 }
898
899 /* We assume that this has been called more-or-less directly after
900 * remove_unused_varyings. At this point, all of the varyings that we
901 * aren't going to be using have been completely removed and the
902 * inputs_read and outputs_written fields in nir_shader_info reflect
903 * this. Therefore, the total set of valid slots is the OR of the two
904 * sets of varyings; this accounts for varyings which one side may need
905 * to read/write even if the other doesn't. This can happen if, for
906 * instance, an array is used indirectly from one side causing it to be
907 * unsplittable but directly from the other.
908 */
909 void
910 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
911 bool default_to_smooth_interp)
912 {
913 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
914 assert(consumer->info.stage != MESA_SHADER_VERTEX);
915
916 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
917
918 get_unmoveable_components_masks(producer, nir_var_shader_out,
919 assigned_comps,
920 producer->info.stage,
921 default_to_smooth_interp);
922 get_unmoveable_components_masks(consumer, nir_var_shader_in,
923 assigned_comps,
924 consumer->info.stage,
925 default_to_smooth_interp);
926
927 compact_components(producer, consumer, assigned_comps,
928 default_to_smooth_interp);
929 }
930
931 /*
932 * Mark XFB varyings as always_active_io in the consumer so the linking opts
933 * don't touch them.
934 */
935 void
936 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
937 {
938 nir_variable *input_vars[MAX_VARYING] = { 0 };
939
940 nir_foreach_shader_in_variable(var, consumer) {
941 if (var->data.location >= VARYING_SLOT_VAR0 &&
942 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
943
944 unsigned location = var->data.location - VARYING_SLOT_VAR0;
945 input_vars[location] = var;
946 }
947 }
948
949 nir_foreach_shader_out_variable(var, producer) {
950 if (var->data.location >= VARYING_SLOT_VAR0 &&
951 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
952
953 if (!var->data.always_active_io)
954 continue;
955
956 unsigned location = var->data.location - VARYING_SLOT_VAR0;
957 if (input_vars[location]) {
958 input_vars[location]->data.always_active_io = true;
959 }
960 }
961 }
962 }
963
964 static bool
965 does_varying_match(nir_variable *out_var, nir_variable *in_var)
966 {
967 return in_var->data.location == out_var->data.location &&
968 in_var->data.location_frac == out_var->data.location_frac;
969 }
970
971 static nir_variable *
972 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
973 {
974 nir_foreach_shader_in_variable(var, consumer) {
975 if (does_varying_match(out_var, var))
976 return var;
977 }
978
979 return NULL;
980 }
981
982 static bool
983 can_replace_varying(nir_variable *out_var)
984 {
985 /* Skip types that require more complex handling.
986 * TODO: add support for these types.
987 */
988 if (glsl_type_is_array(out_var->type) ||
989 glsl_type_is_dual_slot(out_var->type) ||
990 glsl_type_is_matrix(out_var->type) ||
991 glsl_type_is_struct_or_ifc(out_var->type))
992 return false;
993
994 /* Limit this pass to scalars for now to keep things simple. Most varyings
995 * should have been lowered to scalars at this point anyway.
996 */
997 if (!glsl_type_is_scalar(out_var->type))
998 return false;
999
1000 if (out_var->data.location < VARYING_SLOT_VAR0 ||
1001 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1002 return false;
1003
1004 return true;
1005 }
1006
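/* The producer stores a constant to this output, so rewrite every load of
 * the matching input in the consumer to use the same constant directly.
 */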
1007 static bool
1008 replace_varying_input_by_constant_load(nir_shader *shader,
1009 nir_intrinsic_instr *store_intr)
1010 {
1011 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1012
1013 nir_builder b;
1014 nir_builder_init(&b, impl);
1015
1016 nir_variable *out_var =
1017 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1018
1019 bool progress = false;
1020 nir_foreach_block(block, impl) {
1021 nir_foreach_instr(instr, block) {
1022 if (instr->type != nir_instr_type_intrinsic)
1023 continue;
1024
1025 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1026 if (intr->intrinsic != nir_intrinsic_load_deref)
1027 continue;
1028
1029 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1030 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1031 continue;
1032
1033 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1034
1035 if (!does_varying_match(out_var, in_var))
1036 continue;
1037
1038 b.cursor = nir_before_instr(instr);
1039
1040 nir_load_const_instr *out_const =
1041 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1042
1043 /* Add new const to replace the input */
1044 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
1045 intr->dest.ssa.bit_size,
1046 out_const->value);
1047
1048 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
1049
1050 progress = true;
1051 }
1052 }
1053
1054 return progress;
1055 }
1056
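/* The producer writes the same SSA value to this output as to the output
 * matching "input_var", so loads of the duplicate input in the consumer can
 * be replaced with loads of input_var, provided the interpolation settings
 * match.
 */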
1057 static bool
1058 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1059 nir_intrinsic_instr *dup_store_intr)
1060 {
1061 assert(input_var);
1062
1063 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1064
1065 nir_builder b;
1066 nir_builder_init(&b, impl);
1067
1068 nir_variable *dup_out_var =
1069 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
1070
1071 bool progress = false;
1072 nir_foreach_block(block, impl) {
1073 nir_foreach_instr(instr, block) {
1074 if (instr->type != nir_instr_type_intrinsic)
1075 continue;
1076
1077 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1078 if (intr->intrinsic != nir_intrinsic_load_deref)
1079 continue;
1080
1081 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1082 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1083 continue;
1084
1085 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1086
1087 if (!does_varying_match(dup_out_var, in_var) ||
1088 in_var->data.interpolation != input_var->data.interpolation ||
1089 get_interp_loc(in_var) != get_interp_loc(input_var))
1090 continue;
1091
1092 b.cursor = nir_before_instr(instr);
1093
1094 nir_ssa_def *load = nir_load_var(&b, input_var);
1095 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1096
1097 progress = true;
1098 }
1099 }
1100
1101 return progress;
1102 }
1103
1104 static bool
1105 is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
1106 {
1107 /* def is sure to be scalar as can_replace_varying() filters out the vector case. */
1108 assert(def->num_components == 1);
1109
1110 /* A uniform load may hide behind a move instruction that converts a
1111 * vector to a scalar:
1112 *
1113 * vec1 32 ssa_1 = deref_var &color (uniform vec3)
1114 * vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1115 * vec1 32 ssa_3 = mov ssa_2.x
1116 * vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1117 * intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1118 */
1119 *s = nir_ssa_scalar_resolved(def, 0);
1120
1121 nir_ssa_def *ssa = s->def;
1122 if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1123 return false;
1124
1125 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1126 if (intr->intrinsic != nir_intrinsic_load_deref)
1127 return false;
1128
1129 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1130 /* TODO: support nir_var_mem_ubo. */
1131 if (!nir_deref_mode_is(deref, nir_var_uniform))
1132 return false;
1133
1134 /* Does not support indirect uniform load. */
1135 return !nir_deref_instr_has_indirect(deref);
1136 }
1137
1138 static nir_variable *
1139 get_uniform_var_in_consumer(nir_shader *consumer,
1140 nir_variable *var_in_producer)
1141 {
1142 /* Find if uniform already exists in consumer. */
1143 nir_variable *new_var = NULL;
1144 nir_foreach_uniform_variable(v, consumer) {
1145 if (!strcmp(var_in_producer->name, v->name)) {
1146 new_var = v;
1147 break;
1148 }
1149 }
1150
1151 /* Create the variable if it doesn't exist. */
1152 if (!new_var) {
1153 new_var = nir_variable_clone(var_in_producer, consumer);
1154 nir_shader_add_variable(consumer, new_var);
1155 }
1156
1157 return new_var;
1158 }
1159
1160 static nir_deref_instr *
1161 clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
1162 {
1163 if (deref->deref_type == nir_deref_type_var)
1164 return nir_build_deref_var(b, var);
1165
1166 nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1167 nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
1168
1169 /* Build the array and struct deref instructions.
1170 * The "deref" instr is guaranteed to be direct (see is_direct_uniform_load()).
1171 */
1172 switch (deref->deref_type) {
1173 case nir_deref_type_array: {
1174 nir_load_const_instr *index =
1175 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1176 return nir_build_deref_array_imm(b, parent, index->value->i64);
1177 }
1178 case nir_deref_type_ptr_as_array: {
1179 nir_load_const_instr *index =
1180 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1181 nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
1182 parent->dest.ssa.bit_size);
1183 return nir_build_deref_ptr_as_array(b, parent, ssa);
1184 }
1185 case nir_deref_type_struct:
1186 return nir_build_deref_struct(b, parent, deref->strct.index);
1187 default:
1188 unreachable("invalid type");
1189 return NULL;
1190 }
1191 }
1192
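/* The producer's output value comes straight from a non-indirect uniform
 * load, so clone the uniform variable and its deref chain into the consumer
 * and load the uniform there instead of reading the varying.
 */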
1193 static bool
1194 replace_varying_input_by_uniform_load(nir_shader *shader,
1195 nir_intrinsic_instr *store_intr,
1196 nir_ssa_scalar *scalar)
1197 {
1198 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1199
1200 nir_builder b;
1201 nir_builder_init(&b, impl);
1202
1203 nir_variable *out_var =
1204 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1205
1206 nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1207 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1208 nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1209 uni_var = get_uniform_var_in_consumer(shader, uni_var);
1210
1211 bool progress = false;
1212 nir_foreach_block(block, impl) {
1213 nir_foreach_instr(instr, block) {
1214 if (instr->type != nir_instr_type_intrinsic)
1215 continue;
1216
1217 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1218 if (intr->intrinsic != nir_intrinsic_load_deref)
1219 continue;
1220
1221 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1222 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1223 continue;
1224
1225 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1226
1227 if (!does_varying_match(out_var, in_var))
1228 continue;
1229
1230 b.cursor = nir_before_instr(instr);
1231
1232 /* Clone the deref chain, from the variable deref down to the loaded deref. */
1233 nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
1234 nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
1235
1236 /* Add a vector-to-scalar move if the uniform is a vector. */
1237 if (uni_def->num_components > 1) {
1238 nir_alu_src src = {0};
1239 src.src = nir_src_for_ssa(uni_def);
1240 src.swizzle[0] = scalar->comp;
1241 uni_def = nir_mov_alu(&b, src, 1);
1242 }
1243
1244 /* Replace load input with load uniform. */
1245 nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
1246
1247 progress = true;
1248 }
1249 }
1250
1251 return progress;
1252 }
1253
1254 /* The GLSL ES 3.20 spec says:
1255 *
1256 * "The precision of a vertex output does not need to match the precision of
1257 * the corresponding fragment input. The minimum precision at which vertex
1258 * outputs are interpolated is the minimum of the vertex output precision and
1259 * the fragment input precision, with the exception that for highp,
1260 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1261 * Output Matching by Name in Linked Programs")
1262 *
1263 * To implement this, when linking shaders we will take the minimum precision
1264 * qualifier (allowing drivers to interpolate at lower precision). For
1265 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1266 * requires we use the *last* specified precision if there is a conflict.
1267 *
1268 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1269 * NONE, we'll return the other precision, since there is no conflict.
1270 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1271 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1272 * "backwards". For non-fragment stages, we'll pick the latter precision to
1273 * comply with the spec. (Note that the order matters.)
1274 *
1275 * For streamout, "Variables declared with lowp or mediump precision are
1276 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1277 * of the OpenGL ES 3.2 specification). So drivers should promote them for
1278 * the transform feedback memory store, but not for the output store.
1279 */
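/* For example, given the (NONE, HIGH, MEDIUM, LOW) ordering above: linking a
 * mediump vertex output with a highp fragment input yields mediump (the max
 * of the raw values), while for a non-fragment link such as VS -> GS the
 * consumer's precision wins.
 */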
1280
1281 static unsigned
1282 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1283 {
1284 if (producer == GLSL_PRECISION_NONE)
1285 return consumer;
1286 else if (consumer == GLSL_PRECISION_NONE)
1287 return producer;
1288 else
1289 return fs ? MAX2(producer, consumer) : consumer;
1290 }
1291
1292 void
1293 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1294 {
1295 bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1296
1297 nir_foreach_shader_out_variable(producer_var, producer) {
1298 /* Skip if the slot is not assigned */
1299 if (producer_var->data.location < 0)
1300 continue;
1301
1302 nir_variable *consumer_var = nir_find_variable_with_location(consumer,
1303 nir_var_shader_in, producer_var->data.location);
1304
1305 /* Skip if the variable will be eliminated */
1306 if (!consumer_var)
1307 continue;
1308
1309 /* Now we have a pair of variables. Let's pick the smaller precision. */
1310 unsigned precision_1 = producer_var->data.precision;
1311 unsigned precision_2 = consumer_var->data.precision;
1312 unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1313
1314 /* Propagate the new precision */
1315 producer_var->data.precision = consumer_var->data.precision = minimum;
1316 }
1317 }
1318
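/* Forward producer output values into the consumer where possible: for scalar
 * generic varyings whose final value is established by a store in the last
 * block of the producer's entrypoint, constants and direct uniform loads are
 * propagated into the consumer, and outputs that duplicate another output are
 * folded onto a single input. Currently limited to VS/TES -> FS links.
 */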
1319 bool
1320 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1321 {
1322 /* TODO: Add support for more shader stage combinations */
1323 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1324 (producer->info.stage != MESA_SHADER_VERTEX &&
1325 producer->info.stage != MESA_SHADER_TESS_EVAL))
1326 return false;
1327
1328 bool progress = false;
1329
1330 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1331
1332 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1333
1334 /* If we find a store in the last block of the producer we can be sure this
1335 * is the only possible value for this output.
1336 */
1337 nir_block *last_block = nir_impl_last_block(impl);
1338 nir_foreach_instr_reverse(instr, last_block) {
1339 if (instr->type != nir_instr_type_intrinsic)
1340 continue;
1341
1342 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1343
1344 if (intr->intrinsic != nir_intrinsic_store_deref)
1345 continue;
1346
1347 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1348 if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1349 continue;
1350
1351 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1352 if (!can_replace_varying(out_var))
1353 continue;
1354
1355 nir_ssa_scalar uni_scalar;
1356 nir_ssa_def *ssa = intr->src[1].ssa;
1357 if (ssa->parent_instr->type == nir_instr_type_load_const) {
1358 progress |= replace_varying_input_by_constant_load(consumer, intr);
1359 } else if (is_direct_uniform_load(ssa, &uni_scalar)) {
1360 progress |= replace_varying_input_by_uniform_load(consumer, intr,
1361 &uni_scalar);
1362 } else {
1363 struct hash_entry *entry =
1364 _mesa_hash_table_search(varying_values, ssa);
1365 if (entry) {
1366 progress |= replace_duplicate_input(consumer,
1367 (nir_variable *) entry->data,
1368 intr);
1369 } else {
1370 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1371 if (in_var) {
1372 _mesa_hash_table_insert(varying_values, ssa, in_var);
1373 }
1374 }
1375 }
1376 }
1377
1378 _mesa_hash_table_destroy(varying_values, NULL);
1379
1380 return progress;
1381 }
1382
1383 /* TODO any better helper somewhere to sort a list? */
1384
1385 static void
1386 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1387 {
1388 nir_foreach_variable_in_list(var, var_list) {
1389 if (var->data.location > new_var->data.location) {
1390 exec_node_insert_node_before(&var->node, &new_var->node);
1391 return;
1392 }
1393 }
1394 exec_list_push_tail(var_list, &new_var->node);
1395 }
1396
1397 static void
1398 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1399 struct exec_list *sorted_list)
1400 {
1401 exec_list_make_empty(sorted_list);
1402 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1403 exec_node_remove(&var->node);
1404 insert_sorted(sorted_list, var);
1405 }
1406 }
1407
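/* Assign var->data.driver_location for every variable of the given mode:
 * variables are sorted by location, component-packed variables share a vec4
 * slot, and compact/per-view variables get the extra slots they need. On
 * return, *size is the total number of driver slots allocated.
 */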
1408 void
1409 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1410 unsigned *size, gl_shader_stage stage)
1411 {
1412 unsigned location = 0;
1413 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1414 uint64_t processed_locs[2] = {0};
1415
1416 struct exec_list io_vars;
1417 sort_varyings(shader, mode, &io_vars);
1418
1419 int UNUSED last_loc = 0;
1420 bool last_partial = false;
1421 nir_foreach_variable_in_list(var, &io_vars) {
1422 const struct glsl_type *type = var->type;
1423 if (nir_is_arrayed_io(var, stage)) {
1424 assert(glsl_type_is_array(type));
1425 type = glsl_get_array_element(type);
1426 }
1427
1428 int base;
1429 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1430 base = VERT_ATTRIB_GENERIC0;
1431 else if (var->data.mode == nir_var_shader_out &&
1432 stage == MESA_SHADER_FRAGMENT)
1433 base = FRAG_RESULT_DATA0;
1434 else
1435 base = VARYING_SLOT_VAR0;
1436
1437 unsigned var_size, driver_size;
1438 if (var->data.compact) {
1439 /* If we are inside a partial compact,
1440 * don't allow another compact to be in this slot
1441 * if it starts at component 0.
1442 */
1443 if (last_partial && var->data.location_frac == 0) {
1444 location++;
1445 }
1446
1447 /* compact variables must be arrays of scalars */
1448 assert(!var->data.per_view);
1449 assert(glsl_type_is_array(type));
1450 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1451 unsigned start = 4 * location + var->data.location_frac;
1452 unsigned end = start + glsl_get_length(type);
1453 var_size = driver_size = end / 4 - location;
1454 last_partial = end % 4 != 0;
1455 } else {
1456 /* Compact variables bypass the normal varying compacting pass,
1457 * which means they cannot be in the same vec4 slot as a normal
1458 * variable. If part of the current slot is taken up by a compact
1459 * variable, we need to go to the next one.
1460 */
1461 if (last_partial) {
1462 location++;
1463 last_partial = false;
1464 }
1465
1466 /* per-view variables have an extra array dimension, which is ignored
1467 * when counting user-facing slots (var->data.location), but *not*
1468 * with driver slots (var->data.driver_location). That is, each user
1469 * slot maps to multiple driver slots.
1470 */
1471 driver_size = glsl_count_attribute_slots(type, false);
1472 if (var->data.per_view) {
1473 assert(glsl_type_is_array(type));
1474 var_size =
1475 glsl_count_attribute_slots(glsl_get_array_element(type), false);
1476 } else {
1477 var_size = driver_size;
1478 }
1479 }
1480
1481 /* Builtins don't allow component packing so we only need to worry about
1482 * user defined varyings sharing the same location.
1483 */
1484 bool processed = false;
1485 if (var->data.location >= base) {
1486 unsigned glsl_location = var->data.location - base;
1487
1488 for (unsigned i = 0; i < var_size; i++) {
1489 if (processed_locs[var->data.index] &
1490 ((uint64_t)1 << (glsl_location + i)))
1491 processed = true;
1492 else
1493 processed_locs[var->data.index] |=
1494 ((uint64_t)1 << (glsl_location + i));
1495 }
1496 }
1497
1498 /* Because component packing allows varyings to share the same location
1499 * we may already have processed this location.
1500 */
1501 if (processed) {
1502 /* TODO handle overlapping per-view variables */
1503 assert(!var->data.per_view);
1504 unsigned driver_location = assigned_locations[var->data.location];
1505 var->data.driver_location = driver_location;
1506
1507 /* An array may be packed such that it crosses multiple other arrays
1508 * or variables, so we need to make sure we have allocated the elements
1509 * consecutively if the previously processed var was shorter than
1510 * the current array we are processing.
1511 *
1512 * NOTE: The code below assumes the var list is ordered in ascending
1513 * location order.
1514 */
1515 assert(last_loc <= var->data.location);
1516 last_loc = var->data.location;
1517 unsigned last_slot_location = driver_location + var_size;
1518 if (last_slot_location > location) {
1519 unsigned num_unallocated_slots = last_slot_location - location;
1520 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1521 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1522 assigned_locations[var->data.location + i] = location;
1523 location++;
1524 }
1525 }
1526 continue;
1527 }
1528
1529 for (unsigned i = 0; i < var_size; i++) {
1530 assigned_locations[var->data.location + i] = location + i;
1531 }
1532
1533 var->data.driver_location = location;
1534 location += driver_size;
1535 }
1536
1537 if (last_partial)
1538 location++;
1539
1540 exec_list_append(&shader->variables, &io_vars);
1541 *size = location;
1542 }
1543
1544 static uint64_t
1545 get_linked_variable_location(unsigned location, bool patch)
1546 {
1547 if (!patch)
1548 return location;
1549
1550 /* Reserve locations 0...3 for special patch variables
1551 * like tess factors and bounding boxes, and the generic patch
1552 * variables will come after them.
1553 */
1554 if (location >= VARYING_SLOT_PATCH0)
1555 return location - VARYING_SLOT_PATCH0 + 4;
1556 else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1557 location <= VARYING_SLOT_BOUNDING_BOX1)
1558 return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1559 else
1560 unreachable("Unsupported variable in get_linked_variable_location.");
1561 }
1562
1563 static uint64_t
1564 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1565 {
1566 const struct glsl_type *type = variable->type;
1567
1568 if (nir_is_arrayed_io(variable, stage)) {
1569 assert(glsl_type_is_array(type));
1570 type = glsl_get_array_element(type);
1571 }
1572
1573 unsigned slots = glsl_count_attribute_slots(type, false);
1574 if (variable->data.compact) {
1575 unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1576 slots = DIV_ROUND_UP(component_count, 4);
1577 }
1578
1579 uint64_t mask = u_bit_consecutive64(0, slots);
1580 return mask;
1581 }
1582
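/* Assign consistent driver_locations to the producer's outputs and the
 * consumer's inputs by compacting the union of both stages' slot masks
 * (patch and non-patch slots are counted separately), and return how many
 * linked slots of each kind were assigned.
 */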
1583 nir_linked_io_var_info
1584 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1585 {
1586 assert(producer);
1587 assert(consumer);
1588
1589 uint64_t producer_output_mask = 0;
1590 uint64_t producer_patch_output_mask = 0;
1591
1592 nir_foreach_shader_out_variable(variable, producer) {
1593 uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1594 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1595
1596 if (variable->data.patch)
1597 producer_patch_output_mask |= mask << loc;
1598 else
1599 producer_output_mask |= mask << loc;
1600 }
1601
1602 uint64_t consumer_input_mask = 0;
1603 uint64_t consumer_patch_input_mask = 0;
1604
1605 nir_foreach_shader_in_variable(variable, consumer) {
1606 uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1607 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1608
1609 if (variable->data.patch)
1610 consumer_patch_input_mask |= mask << loc;
1611 else
1612 consumer_input_mask |= mask << loc;
1613 }
1614
1615 uint64_t io_mask = producer_output_mask | consumer_input_mask;
1616 uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1617
1618 nir_foreach_shader_out_variable(variable, producer) {
1619 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1620
1621 if (variable->data.patch)
1622 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1623 else
1624 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1625 }
1626
1627 nir_foreach_shader_in_variable(variable, consumer) {
1628 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1629
1630 if (variable->data.patch)
1631 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1632 else
1633 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1634 }
1635
1636 nir_linked_io_var_info result = {
1637 .num_linked_io_vars = util_bitcount64(io_mask),
1638 .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1639 };
1640
1641 return result;
1642 }
1643