1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/set.h"
27 #include "util/hash_table.h"
28
29 /* This file contains various little helpers for doing simple linking in
30 * NIR. Eventually, we'll probably want a full-blown varying packing
31 * implementation in here. Right now, it just deletes unused things.
32 */
33
34 /**
35 * Returns the bits in the inputs_read or outputs_written
36 * bitfield corresponding to this variable.
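 * For a non-arrayed variable whose type occupies two attribute slots at
 * location L, for example, this is BITFIELD64_MASK(2) << L.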
37 */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ?
45 var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47 assert(var->data.mode == nir_var_shader_in ||
48 var->data.mode == nir_var_shader_out);
49 assert(var->data.location >= 0);
50 assert(location < 64);
51
52 const struct glsl_type *type = var->type;
53 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
54 assert(glsl_type_is_array(type));
55 type = glsl_get_array_element(type);
56 }
57
58 unsigned slots = glsl_count_attribute_slots(type, false);
59 return BITFIELD64_MASK(slots) << location;
60 }
61
62 static bool
63 is_non_generic_patch_var(nir_variable *var)
64 {
65 return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
66 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
67 var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
68 var->data.location == VARYING_SLOT_BOUNDING_BOX1;
69 }
70
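/* Number of components this variable occupies within a slot; structs and
 * interface blocks are conservatively counted as a full vec4.
 */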
71 static uint8_t
72 get_num_components(nir_variable *var)
73 {
74 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
75 return 4;
76
77 return glsl_get_vector_elements(glsl_without_array(var->type));
78 }
79
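/* Record the output slots that the TCS reads back itself (load_deref on
 * nir_var_shader_out), split into per-vertex and per-patch masks, so those
 * outputs stay alive even when the TES does not consume them.
 */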
80 static void
81 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
82 {
83 nir_foreach_function(function, shader) {
84 if (!function->impl)
85 continue;
86
87 nir_foreach_block(block, function->impl) {
88 nir_foreach_instr(instr, block) {
89 if (instr->type != nir_instr_type_intrinsic)
90 continue;
91
92 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
93 if (intrin->intrinsic != nir_intrinsic_load_deref)
94 continue;
95
96 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
97 if (!nir_deref_mode_is(deref, nir_var_shader_out))
98 continue;
99
100 nir_variable *var = nir_deref_instr_get_variable(deref);
101 for (unsigned i = 0; i < get_num_components(var); i++) {
102 if (var->data.patch) {
103 if (is_non_generic_patch_var(var))
104 continue;
105
106 patches_read[var->data.location_frac + i] |=
107 get_variable_io_mask(var, shader->info.stage);
108 } else {
109 read[var->data.location_frac + i] |=
110 get_variable_io_mask(var, shader->info.stage);
111 }
112 }
113 }
114 }
115 }
116 }
117
118 /**
119 * Helper for removing unused shader I/O variables by demoting them to global
120 * variables (which may then be dead-code eliminated).
121 *
122 * Example usage is:
123 *
124 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
125 * read, patches_read) ||
126 * progress;
127 *
128 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
129 * representing each .location_frac used. Note that for vector variables,
130 * only the first channel (.location_frac) is examined for deciding if the
131 * variable is used!
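 *
 * A minimal sketch of how a caller might build such masks, mirroring
 * nir_remove_unused_varyings() below (per-patch varyings omitted for brevity):
 *
 *    uint64_t read[4] = { 0 };
 *    nir_foreach_shader_in_variable(var, consumer) {
 *       for (unsigned i = 0; i < get_num_components(var); i++)
 *          read[var->data.location_frac + i] |=
 *             get_variable_io_mask(var, consumer->info.stage);
 *    }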
132 */
133 bool
134 nir_remove_unused_io_vars(nir_shader *shader,
135 nir_variable_mode mode,
136 uint64_t *used_by_other_stage,
137 uint64_t *used_by_other_stage_patches)
138 {
139 bool progress = false;
140 uint64_t *used;
141
142 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
143
144 nir_foreach_variable_with_modes_safe(var, shader, mode) {
145 if (var->data.patch)
146 used = used_by_other_stage_patches;
147 else
148 used = used_by_other_stage;
149
150 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
151 if (shader->info.stage != MESA_SHADER_MESH || var->data.location != VARYING_SLOT_PRIMITIVE_ID)
152 continue;
153
154 if (var->data.always_active_io)
155 continue;
156
157 if (var->data.explicit_xfb_buffer)
158 continue;
159
160 uint64_t other_stage = used[var->data.location_frac];
161
162 if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
163 /* This one is unused by the other stage, make it a global variable instead */
164 if (shader->info.stage == MESA_SHADER_MESH &&
165 (shader->info.outputs_read & BITFIELD64_BIT(var->data.location)))
166 var->data.mode = nir_var_mem_shared;
167 else
168 var->data.mode = nir_var_shader_temp;
169 var->data.location = 0;
170
171 progress = true;
172 }
173 }
174
175 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
176 if (progress) {
177 nir_metadata_preserve(impl, nir_metadata_dominance |
178 nir_metadata_block_index);
179 nir_fixup_deref_modes(shader);
180 } else {
181 nir_metadata_preserve(impl, nir_metadata_all);
182 }
183
184 return progress;
185 }
186
187 bool
188 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
189 {
190 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
191 assert(consumer->info.stage != MESA_SHADER_VERTEX);
192
193 uint64_t read[4] = { 0 }, written[4] = { 0 };
194 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
195
196 nir_foreach_shader_out_variable(var, producer) {
197 for (unsigned i = 0; i < get_num_components(var); i++) {
198 if (var->data.patch) {
199 if (is_non_generic_patch_var(var))
200 continue;
201
202 patches_written[var->data.location_frac + i] |=
203 get_variable_io_mask(var, producer->info.stage);
204 } else {
205 written[var->data.location_frac + i] |=
206 get_variable_io_mask(var, producer->info.stage);
207 }
208 }
209 }
210
211 nir_foreach_shader_in_variable(var, consumer) {
212 for (unsigned i = 0; i < get_num_components(var); i++) {
213 if (var->data.patch) {
214 if (is_non_generic_patch_var(var))
215 continue;
216
217 patches_read[var->data.location_frac + i] |=
218 get_variable_io_mask(var, consumer->info.stage);
219 } else {
220 read[var->data.location_frac + i] |=
221 get_variable_io_mask(var, consumer->info.stage);
222 }
223 }
224 }
225
226 /* Each TCS invocation can read data written by other TCS invocations,
227 * so even if the outputs are not used by the TES we must also make
228 * sure they are not read by the TCS before demoting them to globals.
229 */
230 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
231 tcs_add_output_reads(producer, read, patches_read);
232
233 bool progress = false;
234 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
235 patches_read);
236
237 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
238 patches_written) || progress;
239
240 return progress;
241 }
242
243 static uint8_t
244 get_interp_type(nir_variable *var, const struct glsl_type *type,
245 bool default_to_smooth_interp)
246 {
247 if (var->data.per_primitive)
248 return INTERP_MODE_NONE;
249 if (glsl_type_is_integer(type))
250 return INTERP_MODE_FLAT;
251 else if (var->data.interpolation != INTERP_MODE_NONE)
252 return var->data.interpolation;
253 else if (default_to_smooth_interp)
254 return INTERP_MODE_SMOOTH;
255 else
256 return INTERP_MODE_NONE;
257 }
258
259 #define INTERPOLATE_LOC_SAMPLE 0
260 #define INTERPOLATE_LOC_CENTROID 1
261 #define INTERPOLATE_LOC_CENTER 2
262
263 static uint8_t
264 get_interp_loc(nir_variable *var)
265 {
266 if (var->data.sample)
267 return INTERPOLATE_LOC_SAMPLE;
268 else if (var->data.centroid)
269 return INTERPOLATE_LOC_CENTROID;
270 else
271 return INTERPOLATE_LOC_CENTER;
272 }
273
274 static bool
275 is_packing_supported_for_type(const struct glsl_type *type)
276 {
277 /* We ignore complex types such as arrays, matrices, structs and bitsizes
278 * other than 32-bit. All other vector types should have been split into
279 * scalar variables by the lower_io_to_scalar pass. The only exception
280 * should be OpenGL xfb varyings.
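 * For example, a lone float or int component can be packed, while a vec2,
 * a double or a mat3 cannot.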
281 * TODO: add support for more complex types?
282 */
283 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
284 }
285
286 struct assigned_comps
287 {
288 uint8_t comps;
289 uint8_t interp_type;
290 uint8_t interp_loc;
291 bool is_32bit;
292 bool is_mediump;
293 bool is_per_primitive;
294 };
295
296 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
297 * algorithms this function just assigns them their existing locations for now.
298 * TODO: allow better packing of complex types.
299 */
300 static void
301 get_unmoveable_components_masks(nir_shader *shader,
302 nir_variable_mode mode,
303 struct assigned_comps *comps,
304 gl_shader_stage stage,
305 bool default_to_smooth_interp)
306 {
307 nir_foreach_variable_with_modes_safe(var, shader, mode) {
308 assert(var->data.location >= 0);
309
310 /* Only remap things that aren't built-ins. */
311 if (var->data.location >= VARYING_SLOT_VAR0 &&
312 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
313
314 const struct glsl_type *type = var->type;
315 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
316 assert(glsl_type_is_array(type));
317 type = glsl_get_array_element(type);
318 }
319
320 /* If we can pack this varying then don't mark the components as
321 * used.
322 */
323 if (is_packing_supported_for_type(type) &&
324 !var->data.always_active_io)
325 continue;
326
327 unsigned location = var->data.location - VARYING_SLOT_VAR0;
328
329 unsigned elements =
330 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
331 glsl_get_vector_elements(glsl_without_array(type)) : 4;
332
333 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
334 unsigned slots = glsl_count_attribute_slots(type, false);
335 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
336 unsigned comps_slot2 = 0;
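/* For dual-slot (64-bit) types the components are spread over slot pairs:
 * a dvec3 at location_frac 0, for example, covers all four components of
 * the first slot and the first two components of the second.
 */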
337 for (unsigned i = 0; i < slots; i++) {
338 if (dual_slot) {
339 if (i & 1) {
340 comps[location + i].comps |= ((1 << comps_slot2) - 1);
341 } else {
342 unsigned num_comps = 4 - var->data.location_frac;
343 comps_slot2 = (elements * dmul) - num_comps;
344
345 /* Assume ARB_enhanced_layouts packing rules for doubles */
346 assert(var->data.location_frac == 0 ||
347 var->data.location_frac == 2);
348 assert(comps_slot2 <= 4);
349
350 comps[location + i].comps |=
351 ((1 << num_comps) - 1) << var->data.location_frac;
352 }
353 } else {
354 comps[location + i].comps |=
355 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
356 }
357
358 comps[location + i].interp_type =
359 get_interp_type(var, type, default_to_smooth_interp);
360 comps[location + i].interp_loc = get_interp_loc(var);
361 comps[location + i].is_32bit =
362 glsl_type_is_32bit(glsl_without_array(type));
363 comps[location + i].is_mediump =
364 var->data.precision == GLSL_PRECISION_MEDIUM ||
365 var->data.precision == GLSL_PRECISION_LOW;
366 comps[location + i].is_per_primitive = var->data.per_primitive;
367 }
368 }
369 }
370 }
371
372 struct varying_loc
373 {
374 uint8_t component;
375 uint32_t location;
376 };
377
378 static void
379 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
380 uint64_t slots_used_mask, unsigned num_slots)
381 {
382 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
383
384 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
385 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
386 }
387
388 static void
389 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
390 {
391 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
392
393 slots_used[var->data.patch ? 1 : 0] |=
394 BITFIELD64_BIT(var->data.location - loc_offset + offset);
395 }
396
397 static void
398 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
399 struct varying_loc (*remap)[4],
400 uint64_t *slots_used, uint64_t *out_slots_read,
401 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
402 {
403 const gl_shader_stage stage = shader->info.stage;
404 uint64_t out_slots_read_tmp[2] = {0};
405 uint64_t slots_used_tmp[2] = {0};
406
407 /* We don't touch builtins so just copy the bitmask */
408 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
409
410 nir_foreach_variable_with_modes(var, shader, mode) {
411 assert(var->data.location >= 0);
412
413 /* Only remap things that aren't built-ins */
414 if (var->data.location >= VARYING_SLOT_VAR0 &&
415 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
416
417 const struct glsl_type *type = var->type;
418 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
419 assert(glsl_type_is_array(type));
420 type = glsl_get_array_element(type);
421 }
422
423 unsigned num_slots = glsl_count_attribute_slots(type, false);
424 bool used_across_stages = false;
425 bool outputs_read = false;
426
427 unsigned location = var->data.location - VARYING_SLOT_VAR0;
428 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
429
430 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
431 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
432 uint64_t outs_used =
433 var->data.patch ? *p_out_slots_read : *out_slots_read;
434 uint64_t slots =
435 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
436
437 if (slots & used)
438 used_across_stages = true;
439
440 if (slots & outs_used)
441 outputs_read = true;
442
443 if (new_loc->location) {
444 var->data.location = new_loc->location;
445 var->data.location_frac = new_loc->component;
446 }
447
448 if (var->data.always_active_io) {
449 /* We can't apply link-time optimisations (specifically array
450 * splitting) to these, so we need to copy the existing mask;
451 * otherwise we will mess up the mask for things like partially
452 * marked arrays.
453 */
454 if (used_across_stages)
455 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
456
457 if (outputs_read) {
458 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
459 num_slots);
460 }
461 } else {
462 for (unsigned i = 0; i < num_slots; i++) {
463 if (used_across_stages)
464 mark_used_slot(var, slots_used_tmp, i);
465
466 if (outputs_read)
467 mark_used_slot(var, out_slots_read_tmp, i);
468 }
469 }
470 }
471 }
472
473 *slots_used = slots_used_tmp[0];
474 *out_slots_read = out_slots_read_tmp[0];
475 *p_slots_used = slots_used_tmp[1];
476 *p_out_slots_read = out_slots_read_tmp[1];
477 }
478
479 struct varying_component {
480 nir_variable *var;
481 uint8_t interp_type;
482 uint8_t interp_loc;
483 bool is_32bit;
484 bool is_patch;
485 bool is_per_primitive;
486 bool is_mediump;
487 bool is_intra_stage_only;
488 bool initialised;
489 };
490
491 static int
492 cmp_varying_component(const void *comp1_v, const void *comp2_v)
493 {
494 struct varying_component *comp1 = (struct varying_component *) comp1_v;
495 struct varying_component *comp2 = (struct varying_component *) comp2_v;
496
497 /* We want patches to be ordered at the end of the array */
498 if (comp1->is_patch != comp2->is_patch)
499 return comp1->is_patch ? 1 : -1;
500
501 /* Sort per-primitive outputs after per-vertex ones to allow
502 * better compaction when they are mixed in the shader's source.
503 */
504 if (comp1->is_per_primitive != comp2->is_per_primitive)
505 return comp1->is_per_primitive ? 1 : -1;
506
507 /* We want to try to group together TCS outputs that are only read by other
508 * TCS invocations and not consumed by the following stage.
509 */
510 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
511 return comp1->is_intra_stage_only ? 1 : -1;
512
513 /* Group mediump varyings together. */
514 if (comp1->is_mediump != comp2->is_mediump)
515 return comp1->is_mediump ? 1 : -1;
516
517 /* We can only pack varyings with matching interpolation types so group
518 * them together.
519 */
520 if (comp1->interp_type != comp2->interp_type)
521 return comp1->interp_type - comp2->interp_type;
522
523 /* Interpolation loc must match also. */
524 if (comp1->interp_loc != comp2->interp_loc)
525 return comp1->interp_loc - comp2->interp_loc;
526
527 /* If everything else matches just use the original location to sort */
528 const struct nir_variable_data *const data1 = &comp1->var->data;
529 const struct nir_variable_data *const data2 = &comp2->var->data;
530 if (data1->location != data2->location)
531 return data1->location - data2->location;
532 return (int)data1->location_frac - (int)data2->location_frac;
533 }
534
535 static void
536 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
537 struct varying_component **varying_comp_info,
538 unsigned *varying_comp_info_size,
539 bool default_to_smooth_interp)
540 {
541 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
542 unsigned num_of_comps_to_pack = 0;
543
544 /* Count the number of varyings that can be packed and create a mapping
545 * of those varyings to the array we will pass to qsort.
546 */
547 nir_foreach_shader_out_variable(var, producer) {
548
549 /* Only remap things that aren't builtins. */
550 if (var->data.location >= VARYING_SLOT_VAR0 &&
551 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
552
553 /* We can't repack xfb varyings. */
554 if (var->data.always_active_io)
555 continue;
556
557 const struct glsl_type *type = var->type;
558 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
559 assert(glsl_type_is_array(type));
560 type = glsl_get_array_element(type);
561 }
562
563 if (!is_packing_supported_for_type(type))
564 continue;
565
566 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
567 store_varying_info_idx[loc][var->data.location_frac] =
568 ++num_of_comps_to_pack;
569 }
570 }
571
572 *varying_comp_info_size = num_of_comps_to_pack;
573 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
574 num_of_comps_to_pack);
575
576 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
577
578 /* Walk over the shader and populate the varying component info array */
579 nir_foreach_block(block, impl) {
580 nir_foreach_instr(instr, block) {
581 if (instr->type != nir_instr_type_intrinsic)
582 continue;
583
584 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
585 if (intr->intrinsic != nir_intrinsic_load_deref &&
586 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
587 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
588 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
589 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
590 continue;
591
592 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
593 if (!nir_deref_mode_is(deref, nir_var_shader_in))
594 continue;
595
596 /* We only remap things that aren't builtins. */
597 nir_variable *in_var = nir_deref_instr_get_variable(deref);
598 if (in_var->data.location < VARYING_SLOT_VAR0)
599 continue;
600
601 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
602 if (location >= MAX_VARYINGS_INCL_PATCH)
603 continue;
604
605 unsigned var_info_idx =
606 store_varying_info_idx[location][in_var->data.location_frac];
607 if (!var_info_idx)
608 continue;
609
610 struct varying_component *vc_info =
611 &(*varying_comp_info)[var_info_idx-1];
612
613 if (!vc_info->initialised) {
614 const struct glsl_type *type = in_var->type;
615 if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
616 in_var->data.per_view) {
617 assert(glsl_type_is_array(type));
618 type = glsl_get_array_element(type);
619 }
620
621 vc_info->var = in_var;
622 vc_info->interp_type =
623 get_interp_type(in_var, type, default_to_smooth_interp);
624 vc_info->interp_loc = get_interp_loc(in_var);
625 vc_info->is_32bit = glsl_type_is_32bit(type);
626 vc_info->is_patch = in_var->data.patch;
627 vc_info->is_per_primitive = in_var->data.per_primitive;
628 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
629 (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
630 in_var->data.precision == GLSL_PRECISION_LOW);
631 vc_info->is_intra_stage_only = false;
632 vc_info->initialised = true;
633 }
634 }
635 }
636
637 /* Walk over the shader and populate the varying component info array
638 * for varyings which are read by other TCS instances but are not consumed
639 * by the TES.
640 */
641 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
642 impl = nir_shader_get_entrypoint(producer);
643
644 nir_foreach_block(block, impl) {
645 nir_foreach_instr(instr, block) {
646 if (instr->type != nir_instr_type_intrinsic)
647 continue;
648
649 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
650 if (intr->intrinsic != nir_intrinsic_load_deref)
651 continue;
652
653 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
654 if (!nir_deref_mode_is(deref, nir_var_shader_out))
655 continue;
656
657 /* We only remap things that aren't builtins. */
658 nir_variable *out_var = nir_deref_instr_get_variable(deref);
659 if (out_var->data.location < VARYING_SLOT_VAR0)
660 continue;
661
662 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
663 if (location >= MAX_VARYINGS_INCL_PATCH)
664 continue;
665
666 unsigned var_info_idx =
667 store_varying_info_idx[location][out_var->data.location_frac];
668 if (!var_info_idx) {
669 /* Something went wrong, the shader interfaces didn't match, so
670 * abandon packing. This can happen for example when the
671 * inputs are scalars but the outputs are struct members.
672 */
673 *varying_comp_info_size = 0;
674 break;
675 }
676
677 struct varying_component *vc_info =
678 &(*varying_comp_info)[var_info_idx-1];
679
680 if (!vc_info->initialised) {
681 const struct glsl_type *type = out_var->type;
682 if (nir_is_arrayed_io(out_var, producer->info.stage)) {
683 assert(glsl_type_is_array(type));
684 type = glsl_get_array_element(type);
685 }
686
687 vc_info->var = out_var;
688 vc_info->interp_type =
689 get_interp_type(out_var, type, default_to_smooth_interp);
690 vc_info->interp_loc = get_interp_loc(out_var);
691 vc_info->is_32bit = glsl_type_is_32bit(type);
692 vc_info->is_patch = out_var->data.patch;
693 vc_info->is_per_primitive = out_var->data.per_primitive;
694 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
695 (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
696 out_var->data.precision == GLSL_PRECISION_LOW);
697 vc_info->is_intra_stage_only = true;
698 vc_info->initialised = true;
699 }
700 }
701 }
702 }
703
704 for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
705 struct varying_component *vc_info = &(*varying_comp_info)[i];
706 if (!vc_info->initialised) {
707 /* Something went wrong, the shader interfaces didn't match, so
708 * abandon packing. This can happen for example when the outputs are
709 * scalars but the inputs are struct members.
710 */
711 *varying_comp_info_size = 0;
712 break;
713 }
714 }
715 }
716
717 static bool
718 allow_pack_interp_type(nir_pack_varying_options options, int type)
719 {
720 int sel;
721
722 switch (type) {
723 case INTERP_MODE_NONE:
724 sel = nir_pack_varying_interp_mode_none;
725 break;
726 case INTERP_MODE_SMOOTH:
727 sel = nir_pack_varying_interp_mode_smooth;
728 break;
729 case INTERP_MODE_FLAT:
730 sel = nir_pack_varying_interp_mode_flat;
731 break;
732 case INTERP_MODE_NOPERSPECTIVE:
733 sel = nir_pack_varying_interp_mode_noperspective;
734 break;
735 default:
736 return false;
737 }
738
739 return options & sel;
740 }
741
742 static bool
743 allow_pack_interp_loc(nir_pack_varying_options options, int loc)
744 {
745 int sel;
746
747 switch (loc) {
748 case INTERPOLATE_LOC_SAMPLE:
749 sel = nir_pack_varying_interp_loc_sample;
750 break;
751 case INTERPOLATE_LOC_CENTROID:
752 sel = nir_pack_varying_interp_loc_centroid;
753 break;
754 case INTERPOLATE_LOC_CENTER:
755 sel = nir_pack_varying_interp_loc_center;
756 break;
757 default:
758 return false;
759 }
760
761 return options & sel;
762 }
763
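/* Starting at *cursor, find the first slot/component that can hold this
 * varying component (compatible per-primitive flag, precision, interpolation
 * type/location and 32-bit size), record the choice in the remap table and
 * mark the component as assigned.
 */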
764 static void
765 assign_remap_locations(struct varying_loc (*remap)[4],
766 struct assigned_comps *assigned_comps,
767 struct varying_component *info,
768 unsigned *cursor, unsigned *comp,
769 unsigned max_location,
770 nir_pack_varying_options options)
771 {
772 unsigned tmp_cursor = *cursor;
773 unsigned tmp_comp = *comp;
774
775 for (; tmp_cursor < max_location; tmp_cursor++) {
776
777 if (assigned_comps[tmp_cursor].comps) {
778 /* Don't pack per-primitive and per-vertex varyings together. */
779 if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
780 tmp_comp = 0;
781 continue;
782 }
783
784 /* We can only pack varyings with matching precision. */
785 if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
786 tmp_comp = 0;
787 continue;
788 }
789
790 /* Varyings with mismatching interpolation types can only be packed
791 * together if the driver supports it for both types.
792 */
793 if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
794 (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
795 !allow_pack_interp_type(options, info->interp_type))) {
796 tmp_comp = 0;
797 continue;
798 }
799
800 /* Likewise, varyings with mismatching interpolation locations can only
801 * be packed together if the driver supports it for both locations.
802 */
803 if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
804 (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
805 !allow_pack_interp_loc(options, info->interp_loc))) {
806 tmp_comp = 0;
807 continue;
808 }
809
810 /* We can only pack varyings with matching types, and the current
811 * algorithm only supports packing 32-bit.
812 */
813 if (!assigned_comps[tmp_cursor].is_32bit) {
814 tmp_comp = 0;
815 continue;
816 }
817
818 while (tmp_comp < 4 &&
819 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
820 tmp_comp++;
821 }
822 }
823
824 if (tmp_comp == 4) {
825 tmp_comp = 0;
826 continue;
827 }
828
829 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
830
831 /* Once we have assigned a location mark it as used */
832 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
833 assigned_comps[tmp_cursor].interp_type = info->interp_type;
834 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
835 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
836 assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
837 assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
838
839 /* Assign remap location */
840 remap[location][info->var->data.location_frac].component = tmp_comp++;
841 remap[location][info->var->data.location_frac].location =
842 tmp_cursor + VARYING_SLOT_VAR0;
843
844 break;
845 }
846
847 *cursor = tmp_cursor;
848 *comp = tmp_comp;
849 }
850
851 /* If there are empty components in the slot compact the remaining components
852 * as close to component 0 as possible. This will make it easier to fill the
853 * empty components with components from a different slot in a following pass.
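 *
 * For example, two packable flat floats that originally lived at VAR1.x and
 * VAR2.y would (assuming nothing unmovable occupies slot 0) both be remapped
 * into VAR0, at components x and y.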
854 */
855 static void
856 compact_components(nir_shader *producer, nir_shader *consumer,
857 struct assigned_comps *assigned_comps,
858 bool default_to_smooth_interp)
859 {
860 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
861 struct varying_component *varying_comp_info;
862 unsigned varying_comp_info_size;
863
864 /* Gather varying component info */
865 gather_varying_component_info(producer, consumer, &varying_comp_info,
866 &varying_comp_info_size,
867 default_to_smooth_interp);
868
869 /* Sort varying components. */
870 qsort(varying_comp_info, varying_comp_info_size,
871 sizeof(struct varying_component), cmp_varying_component);
872
873 nir_pack_varying_options options = consumer->options->pack_varying_options;
874
875 unsigned cursor = 0;
876 unsigned comp = 0;
877
878 /* Set the remap array based on the sorted components */
879 for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
880 struct varying_component *info = &varying_comp_info[i];
881
882 assert(info->is_patch || cursor < MAX_VARYING);
883 if (info->is_patch) {
884 /* The list should be sorted with all non-patch inputs first followed
885 * by patch inputs. When we hit our first patch input, we need to
886 * reset the cursor to MAX_VARYING so we put them in the right slot.
887 */
888 if (cursor < MAX_VARYING) {
889 cursor = MAX_VARYING;
890 comp = 0;
891 }
892
893 assign_remap_locations(remap, assigned_comps, info,
894 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
895 options);
896 } else {
897 assign_remap_locations(remap, assigned_comps, info,
898 &cursor, &comp, MAX_VARYING,
899 options);
900
901 /* Check if we failed to assign a remap location. This can happen if
902 * for example there are a bunch of unmovable components with
903 * mismatching interpolation types causing us to skip over locations
904 * that would have been useful for packing later components.
905 * The solution is to iterate over the locations again (this should
906 * happen very rarely in practice).
907 */
908 if (cursor == MAX_VARYING) {
909 cursor = 0;
910 comp = 0;
911 assign_remap_locations(remap, assigned_comps, info,
912 &cursor, &comp, MAX_VARYING,
913 options);
914 }
915 }
916 }
917
918 ralloc_free(varying_comp_info);
919
920 uint64_t zero = 0;
921 uint32_t zero32 = 0;
922 remap_slots_and_components(consumer, nir_var_shader_in, remap,
923 &consumer->info.inputs_read, &zero,
924 &consumer->info.patch_inputs_read, &zero32);
925 remap_slots_and_components(producer, nir_var_shader_out, remap,
926 &producer->info.outputs_written,
927 &producer->info.outputs_read,
928 &producer->info.patch_outputs_written,
929 &producer->info.patch_outputs_read);
930 }
931
932 /* We assume that this has been called more-or-less directly after
933 * remove_unused_varyings. At this point, all of the varyings that we
934 * aren't going to be using have been completely removed and the
935 * inputs_read and outputs_written fields in nir_shader_info reflect
936 * this. Therefore, the total set of valid slots is the OR of the two
937 * sets of varyings; this accounts for varyings which one side may need
938 * to read/write even if the other doesn't. This can happen if, for
939 * instance, an array is used indirectly from one side causing it to be
940 * unsplittable but directly from the other.
941 */
942 void
943 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
944 bool default_to_smooth_interp)
945 {
946 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
947 assert(consumer->info.stage != MESA_SHADER_VERTEX);
948
949 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
950
951 get_unmoveable_components_masks(producer, nir_var_shader_out,
952 assigned_comps,
953 producer->info.stage,
954 default_to_smooth_interp);
955 get_unmoveable_components_masks(consumer, nir_var_shader_in,
956 assigned_comps,
957 consumer->info.stage,
958 default_to_smooth_interp);
959
960 compact_components(producer, consumer, assigned_comps,
961 default_to_smooth_interp);
962 }
963
964 /*
965 * Mark XFB varyings as always_active_io in the consumer so the linking opts
966 * don't touch them.
967 */
968 void
969 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
970 {
971 nir_variable *input_vars[MAX_VARYING][4] = { 0 };
972
973 nir_foreach_shader_in_variable(var, consumer) {
974 if (var->data.location >= VARYING_SLOT_VAR0 &&
975 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
976
977 unsigned location = var->data.location - VARYING_SLOT_VAR0;
978 input_vars[location][var->data.location_frac] = var;
979 }
980 }
981
982 nir_foreach_shader_out_variable(var, producer) {
983 if (var->data.location >= VARYING_SLOT_VAR0 &&
984 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
985
986 if (!var->data.always_active_io)
987 continue;
988
989 unsigned location = var->data.location - VARYING_SLOT_VAR0;
990 if (input_vars[location][var->data.location_frac]) {
991 input_vars[location][var->data.location_frac]->data.always_active_io = true;
992 }
993 }
994 }
995 }
996
997 static bool
998 does_varying_match(nir_variable *out_var, nir_variable *in_var)
999 {
1000 return in_var->data.location == out_var->data.location &&
1001 in_var->data.location_frac == out_var->data.location_frac;
1002 }
1003
1004 static nir_variable *
1005 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
1006 {
1007 nir_foreach_shader_in_variable(var, consumer) {
1008 if (does_varying_match(out_var, var))
1009 return var;
1010 }
1011
1012 return NULL;
1013 }
1014
1015 static bool
1016 can_replace_varying(nir_variable *out_var)
1017 {
1018 /* Skip types that require more complex handling.
1019 * TODO: add support for these types.
1020 */
1021 if (glsl_type_is_array(out_var->type) ||
1022 glsl_type_is_dual_slot(out_var->type) ||
1023 glsl_type_is_matrix(out_var->type) ||
1024 glsl_type_is_struct_or_ifc(out_var->type))
1025 return false;
1026
1027 /* Limit this pass to scalars for now to keep things simple. Most varyings
1028 * should have been lowered to scalars at this point anyway.
1029 */
1030 if (!glsl_type_is_scalar(out_var->type))
1031 return false;
1032
1033 if (out_var->data.location < VARYING_SLOT_VAR0 ||
1034 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1035 return false;
1036
1037 return true;
1038 }
1039
1040 static bool
1041 replace_varying_input_by_constant_load(nir_shader *shader,
1042 nir_intrinsic_instr *store_intr)
1043 {
1044 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1045
1046 nir_builder b;
1047 nir_builder_init(&b, impl);
1048
1049 nir_variable *out_var =
1050 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1051
1052 bool progress = false;
1053 nir_foreach_block(block, impl) {
1054 nir_foreach_instr(instr, block) {
1055 if (instr->type != nir_instr_type_intrinsic)
1056 continue;
1057
1058 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1059 if (intr->intrinsic != nir_intrinsic_load_deref)
1060 continue;
1061
1062 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1063 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1064 continue;
1065
1066 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1067
1068 if (!does_varying_match(out_var, in_var))
1069 continue;
1070
1071 b.cursor = nir_before_instr(instr);
1072
1073 nir_load_const_instr *out_const =
1074 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1075
1076 /* Add new const to replace the input */
1077 nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
1078 intr->dest.ssa.bit_size,
1079 out_const->value);
1080
1081 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
1082
1083 progress = true;
1084 }
1085 }
1086
1087 return progress;
1088 }
1089
1090 static bool
1091 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1092 nir_intrinsic_instr *dup_store_intr)
1093 {
1094 assert(input_var);
1095
1096 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1097
1098 nir_builder b;
1099 nir_builder_init(&b, impl);
1100
1101 nir_variable *dup_out_var =
1102 nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
1103
1104 bool progress = false;
1105 nir_foreach_block(block, impl) {
1106 nir_foreach_instr(instr, block) {
1107 if (instr->type != nir_instr_type_intrinsic)
1108 continue;
1109
1110 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1111 if (intr->intrinsic != nir_intrinsic_load_deref)
1112 continue;
1113
1114 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1115 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1116 continue;
1117
1118 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1119
1120 if (!does_varying_match(dup_out_var, in_var) ||
1121 in_var->data.interpolation != input_var->data.interpolation ||
1122 get_interp_loc(in_var) != get_interp_loc(input_var))
1123 continue;
1124
1125 b.cursor = nir_before_instr(instr);
1126
1127 nir_ssa_def *load = nir_load_var(&b, input_var);
1128 nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1129
1130 progress = true;
1131 }
1132 }
1133
1134 return progress;
1135 }
1136
1137 static bool
1138 is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
1139 {
1140 /* def is sure to be scalar as can_replace_varying() filters out the vector case. */
1141 assert(def->num_components == 1);
1142
1143 /* The uniform load may hide behind a move instruction that converts a
1144 * vector to a scalar:
1145 *
1146 * vec1 32 ssa_1 = deref_var &color (uniform vec3)
1147 * vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1148 * vec1 32 ssa_3 = mov ssa_2.x
1149 * vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1150 * intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1151 */
1152 *s = nir_ssa_scalar_resolved(def, 0);
1153
1154 nir_ssa_def *ssa = s->def;
1155 if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1156 return false;
1157
1158 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1159 if (intr->intrinsic != nir_intrinsic_load_deref)
1160 return false;
1161
1162 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1163 /* TODO: support nir_var_mem_ubo. */
1164 if (!nir_deref_mode_is(deref, nir_var_uniform))
1165 return false;
1166
1167 /* Does not support indirect uniform load. */
1168 return !nir_deref_instr_has_indirect(deref);
1169 }
1170
1171 static nir_variable *
1172 get_uniform_var_in_consumer(nir_shader *consumer,
1173 nir_variable *var_in_producer)
1174 {
1175 /* Check whether the uniform already exists in the consumer. */
1176 nir_variable *new_var = NULL;
1177 nir_foreach_uniform_variable(v, consumer) {
1178 if (!strcmp(var_in_producer->name, v->name)) {
1179 new_var = v;
1180 break;
1181 }
1182 }
1183
1184 /* Create the variable if it does not exist yet. */
1185 if (!new_var) {
1186 new_var = nir_variable_clone(var_in_producer, consumer);
1187 nir_shader_add_variable(consumer, new_var);
1188 }
1189
1190 return new_var;
1191 }
1192
1193 static nir_deref_instr *
1194 clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
1195 {
1196 if (deref->deref_type == nir_deref_type_var)
1197 return nir_build_deref_var(b, var);
1198
1199 nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1200 nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
1201
1202 /* Build the array or struct deref instruction.
1203 * The "deref" instr is guaranteed to be direct (see is_direct_uniform_load()).
1204 */
1205 switch (deref->deref_type) {
1206 case nir_deref_type_array: {
1207 nir_load_const_instr *index =
1208 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1209 return nir_build_deref_array_imm(b, parent, index->value->i64);
1210 }
1211 case nir_deref_type_ptr_as_array: {
1212 nir_load_const_instr *index =
1213 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1214 nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
1215 parent->dest.ssa.bit_size);
1216 return nir_build_deref_ptr_as_array(b, parent, ssa);
1217 }
1218 case nir_deref_type_struct:
1219 return nir_build_deref_struct(b, parent, deref->strct.index);
1220 default:
1221 unreachable("invalid type");
1222 return NULL;
1223 }
1224 }
1225
1226 static bool
1227 replace_varying_input_by_uniform_load(nir_shader *shader,
1228 nir_intrinsic_instr *store_intr,
1229 nir_ssa_scalar *scalar)
1230 {
1231 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1232
1233 nir_builder b;
1234 nir_builder_init(&b, impl);
1235
1236 nir_variable *out_var =
1237 nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1238
1239 nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1240 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1241 nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1242 uni_var = get_uniform_var_in_consumer(shader, uni_var);
1243
1244 bool progress = false;
1245 nir_foreach_block(block, impl) {
1246 nir_foreach_instr(instr, block) {
1247 if (instr->type != nir_instr_type_intrinsic)
1248 continue;
1249
1250 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1251 if (intr->intrinsic != nir_intrinsic_load_deref)
1252 continue;
1253
1254 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1255 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1256 continue;
1257
1258 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1259
1260 if (!does_varying_match(out_var, in_var))
1261 continue;
1262
1263 b.cursor = nir_before_instr(instr);
1264
1265 /* Clone the deref chain, from the variable deref down to the loaded deref. */
1266 nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
1267 nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
1268
1269 /* Add a vector-to-scalar move if the uniform is a vector. */
1270 if (uni_def->num_components > 1) {
1271 nir_alu_src src = {0};
1272 src.src = nir_src_for_ssa(uni_def);
1273 src.swizzle[0] = scalar->comp;
1274 uni_def = nir_mov_alu(&b, src, 1);
1275 }
1276
1277 /* Replace load input with load uniform. */
1278 nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
1279
1280 progress = true;
1281 }
1282 }
1283
1284 return progress;
1285 }
1286
1287 /* The GLSL ES 3.20 spec says:
1288 *
1289 * "The precision of a vertex output does not need to match the precision of
1290 * the corresponding fragment input. The minimum precision at which vertex
1291 * outputs are interpolated is the minimum of the vertex output precision and
1292 * the fragment input precision, with the exception that for highp,
1293 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1294 * Output Matching by Name in Linked Programs")
1295 *
1296 * To implement this, when linking shaders we will take the minimum precision
1297 * qualifier (allowing drivers to interpolate at lower precision). For
1298 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1299 * requires we use the *last* specified precision if there is a conflict.
1300 *
1301 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1302 * NONE, we'll return the other precision, since there is no conflict.
1303 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1304 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1305 * "backwards". For non-fragment stages, we'll pick the latter precision to
1306 * comply with the spec. (Note that the order matters.)
1307 *
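 * For example, a highp vertex output feeding a mediump fragment input is
 * interpolated at mediump (the larger raw value), while for a VS->GS pair the
 * consumer's declared precision wins.
 *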
1308 * For streamout, "Variables declared with lowp or mediump precision are
1309 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1310 * of the OpenGL ES 3.2 specification). So drivers should promote them for
1311 * the transform feedback memory store, but not for the output store.
1312 */
1313
1314 static unsigned
1315 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1316 {
1317 if (producer == GLSL_PRECISION_NONE)
1318 return consumer;
1319 else if (consumer == GLSL_PRECISION_NONE)
1320 return producer;
1321 else
1322 return fs ? MAX2(producer, consumer) : consumer;
1323 }
1324
1325 void
1326 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1327 {
1328 bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1329
1330 nir_foreach_shader_out_variable(producer_var, producer) {
1331 /* Skip if the slot is not assigned */
1332 if (producer_var->data.location < 0)
1333 continue;
1334
1335 nir_variable *consumer_var = nir_find_variable_with_location(consumer,
1336 nir_var_shader_in, producer_var->data.location);
1337
1338 /* Skip if the variable will be eliminated */
1339 if (!consumer_var)
1340 continue;
1341
1342 /* Now we have a pair of variables. Let's pick the smaller precision. */
1343 unsigned precision_1 = producer_var->data.precision;
1344 unsigned precision_2 = consumer_var->data.precision;
1345 unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1346
1347 /* Propagate the new precision */
1348 producer_var->data.precision = consumer_var->data.precision = minimum;
1349 }
1350 }
1351
1352 bool
1353 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1354 {
1355 /* TODO: Add support for more shader stage combinations */
1356 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1357 (producer->info.stage != MESA_SHADER_VERTEX &&
1358 producer->info.stage != MESA_SHADER_TESS_EVAL))
1359 return false;
1360
1361 bool progress = false;
1362
1363 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1364
1365 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1366
1367 /* If we find a store in the last block of the producer we can be sure this
1368 * is the only possible value for this output.
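 *
 * For example, if that store writes the constant 1.0 to a float output, every
 * load of the matching input in the consumer can be replaced by an immediate
 * 1.0, letting the varying itself be removed later as unused.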
1369 */
1370 nir_block *last_block = nir_impl_last_block(impl);
1371 nir_foreach_instr_reverse(instr, last_block) {
1372 if (instr->type != nir_instr_type_intrinsic)
1373 continue;
1374
1375 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1376
1377 if (intr->intrinsic != nir_intrinsic_store_deref)
1378 continue;
1379
1380 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1381 if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1382 continue;
1383
1384 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1385 if (!can_replace_varying(out_var))
1386 continue;
1387
1388 nir_ssa_def *ssa = intr->src[1].ssa;
1389 if (ssa->parent_instr->type == nir_instr_type_load_const) {
1390 progress |= replace_varying_input_by_constant_load(consumer, intr);
1391 continue;
1392 }
1393
1394 nir_ssa_scalar uni_scalar;
1395 if (is_direct_uniform_load(ssa, &uni_scalar)) {
1396 if (consumer->options->lower_varying_from_uniform) {
1397 progress |= replace_varying_input_by_uniform_load(consumer, intr,
1398 &uni_scalar);
1399 continue;
1400 } else {
1401 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1402 /* The varying is loaded from the same uniform, so there is no need to do any
1403 * interpolation. Mark it as flat explicitly.
1404 */
1405 if (!consumer->options->no_integers &&
1406 in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
1407 in_var->data.interpolation = INTERP_MODE_FLAT;
1408 out_var->data.interpolation = INTERP_MODE_FLAT;
1409 }
1410 }
1411 }
1412
1413 struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1414 if (entry) {
1415 progress |= replace_duplicate_input(consumer,
1416 (nir_variable *) entry->data,
1417 intr);
1418 } else {
1419 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1420 if (in_var) {
1421 _mesa_hash_table_insert(varying_values, ssa, in_var);
1422 }
1423 }
1424 }
1425
1426 _mesa_hash_table_destroy(varying_values, NULL);
1427
1428 return progress;
1429 }
1430
1431 /* TODO any better helper somewhere to sort a list? */
1432
1433 static void
1434 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1435 {
1436 nir_foreach_variable_in_list(var, var_list) {
1437 /* Use the `per_primitive` bool to sort per-primitive variables
1438 * to the end of the list, so they get the last driver locations
1439 * by nir_assign_io_var_locations.
1440 *
1441 * This is done because AMD HW requires that per-primitive outputs
1442 * are the last params.
1443 * In the future we can add an option for this, if needed by other HW.
1444 */
1445 if (new_var->data.per_primitive < var->data.per_primitive ||
1446 (new_var->data.per_primitive == var->data.per_primitive &&
1447 (var->data.location > new_var->data.location ||
1448 (var->data.location == new_var->data.location &&
1449 var->data.location_frac > new_var->data.location_frac)))) {
1450 exec_node_insert_node_before(&var->node, &new_var->node);
1451 return;
1452 }
1453 }
1454 exec_list_push_tail(var_list, &new_var->node);
1455 }
1456
1457 static void
1458 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1459 struct exec_list *sorted_list)
1460 {
1461 exec_list_make_empty(sorted_list);
1462 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1463 exec_node_remove(&var->node);
1464 insert_sorted(sorted_list, var);
1465 }
1466 }
1467
1468 void
1469 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1470 unsigned *size, gl_shader_stage stage)
1471 {
1472 unsigned location = 0;
1473 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1474 uint64_t processed_locs[2] = {0};
1475
1476 struct exec_list io_vars;
1477 sort_varyings(shader, mode, &io_vars);
1478
1479 int ASSERTED last_loc = 0;
1480 bool ASSERTED last_per_prim = false;
1481 bool last_partial = false;
1482 nir_foreach_variable_in_list(var, &io_vars) {
1483 const struct glsl_type *type = var->type;
1484 if (nir_is_arrayed_io(var, stage)) {
1485 assert(glsl_type_is_array(type));
1486 type = glsl_get_array_element(type);
1487 }
1488
1489 int base;
1490 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1491 base = VERT_ATTRIB_GENERIC0;
1492 else if (var->data.mode == nir_var_shader_out &&
1493 stage == MESA_SHADER_FRAGMENT)
1494 base = FRAG_RESULT_DATA0;
1495 else
1496 base = VARYING_SLOT_VAR0;
1497
1498 unsigned var_size, driver_size;
1499 if (var->data.compact) {
1500 /* If we are inside a partial compact,
1501 * don't allow another compact to be in this slot
1502 * if it starts at component 0.
1503 */
1504 if (last_partial && var->data.location_frac == 0) {
1505 location++;
1506 }
1507
1508 /* compact variables must be arrays of scalars */
1509 assert(!var->data.per_view);
1510 assert(glsl_type_is_array(type));
1511 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1512 unsigned start = 4 * location + var->data.location_frac;
1513 unsigned end = start + glsl_get_length(type);
1514 var_size = driver_size = end / 4 - location;
1515 last_partial = end % 4 != 0;
1516 } else {
1517 /* Compact variables bypass the normal varying compacting pass,
1518 * which means they cannot be in the same vec4 slot as a normal
1519 * variable. If part of the current slot is taken up by a compact
1520 * variable, we need to go to the next one.
1521 */
1522 if (last_partial) {
1523 location++;
1524 last_partial = false;
1525 }
1526
1527 /* per-view variables have an extra array dimension, which is ignored
1528 * when counting user-facing slots (var->data.location), but *not*
1529 * with driver slots (var->data.driver_location). That is, each user
1530 * slot maps to multiple driver slots.
1531 */
1532 driver_size = glsl_count_attribute_slots(type, false);
1533 if (var->data.per_view) {
1534 assert(glsl_type_is_array(type));
1535 var_size =
1536 glsl_count_attribute_slots(glsl_get_array_element(type), false);
1537 } else {
1538 var_size = driver_size;
1539 }
1540 }
1541
1542 /* Builtins don't allow component packing so we only need to worry about
1543 * user defined varyings sharing the same location.
1544 */
1545 bool processed = false;
1546 if (var->data.location >= base) {
1547 unsigned glsl_location = var->data.location - base;
1548
1549 for (unsigned i = 0; i < var_size; i++) {
1550 if (processed_locs[var->data.index] &
1551 ((uint64_t)1 << (glsl_location + i)))
1552 processed = true;
1553 else
1554 processed_locs[var->data.index] |=
1555 ((uint64_t)1 << (glsl_location + i));
1556 }
1557 }
1558
1559 /* Because component packing allows varyings to share the same location
1560 * we may already have processed this location.
1561 */
1562 if (processed) {
1563 /* TODO handle overlapping per-view variables */
1564 assert(!var->data.per_view);
1565 unsigned driver_location = assigned_locations[var->data.location];
1566 var->data.driver_location = driver_location;
1567
1568 /* An array may be packed such that it crosses multiple other arrays
1569 * or variables, so we need to make sure we have allocated the elements
1570 * consecutively if the previously processed var was shorter than
1571 * the current array we are processing.
1572 *
1573 * NOTE: The code below assumes the var list is ordered in ascending
1574 * location order, but per-vertex/per-primitive outputs may be
1575 * grouped separately.
1576 */
1577 assert(last_loc <= var->data.location ||
1578 last_per_prim != var->data.per_primitive);
1579 last_loc = var->data.location;
1580 last_per_prim = var->data.per_primitive;
1581 unsigned last_slot_location = driver_location + var_size;
1582 if (last_slot_location > location) {
1583 unsigned num_unallocated_slots = last_slot_location - location;
1584 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1585 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1586 assigned_locations[var->data.location + i] = location;
1587 location++;
1588 }
1589 }
1590 continue;
1591 }
1592
1593 for (unsigned i = 0; i < var_size; i++) {
1594 assigned_locations[var->data.location + i] = location + i;
1595 }
1596
1597 var->data.driver_location = location;
1598 location += driver_size;
1599 }
1600
1601 if (last_partial)
1602 location++;
1603
1604 exec_list_append(&shader->variables, &io_vars);
1605 *size = location;
1606 }
1607
1608 static uint64_t
1609 get_linked_variable_location(unsigned location, bool patch)
1610 {
1611 if (!patch)
1612 return location;
1613
1614 /* Reserve locations 0...3 for special patch variables
1615 * like tess factors and bounding boxes, and the generic patch
1616 * variables will come after them.
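 * E.g. VARYING_SLOT_TESS_LEVEL_OUTER maps to 0, VARYING_SLOT_BOUNDING_BOX1
 * to 3 and VARYING_SLOT_PATCH0 to 4.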
1617 */
1618 if (location >= VARYING_SLOT_PATCH0)
1619 return location - VARYING_SLOT_PATCH0 + 4;
1620 else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1621 location <= VARYING_SLOT_BOUNDING_BOX1)
1622 return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1623 else
1624 unreachable("Unsupported variable in get_linked_variable_location.");
1625 }
1626
1627 static uint64_t
1628 get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1629 {
1630 const struct glsl_type *type = variable->type;
1631
1632 if (nir_is_arrayed_io(variable, stage)) {
1633 assert(glsl_type_is_array(type));
1634 type = glsl_get_array_element(type);
1635 }
1636
1637 unsigned slots = glsl_count_attribute_slots(type, false);
1638 if (variable->data.compact) {
1639 unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1640 slots = DIV_ROUND_UP(component_count, 4);
1641 }
1642
1643 uint64_t mask = u_bit_consecutive64(0, slots);
1644 return mask;
1645 }
1646
1647 nir_linked_io_var_info
1648 nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1649 {
1650 assert(producer);
1651 assert(consumer);
1652
1653 uint64_t producer_output_mask = 0;
1654 uint64_t producer_patch_output_mask = 0;
1655
1656 nir_foreach_shader_out_variable(variable, producer) {
1657 uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1658 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1659
1660 if (variable->data.patch)
1661 producer_patch_output_mask |= mask << loc;
1662 else
1663 producer_output_mask |= mask << loc;
1664 }
1665
1666 uint64_t consumer_input_mask = 0;
1667 uint64_t consumer_patch_input_mask = 0;
1668
1669 nir_foreach_shader_in_variable(variable, consumer) {
1670 uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1671 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1672
1673 if (variable->data.patch)
1674 consumer_patch_input_mask |= mask << loc;
1675 else
1676 consumer_input_mask |= mask << loc;
1677 }
1678
1679 uint64_t io_mask = producer_output_mask | consumer_input_mask;
1680 uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1681
1682 nir_foreach_shader_out_variable(variable, producer) {
1683 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1684
1685 if (variable->data.patch)
1686 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1687 else
1688 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1689 }
1690
1691 nir_foreach_shader_in_variable(variable, consumer) {
1692 uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1693
1694 if (variable->data.patch)
1695 variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1696 else
1697 variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1698 }
1699
1700 nir_linked_io_var_info result = {
1701 .num_linked_io_vars = util_bitcount64(io_mask),
1702 .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1703 };
1704
1705 return result;
1706 }
1707