1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/hash_table.h"
25 #include "util/set.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28
/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it deletes unused varyings, compacts
 * the remaining ones, and performs a few simple cross-stage optimizations.
 */
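
/* A rough sketch of how a driver might chain these helpers when linking a
 * producer/consumer pair. The exact ordering and the surrounding cleanup
 * passes vary per driver, so treat this only as an illustration:
 *
 *    nir_link_xfb_varyings(producer, consumer);
 *
 *    if (nir_link_opt_varyings(producer, consumer))
 *       nir_opt_dce(consumer);
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       nir_opt_dce(producer);
 *       nir_opt_dce(consumer);
 *    }
 *
 *    nir_compact_varyings(producer, consumer, true);
 *    nir_link_varying_precision(producer, consumer);
 */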
33
/**
 * Returns the bits in the inputs_read or outputs_written bitfield that
 * correspond to this variable.
 */
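/* For example, a single (non-patch) vec4 output at VARYING_SLOT_VAR0 occupies
 * one slot, so this returns BITFIELD64_MASK(1) << VARYING_SLOT_VAR0.
 */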
38 static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
45
46 assert(var->data.mode == nir_var_shader_in ||
47 var->data.mode == nir_var_shader_out);
48 assert(var->data.location >= 0);
49 assert(location < 64);
50
51 const struct glsl_type *type = var->type;
52 if (nir_is_arrayed_io(var, stage)) {
53 assert(glsl_type_is_array(type));
54 type = glsl_get_array_element(type);
55 }
56
57 unsigned slots = glsl_count_attribute_slots(type, false);
58 return BITFIELD64_MASK(slots) << location;
59 }
60
61 static bool
is_non_generic_patch_var(nir_variable *var)
63 {
64 return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66 var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67 var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69
70 static uint8_t
get_num_components(nir_variable *var)
72 {
73 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74 return 4;
75
76 return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
78
79 static void
add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82 nir_foreach_function_impl(impl, shader) {
83 nir_foreach_block(block, impl) {
84 nir_foreach_instr(instr, block) {
85 if (instr->type != nir_instr_type_intrinsic)
86 continue;
87
88 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
89 if (intrin->intrinsic != nir_intrinsic_load_deref)
90 continue;
91
92 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
93 if (!nir_deref_mode_is(deref, nir_var_shader_out))
94 continue;
95
96 nir_variable *var = nir_deref_instr_get_variable(deref);
97 for (unsigned i = 0; i < get_num_components(var); i++) {
98 if (var->data.patch) {
99 if (is_non_generic_patch_var(var))
100 continue;
101
102 patches_read[var->data.location_frac + i] |=
103 get_variable_io_mask(var, shader->info.stage);
104 } else {
105 read[var->data.location_frac + i] |=
106 get_variable_io_mask(var, shader->info.stage);
107 }
108 }
109 }
110 }
111 }
112 }
113
114 static bool
remove_unused_io_access(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
116 {
117 nir_variable_mode mode = *(nir_variable_mode *)cb_data;
118
119 unsigned srcn = 0;
120 switch (intrin->intrinsic) {
121 case nir_intrinsic_load_deref:
122 case nir_intrinsic_store_deref:
123 case nir_intrinsic_interp_deref_at_centroid:
124 case nir_intrinsic_interp_deref_at_sample:
125 case nir_intrinsic_interp_deref_at_offset:
126 case nir_intrinsic_interp_deref_at_vertex:
127 break;
128 case nir_intrinsic_copy_deref:
129 srcn = mode == nir_var_shader_in ? 1 : 0;
130 break;
131 default:
132 return false;
133 }
134
135 nir_variable *var = nir_intrinsic_get_var(intrin, srcn);
136 if (!var || var->data.mode != mode || var->data.location != NUM_TOTAL_VARYING_SLOTS)
137 return false;
138
139 if (intrin->intrinsic != nir_intrinsic_store_deref &&
140 intrin->intrinsic != nir_intrinsic_copy_deref) {
141 b->cursor = nir_before_instr(&intrin->instr);
142 nir_def *undef = nir_undef(b, intrin->num_components, intrin->def.bit_size);
143 nir_def_rewrite_uses(&intrin->def, undef);
144 }
145
146 nir_instr_remove(&intrin->instr);
147 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[srcn]));
148
149 return true;
150 }
151
152 /**
 * Helper for removing unused shader I/O variables by demoting them to global
 * variables (which may then be dead-code eliminated).
155 *
156 * Example usage is:
157 *
158 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
159 * read, patches_read) ||
160 * progress;
161 */
162 bool
nir_remove_unused_io_vars(nir_shader *shader,
164 nir_variable_mode mode,
165 uint64_t *used_by_other_stage,
166 uint64_t *used_by_other_stage_patches)
167 {
168 bool progress = false;
169 uint64_t *used;
170
171 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
172
173 uint64_t read[4] = { 0 };
174 uint64_t patches_read[4] = { 0 };
175 if (mode == nir_var_shader_out)
176 add_output_reads(shader, read, patches_read);
177
178 nir_foreach_variable_with_modes_safe(var, shader, mode) {
179 if (var->data.patch)
180 used = used_by_other_stage_patches;
181 else
182 used = used_by_other_stage;
183
184 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0 &&
185 !(shader->info.stage == MESA_SHADER_MESH && var->data.location == VARYING_SLOT_PRIMITIVE_ID))
186 continue;
187
188 if (var->data.always_active_io)
189 continue;
190
191 if (var->data.explicit_xfb_buffer)
192 continue;
193
194 uint64_t other_stage = 0;
195 uint64_t this_stage = 0;
196 for (unsigned i = 0; i < get_num_components(var); i++) {
197 other_stage |= used[var->data.location_frac + i];
198 this_stage |= (var->data.patch ? patches_read : read)[var->data.location_frac + i];
199 }
200
201 uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
202 if (!((other_stage | this_stage) & var_mask)) {
203 /* Mark the variable as removed by setting the location to an invalid value. */
204 var->data.location = NUM_TOTAL_VARYING_SLOTS;
205 exec_node_remove(&var->node);
206 progress = true;
207 }
208 }
209
210 if (progress) {
211 nir_shader_intrinsics_pass(shader, &remove_unused_io_access, nir_metadata_control_flow, &mode);
212 } else {
213 nir_shader_preserve_all_metadata(shader);
214 }
215
216 return progress;
217 }
218
219 bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
221 {
222 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
223 assert(consumer->info.stage != MESA_SHADER_VERTEX);
224
225 uint64_t read[4] = { 0 }, written[4] = { 0 };
226 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
227
228 nir_foreach_shader_out_variable(var, producer) {
229 for (unsigned i = 0; i < get_num_components(var); i++) {
230 if (var->data.patch) {
231 if (is_non_generic_patch_var(var))
232 continue;
233
234 patches_written[var->data.location_frac + i] |=
235 get_variable_io_mask(var, producer->info.stage);
236 } else {
237 written[var->data.location_frac + i] |=
238 get_variable_io_mask(var, producer->info.stage);
239 }
240 }
241 }
242
243 nir_foreach_shader_in_variable(var, consumer) {
244 for (unsigned i = 0; i < get_num_components(var); i++) {
245 if (var->data.patch) {
246 if (is_non_generic_patch_var(var))
247 continue;
248
249 patches_read[var->data.location_frac + i] |=
250 get_variable_io_mask(var, consumer->info.stage);
251 } else {
252 read[var->data.location_frac + i] |=
253 get_variable_io_mask(var, consumer->info.stage);
254 }
255 }
256 }
257
258 bool progress = false;
259 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
260 patches_read);
261
262 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
263 patches_written) ||
264 progress;
265
266 return progress;
267 }
268
269 static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
271 bool default_to_smooth_interp)
272 {
273 if (var->data.per_primitive)
274 return INTERP_MODE_NONE;
275 if (glsl_type_is_integer(type))
276 return INTERP_MODE_FLAT;
277 else if (var->data.interpolation != INTERP_MODE_NONE)
278 return var->data.interpolation;
279 else if (default_to_smooth_interp)
280 return INTERP_MODE_SMOOTH;
281 else
282 return INTERP_MODE_NONE;
283 }
284
285 #define INTERPOLATE_LOC_SAMPLE 0
286 #define INTERPOLATE_LOC_CENTROID 1
287 #define INTERPOLATE_LOC_CENTER 2
288
289 static uint8_t
get_interp_loc(nir_variable *var)
291 {
292 if (var->data.sample)
293 return INTERPOLATE_LOC_SAMPLE;
294 else if (var->data.centroid)
295 return INTERPOLATE_LOC_CENTROID;
296 else
297 return INTERPOLATE_LOC_CENTER;
298 }
299
300 static bool
is_packing_supported_for_type(const struct glsl_type *type)
302 {
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bits. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
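   /* For illustration: a lone "float" or "int" component qualifies for
    * packing, while e.g. a "vec2", a "double" or a "float[2]" does not and
    * keeps its original location.
    */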
309 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
310 }
311
312 struct assigned_comps {
313 uint8_t comps;
314 uint8_t interp_type;
315 uint8_t interp_loc;
316 bool is_32bit;
317 bool is_mediump;
318 bool is_per_primitive;
319 };
320
/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing locations for now.
 * TODO: allow better packing of complex types.
 */
325 static void
get_unmoveable_components_masks(nir_shader *shader,
327 nir_variable_mode mode,
328 struct assigned_comps *comps,
329 gl_shader_stage stage,
330 bool default_to_smooth_interp)
331 {
332 nir_foreach_variable_with_modes_safe(var, shader, mode) {
333 assert(var->data.location >= 0);
334
335 /* Only remap things that aren't built-ins. */
336 if (var->data.location >= VARYING_SLOT_VAR0 &&
337 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
338
339 const struct glsl_type *type = var->type;
340 if (nir_is_arrayed_io(var, stage)) {
341 assert(glsl_type_is_array(type));
342 type = glsl_get_array_element(type);
343 }
344
345 /* If we can pack this varying then don't mark the components as
346 * used.
347 */
348 if (is_packing_supported_for_type(type) &&
349 !var->data.always_active_io)
350 continue;
351
352 unsigned location = var->data.location - VARYING_SLOT_VAR0;
353
354 unsigned elements =
355 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? glsl_get_vector_elements(glsl_without_array(type)) : 4;
356
357 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
358 unsigned slots = glsl_count_attribute_slots(type, false);
359 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
360 unsigned comps_slot2 = 0;
361 for (unsigned i = 0; i < slots; i++) {
362 if (dual_slot) {
363 if (i & 1) {
364 comps[location + i].comps |= ((1 << comps_slot2) - 1);
365 } else {
366 unsigned num_comps = 4 - var->data.location_frac;
367 comps_slot2 = (elements * dmul) - num_comps;
368
369 /* Assume ARB_enhanced_layouts packing rules for doubles */
370 assert(var->data.location_frac == 0 ||
371 var->data.location_frac == 2);
372 assert(comps_slot2 <= 4);
373
374 comps[location + i].comps |=
375 ((1 << num_comps) - 1) << var->data.location_frac;
376 }
377 } else {
378 comps[location + i].comps |=
379 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
380 }
381
382 comps[location + i].interp_type =
383 get_interp_type(var, type, default_to_smooth_interp);
384 comps[location + i].interp_loc = get_interp_loc(var);
385 comps[location + i].is_32bit =
386 glsl_type_is_32bit(glsl_without_array(type));
387 comps[location + i].is_mediump =
388 var->data.precision == GLSL_PRECISION_MEDIUM ||
389 var->data.precision == GLSL_PRECISION_LOW;
390 comps[location + i].is_per_primitive = var->data.per_primitive;
391 }
392 }
393 }
394 }
395
396 struct varying_loc {
397 uint8_t component;
398 uint32_t location;
399 };
400
401 static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
403 uint64_t slots_used_mask, unsigned num_slots)
404 {
405 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
406
407 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
408 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
409 }
410
411 static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
413 {
414 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
415
416 slots_used[var->data.patch ? 1 : 0] |=
417 BITFIELD64_BIT(var->data.location - loc_offset + offset);
418 }
419
420 static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
422 struct varying_loc (*remap)[4],
423 uint64_t *slots_used, uint64_t *out_slots_read,
424 uint32_t *p_slots_used, uint32_t *p_out_slots_read)
425 {
426 const gl_shader_stage stage = shader->info.stage;
427 uint64_t out_slots_read_tmp[2] = { 0 };
428 uint64_t slots_used_tmp[2] = { 0 };
429
430 /* We don't touch builtins so just copy the bitmask */
431 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
432
433 nir_foreach_variable_with_modes(var, shader, mode) {
434 assert(var->data.location >= 0);
435
436 /* Only remap things that aren't built-ins */
437 if (var->data.location >= VARYING_SLOT_VAR0 &&
438 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
439
440 const struct glsl_type *type = var->type;
441 if (nir_is_arrayed_io(var, stage)) {
442 assert(glsl_type_is_array(type));
443 type = glsl_get_array_element(type);
444 }
445
446 unsigned num_slots = glsl_count_attribute_slots(type, false);
447 bool used_across_stages = false;
448 bool outputs_read = false;
449
450 unsigned location = var->data.location - VARYING_SLOT_VAR0;
451 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
452
453 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
454 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
455 uint64_t outs_used =
456 var->data.patch ? *p_out_slots_read : *out_slots_read;
457 uint64_t slots =
458 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
459
460 if (slots & used)
461 used_across_stages = true;
462
463 if (slots & outs_used)
464 outputs_read = true;
465
466 if (new_loc->location) {
467 var->data.location = new_loc->location;
468 var->data.location_frac = new_loc->component;
469 }
470
471 if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
477 if (used_across_stages)
478 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
479
480 if (outputs_read) {
481 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
482 num_slots);
483 }
484 } else {
485 for (unsigned i = 0; i < num_slots; i++) {
486 if (used_across_stages)
487 mark_used_slot(var, slots_used_tmp, i);
488
489 if (outputs_read)
490 mark_used_slot(var, out_slots_read_tmp, i);
491 }
492 }
493 }
494 }
495
496 *slots_used = slots_used_tmp[0];
497 *out_slots_read = out_slots_read_tmp[0];
498 *p_slots_used = slots_used_tmp[1];
499 *p_out_slots_read = out_slots_read_tmp[1];
500 }
501
502 struct varying_component {
503 nir_variable *var;
504 uint8_t interp_type;
505 uint8_t interp_loc;
506 bool is_32bit;
507 bool is_patch;
508 bool is_per_primitive;
509 bool is_mediump;
510 bool is_intra_stage_only;
511 bool initialised;
512 };
513
514 static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
516 {
517 struct varying_component *comp1 = (struct varying_component *)comp1_v;
518 struct varying_component *comp2 = (struct varying_component *)comp2_v;
519
   /* We want patches to be ordered at the end of the array. */
521 if (comp1->is_patch != comp2->is_patch)
522 return comp1->is_patch ? 1 : -1;
523
524 /* Sort per-primitive outputs after per-vertex ones to allow
525 * better compaction when they are mixed in the shader's source.
526 */
527 if (comp1->is_per_primitive != comp2->is_per_primitive)
528 return comp1->is_per_primitive ? 1 : -1;
529
   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
533 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
534 return comp1->is_intra_stage_only ? 1 : -1;
535
536 /* Group mediump varyings together. */
537 if (comp1->is_mediump != comp2->is_mediump)
538 return comp1->is_mediump ? 1 : -1;
539
540 /* We can only pack varyings with matching interpolation types so group
541 * them together.
542 */
543 if (comp1->interp_type != comp2->interp_type)
544 return comp1->interp_type - comp2->interp_type;
545
546 /* Interpolation loc must match also. */
547 if (comp1->interp_loc != comp2->interp_loc)
548 return comp1->interp_loc - comp2->interp_loc;
549
550 /* If everything else matches just use the original location to sort */
551 const struct nir_variable_data *const data1 = &comp1->var->data;
552 const struct nir_variable_data *const data2 = &comp2->var->data;
553 if (data1->location != data2->location)
554 return data1->location - data2->location;
555 return (int)data1->location_frac - (int)data2->location_frac;
556 }
557
558 static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
560 struct varying_component **varying_comp_info,
561 unsigned *varying_comp_info_size,
562 bool default_to_smooth_interp)
563 {
564 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = { { 0 } };
565 unsigned num_of_comps_to_pack = 0;
566
   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
570 nir_foreach_shader_out_variable(var, producer) {
571
572 /* Only remap things that aren't builtins. */
573 if (var->data.location >= VARYING_SLOT_VAR0 &&
574 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
575
576 /* We can't repack xfb varyings. */
577 if (var->data.always_active_io)
578 continue;
579
580 const struct glsl_type *type = var->type;
581 if (nir_is_arrayed_io(var, producer->info.stage)) {
582 assert(glsl_type_is_array(type));
583 type = glsl_get_array_element(type);
584 }
585
586 if (!is_packing_supported_for_type(type))
587 continue;
588
589 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
590 store_varying_info_idx[loc][var->data.location_frac] =
591 ++num_of_comps_to_pack;
592 }
593 }
594
595 *varying_comp_info_size = num_of_comps_to_pack;
596 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
597 num_of_comps_to_pack);
598
599 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
600
601 /* Walk over the shader and populate the varying component info array */
602 nir_foreach_block(block, impl) {
603 nir_foreach_instr(instr, block) {
604 if (instr->type != nir_instr_type_intrinsic)
605 continue;
606
607 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
608 if (intr->intrinsic != nir_intrinsic_load_deref &&
609 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
610 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
611 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
612 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
613 continue;
614
615 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
616 if (!nir_deref_mode_is(deref, nir_var_shader_in))
617 continue;
618
619 /* We only remap things that aren't builtins. */
620 nir_variable *in_var = nir_deref_instr_get_variable(deref);
621 if (in_var->data.location < VARYING_SLOT_VAR0)
622 continue;
623
         /* Do not remap per-vertex shader inputs because they are arrays of
          * 3 elements and this isn't supported.
          */
627 if (in_var->data.per_vertex)
628 continue;
629
630 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
631 if (location >= MAX_VARYINGS_INCL_PATCH)
632 continue;
633
634 unsigned var_info_idx =
635 store_varying_info_idx[location][in_var->data.location_frac];
636 if (!var_info_idx)
637 continue;
638
639 struct varying_component *vc_info =
640 &(*varying_comp_info)[var_info_idx - 1];
641
642 if (!vc_info->initialised) {
643 const struct glsl_type *type = in_var->type;
644 if (nir_is_arrayed_io(in_var, consumer->info.stage)) {
645 assert(glsl_type_is_array(type));
646 type = glsl_get_array_element(type);
647 }
648
649 vc_info->var = in_var;
650 vc_info->interp_type =
651 get_interp_type(in_var, type, default_to_smooth_interp);
652 vc_info->interp_loc = get_interp_loc(in_var);
653 vc_info->is_32bit = glsl_type_is_32bit(type);
654 vc_info->is_patch = in_var->data.patch;
655 vc_info->is_per_primitive = in_var->data.per_primitive;
656 vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
657 (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
658 in_var->data.precision == GLSL_PRECISION_LOW);
659 vc_info->is_intra_stage_only = false;
660 vc_info->initialised = true;
661 }
662 }
663 }
664
665 /* Walk over the shader and populate the varying component info array
666 * for varyings which are read by other TCS instances but are not consumed
667 * by the TES.
668 */
669 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
670 impl = nir_shader_get_entrypoint(producer);
671
672 nir_foreach_block(block, impl) {
673 nir_foreach_instr(instr, block) {
674 if (instr->type != nir_instr_type_intrinsic)
675 continue;
676
677 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
678 if (intr->intrinsic != nir_intrinsic_load_deref)
679 continue;
680
681 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
682 if (!nir_deref_mode_is(deref, nir_var_shader_out))
683 continue;
684
685 /* We only remap things that aren't builtins. */
686 nir_variable *out_var = nir_deref_instr_get_variable(deref);
687 if (out_var->data.location < VARYING_SLOT_VAR0)
688 continue;
689
690 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
691 if (location >= MAX_VARYINGS_INCL_PATCH)
692 continue;
693
694 unsigned var_info_idx =
695 store_varying_info_idx[location][out_var->data.location_frac];
696 if (!var_info_idx) {
697 /* Something went wrong, the shader interfaces didn't match, so
698 * abandon packing. This can happen for example when the
699 * inputs are scalars but the outputs are struct members.
700 */
701 *varying_comp_info_size = 0;
702 break;
703 }
704
705 struct varying_component *vc_info =
706 &(*varying_comp_info)[var_info_idx - 1];
707
708 if (!vc_info->initialised) {
709 const struct glsl_type *type = out_var->type;
710 if (nir_is_arrayed_io(out_var, producer->info.stage)) {
711 assert(glsl_type_is_array(type));
712 type = glsl_get_array_element(type);
713 }
714
715 vc_info->var = out_var;
716 vc_info->interp_type =
717 get_interp_type(out_var, type, default_to_smooth_interp);
718 vc_info->interp_loc = get_interp_loc(out_var);
719 vc_info->is_32bit = glsl_type_is_32bit(type);
720 vc_info->is_patch = out_var->data.patch;
721 vc_info->is_per_primitive = out_var->data.per_primitive;
722 vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
723 (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
724 out_var->data.precision == GLSL_PRECISION_LOW);
725 vc_info->is_intra_stage_only = true;
726 vc_info->initialised = true;
727 }
728 }
729 }
730 }
731
732 for (unsigned i = 0; i < *varying_comp_info_size; i++) {
733 struct varying_component *vc_info = &(*varying_comp_info)[i];
734 if (!vc_info->initialised) {
735 /* Something went wrong, the shader interfaces didn't match, so
736 * abandon packing. This can happen for example when the outputs are
737 * scalars but the inputs are struct members.
738 */
739 *varying_comp_info_size = 0;
740 break;
741 }
742 }
743 }
744
745 static bool
allow_pack_interp_type(nir_io_options options, int type)
747 {
748 switch (type) {
749 case INTERP_MODE_NONE:
750 case INTERP_MODE_SMOOTH:
751 case INTERP_MODE_NOPERSPECTIVE:
752 return options & nir_io_has_flexible_input_interpolation_except_flat;
753 default:
754 return false;
755 }
756 }
757
758 static void
assign_remap_locations(struct varying_loc (*remap)[4],
760 struct assigned_comps *assigned_comps,
761 struct varying_component *info,
762 unsigned *cursor, unsigned *comp,
763 unsigned max_location,
764 nir_io_options options)
765 {
766 unsigned tmp_cursor = *cursor;
767 unsigned tmp_comp = *comp;
768
769 for (; tmp_cursor < max_location; tmp_cursor++) {
770
771 if (assigned_comps[tmp_cursor].comps) {
772 /* Don't pack per-primitive and per-vertex varyings together. */
773 if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
774 tmp_comp = 0;
775 continue;
776 }
777
778 /* We can only pack varyings with matching precision. */
779 if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
780 tmp_comp = 0;
781 continue;
782 }
783
         /* Varyings with mismatching interpolation types can only be packed
          * together if the driver supports flexible input interpolation
          * (and neither type is flat).
          */
787 if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
788 (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
789 !allow_pack_interp_type(options, info->interp_type))) {
790 tmp_comp = 0;
791 continue;
792 }
793
         /* Varyings with mismatching interpolation locations can only be
          * packed together if the driver supports flexible input
          * interpolation.
          */
797 if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
798 !(options & nir_io_has_flexible_input_interpolation_except_flat)) {
799 tmp_comp = 0;
800 continue;
801 }
802
803 /* We can only pack varyings with matching types, and the current
804 * algorithm only supports packing 32-bit.
805 */
806 if (!assigned_comps[tmp_cursor].is_32bit) {
807 tmp_comp = 0;
808 continue;
809 }
810
811 while (tmp_comp < 4 &&
812 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
813 tmp_comp++;
814 }
815 }
816
817 if (tmp_comp == 4) {
818 tmp_comp = 0;
819 continue;
820 }
821
822 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
823
824 /* Once we have assigned a location mark it as used */
825 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
826 assigned_comps[tmp_cursor].interp_type = info->interp_type;
827 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
828 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
829 assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
830 assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
831
832 /* Assign remap location */
833 remap[location][info->var->data.location_frac].component = tmp_comp++;
834 remap[location][info->var->data.location_frac].location =
835 tmp_cursor + VARYING_SLOT_VAR0;
836
837 break;
838 }
839
840 *cursor = tmp_cursor;
841 *comp = tmp_comp;
842 }
843
/* If there are empty components in a slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
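/* For example, if the producer writes two packable smooth floats at VAR1.y
 * and VAR3.x and nothing else claims those components, the code below can
 * remap both into VAR0.x and VAR0.y, freeing the original slots entirely.
 * This is only an illustration of the intent; the exact assignment depends on
 * the sort order above and on the unmoveable components gathered earlier.
 */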
848 static void
compact_components(nir_shader *producer, nir_shader *consumer,
850 struct assigned_comps *assigned_comps,
851 bool default_to_smooth_interp)
852 {
853 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
854 struct varying_component *varying_comp_info;
855 unsigned varying_comp_info_size;
856
857 /* Gather varying component info */
858 gather_varying_component_info(producer, consumer, &varying_comp_info,
859 &varying_comp_info_size,
860 default_to_smooth_interp);
861
862 /* Sort varying components. */
863 qsort(varying_comp_info, varying_comp_info_size,
864 sizeof(struct varying_component), cmp_varying_component);
865
866 unsigned cursor = 0;
867 unsigned comp = 0;
868
869 /* Set the remap array based on the sorted components */
870 for (unsigned i = 0; i < varying_comp_info_size; i++) {
871 struct varying_component *info = &varying_comp_info[i];
872
873 assert(info->is_patch || cursor < MAX_VARYING);
874 if (info->is_patch) {
875 /* The list should be sorted with all non-patch inputs first followed
876 * by patch inputs. When we hit our first patch input, we need to
877 * reset the cursor to MAX_VARYING so we put them in the right slot.
878 */
879 if (cursor < MAX_VARYING) {
880 cursor = MAX_VARYING;
881 comp = 0;
882 }
883
884 assign_remap_locations(remap, assigned_comps, info,
885 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
886 consumer->options->io_options);
887 } else {
888 assign_remap_locations(remap, assigned_comps, info,
889 &cursor, &comp, MAX_VARYING,
890 consumer->options->io_options);
891
892 /* Check if we failed to assign a remap location. This can happen if
893 * for example there are a bunch of unmovable components with
894 * mismatching interpolation types causing us to skip over locations
895 * that would have been useful for packing later components.
896 * The solution is to iterate over the locations again (this should
897 * happen very rarely in practice).
898 */
899 if (cursor == MAX_VARYING) {
900 cursor = 0;
901 comp = 0;
902 assign_remap_locations(remap, assigned_comps, info,
903 &cursor, &comp, MAX_VARYING,
904 consumer->options->io_options);
905 }
906 }
907 }
908
909 ralloc_free(varying_comp_info);
910
911 uint64_t zero = 0;
912 uint32_t zero32 = 0;
913 remap_slots_and_components(consumer, nir_var_shader_in, remap,
914 &consumer->info.inputs_read, &zero,
915 &consumer->info.patch_inputs_read, &zero32);
916 remap_slots_and_components(producer, nir_var_shader_out, remap,
917 &producer->info.outputs_written,
918 &producer->info.outputs_read,
919 &producer->info.patch_outputs_written,
920 &producer->info.patch_outputs_read);
921 }
922
923 /* We assume that this has been called more-or-less directly after
924 * remove_unused_varyings. At this point, all of the varyings that we
925 * aren't going to be using have been completely removed and the
926 * inputs_read and outputs_written fields in nir_shader_info reflect
927 * this. Therefore, the total set of valid slots is the OR of the two
928 * sets of varyings; this accounts for varyings which one side may need
929 * to read/write even if the other doesn't. This can happen if, for
930 * instance, an array is used indirectly from one side causing it to be
931 * unsplittable but directly from the other.
932 */
933 void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
935 bool default_to_smooth_interp)
936 {
937 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
938 assert(consumer->info.stage != MESA_SHADER_VERTEX);
939
940 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = { { 0 } };
941
942 get_unmoveable_components_masks(producer, nir_var_shader_out,
943 assigned_comps,
944 producer->info.stage,
945 default_to_smooth_interp);
946 get_unmoveable_components_masks(consumer, nir_var_shader_in,
947 assigned_comps,
948 consumer->info.stage,
949 default_to_smooth_interp);
950
951 compact_components(producer, consumer, assigned_comps,
952 default_to_smooth_interp);
953 }
954
955 /*
956 * Mark XFB varyings as always_active_io in the consumer so the linking opts
957 * don't touch them.
958 */
959 void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
961 {
962 nir_variable *input_vars[MAX_VARYING][4] = { 0 };
963
964 nir_foreach_shader_in_variable(var, consumer) {
965 if (var->data.location >= VARYING_SLOT_VAR0 &&
966 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
967
968 unsigned location = var->data.location - VARYING_SLOT_VAR0;
969 input_vars[location][var->data.location_frac] = var;
970 }
971 }
972
973 nir_foreach_shader_out_variable(var, producer) {
974 if (var->data.location >= VARYING_SLOT_VAR0 &&
975 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
976
977 if (!var->data.always_active_io)
978 continue;
979
980 unsigned location = var->data.location - VARYING_SLOT_VAR0;
981 if (input_vars[location][var->data.location_frac]) {
982 input_vars[location][var->data.location_frac]->data.always_active_io = true;
983 }
984 }
985 }
986 }
987
988 static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
990 {
991 return in_var->data.location == out_var->data.location &&
992 in_var->data.location_frac == out_var->data.location_frac &&
993 in_var->type == out_var->type;
994 }
995
996 static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
998 {
999 nir_foreach_shader_in_variable(var, consumer) {
1000 if (does_varying_match(out_var, var))
1001 return var;
1002 }
1003
1004 return NULL;
1005 }
1006
1007 static bool
can_replace_varying(nir_variable *out_var)
1009 {
1010 /* Skip types that require more complex handling.
1011 * TODO: add support for these types.
1012 */
1013 if (glsl_type_is_array(out_var->type) ||
1014 glsl_type_is_dual_slot(out_var->type) ||
1015 glsl_type_is_matrix(out_var->type) ||
1016 glsl_type_is_struct_or_ifc(out_var->type))
1017 return false;
1018
1019 /* Limit this pass to scalars for now to keep things simple. Most varyings
1020 * should have been lowered to scalars at this point anyway.
1021 */
1022 if (!glsl_type_is_scalar(out_var->type))
1023 return false;
1024
1025 if (out_var->data.location < VARYING_SLOT_VAR0 ||
1026 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1027 return false;
1028
1029 return true;
1030 }
1031
1032 static bool
replace_varying_input_by_constant_load(nir_shader *shader,
1034 nir_intrinsic_instr *store_intr)
1035 {
1036 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1037
1038 nir_builder b = nir_builder_create(impl);
1039
1040 nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1041
1042 bool progress = false;
1043 nir_foreach_block(block, impl) {
1044 nir_foreach_instr(instr, block) {
1045 if (instr->type != nir_instr_type_intrinsic)
1046 continue;
1047
1048 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1049 if (intr->intrinsic != nir_intrinsic_load_deref)
1050 continue;
1051
1052 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1053 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1054 continue;
1055
1056 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1057
1058 if (!does_varying_match(out_var, in_var))
1059 continue;
1060
1061 b.cursor = nir_before_instr(instr);
1062
1063 nir_load_const_instr *out_const =
1064 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1065
1066 /* Add new const to replace the input */
1067 nir_def *nconst = nir_build_imm(&b, store_intr->num_components,
1068 intr->def.bit_size,
1069 out_const->value);
1070
1071 nir_def_rewrite_uses(&intr->def, nconst);
1072
1073 progress = true;
1074 }
1075 }
1076
1077 return progress;
1078 }
1079
1080 static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1082 nir_intrinsic_instr *dup_store_intr)
1083 {
1084 assert(input_var);
1085
1086 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1087
1088 nir_builder b = nir_builder_create(impl);
1089
1090 nir_variable *dup_out_var = nir_intrinsic_get_var(dup_store_intr, 0);
1091
1092 bool progress = false;
1093 nir_foreach_block(block, impl) {
1094 nir_foreach_instr(instr, block) {
1095 if (instr->type != nir_instr_type_intrinsic)
1096 continue;
1097
1098 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1099 if (intr->intrinsic != nir_intrinsic_load_deref)
1100 continue;
1101
1102 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1103 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1104 continue;
1105
1106 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1107
1108 if (!does_varying_match(dup_out_var, in_var) ||
1109 in_var->data.interpolation != input_var->data.interpolation ||
1110 get_interp_loc(in_var) != get_interp_loc(input_var) ||
1111 in_var->data.per_vertex)
1112 continue;
1113
1114 b.cursor = nir_before_instr(instr);
1115
1116 nir_def *load = nir_load_var(&b, input_var);
1117 nir_def_rewrite_uses(&intr->def, load);
1118
1119 progress = true;
1120 }
1121 }
1122
1123 return progress;
1124 }
1125
1126 static bool
is_direct_uniform_load(nir_def *def, nir_scalar *s)
1128 {
   /* def is sure to be scalar, as can_replace_varying() filters out the vector case. */
1130 assert(def->num_components == 1);
1131
   /* The uniform load may hide behind a move instruction that converts the
    * vector to a scalar:
1134 *
1135 * vec1 32 ssa_1 = deref_var &color (uniform vec3)
1136 * vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1137 * vec1 32 ssa_3 = mov ssa_2.x
1138 * vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1139 * intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1140 */
1141 *s = nir_scalar_resolved(def, 0);
1142
1143 nir_def *ssa = s->def;
1144 if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1145 return false;
1146
1147 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1148 if (intr->intrinsic != nir_intrinsic_load_deref)
1149 return false;
1150
1151 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1152 /* TODO: support nir_var_mem_ubo. */
1153 if (!nir_deref_mode_is(deref, nir_var_uniform))
1154 return false;
1155
1156 /* Does not support indirect uniform load. */
1157 return !nir_deref_instr_has_indirect(deref);
1158 }
1159
1160 /**
1161 * Add a uniform variable from one shader to a different shader.
1162 *
 * \param nir     The shader to which the uniform is added.
 * \param uniform The uniform that's declared in another shader.
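 * \param spirv   If true, match an existing UBO variable by binding (the
 *                SPIR-V path); otherwise match by variable name (the GLSL
 *                path).
 * \return The variable already present in \p nir, or a newly added clone of
 *         \p uniform.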
1165 */
1166 nir_variable *
nir_clone_uniform_variable(nir_shader *nir, nir_variable *uniform, bool spirv)
1168 {
   /* Check whether the uniform already exists in the consumer. */
1170 nir_variable *new_var = NULL;
1171 nir_foreach_variable_with_modes(v, nir, uniform->data.mode) {
1172 if ((spirv && uniform->data.mode & nir_var_mem_ubo &&
1173 v->data.binding == uniform->data.binding) ||
1174 (!spirv && !strcmp(uniform->name, v->name))) {
1175 new_var = v;
1176 break;
1177 }
1178 }
1179
   /* Create the variable if it doesn't exist yet. */
1181 if (!new_var) {
1182 new_var = nir_variable_clone(uniform, nir);
1183 nir_shader_add_variable(nir, new_var);
1184 }
1185
1186 return new_var;
1187 }
1188
1189 nir_deref_instr *
nir_clone_deref_instr(nir_builder *b, nir_variable *var,
1191 nir_deref_instr *deref)
1192 {
1193 if (deref->deref_type == nir_deref_type_var)
1194 return nir_build_deref_var(b, var);
1195
1196 nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1197 nir_deref_instr *parent = nir_clone_deref_instr(b, var, parent_deref);
1198
   /* Build the array or struct deref instructions.
    * The "deref" instruction is guaranteed to be direct (see
    * is_direct_uniform_load()).
    */
1202 switch (deref->deref_type) {
1203 case nir_deref_type_array: {
1204 if (b->shader ==
1205 nir_cf_node_get_function(&deref->instr.block->cf_node)->function->shader) {
1206 /* Cloning within the same shader. */
1207 return nir_build_deref_array(b, parent, deref->arr.index.ssa);
1208 } else {
1209 /* Cloning to a different shader. The index must be constant because
1210 * we don't implement cloning the index SSA here.
1211 */
1212 nir_load_const_instr *index =
1213 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1214 return nir_build_deref_array_imm(b, parent, index->value->i64);
1215 }
1216 }
1217 case nir_deref_type_ptr_as_array: {
1218 nir_load_const_instr *index =
1219 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1220 nir_def *ssa = nir_imm_intN_t(b, index->value->i64,
1221 parent->def.bit_size);
1222 return nir_build_deref_ptr_as_array(b, parent, ssa);
1223 }
1224 case nir_deref_type_struct:
1225 return nir_build_deref_struct(b, parent, deref->strct.index);
1226 default:
1227 unreachable("invalid type");
1228 return NULL;
1229 }
1230 }
1231
1232 static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
1234 nir_intrinsic_instr *store_intr,
1235 nir_scalar *scalar)
1236 {
1237 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1238
1239 nir_builder b = nir_builder_create(impl);
1240
1241 nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1242
1243 nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1244 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1245 nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1246 uni_var = nir_clone_uniform_variable(shader, uni_var, false);
1247
1248 bool progress = false;
1249 nir_foreach_block(block, impl) {
1250 nir_foreach_instr(instr, block) {
1251 if (instr->type != nir_instr_type_intrinsic)
1252 continue;
1253
1254 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1255 if (intr->intrinsic != nir_intrinsic_load_deref)
1256 continue;
1257
1258 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1259 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1260 continue;
1261
1262 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1263
1264 if (!does_varying_match(out_var, in_var))
1265 continue;
1266
1267 b.cursor = nir_before_instr(instr);
1268
         /* Clone the whole deref chain of the uniform load into this shader. */
1270 nir_deref_instr *uni_deref = nir_clone_deref_instr(&b, uni_var, deref);
1271 nir_def *uni_def = nir_load_deref(&b, uni_deref);
1272
1273 /* Add a vector to scalar move if uniform is a vector. */
1274 if (uni_def->num_components > 1) {
1275 nir_alu_src src = { 0 };
1276 src.src = nir_src_for_ssa(uni_def);
1277 src.swizzle[0] = scalar->comp;
1278 uni_def = nir_mov_alu(&b, src, 1);
1279 }
1280
1281 /* Replace load input with load uniform. */
1282 nir_def_rewrite_uses(&intr->def, uni_def);
1283
1284 progress = true;
1285 }
1286 }
1287
1288 return progress;
1289 }
1290
1291 /* The GLSL ES 3.20 spec says:
1292 *
1293 * "The precision of a vertex output does not need to match the precision of
1294 * the corresponding fragment input. The minimum precision at which vertex
1295 * outputs are interpolated is the minimum of the vertex output precision and
1296 * the fragment input precision, with the exception that for highp,
1297 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1298 * Output Matching by Name in Linked Programs")
1299 *
1300 * To implement this, when linking shaders we will take the minimum precision
1301 * qualifier (allowing drivers to interpolate at lower precision). For
1302 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1303 * requires we use the *last* specified precision if there is a conflict.
1304 *
1305 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1306 * NONE, we'll return the other precision, since there is no conflict.
1307 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1308 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1309 * "backwards". For non-fragment stages, we'll pick the latter precision to
1310 * comply with the spec. (Note that the order matters.)
1311 *
1312 * For streamout, "Variables declared with lowp or mediump precision are
1313 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of the OpenGL ES 3.2 specification). So drivers should promote them to
 * highp for the transform feedback memory store, but not for the output store.
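 *
 * For example, a mediump vertex output paired with a highp fragment input is
 * linked as mediump (the lower precision wins for fragment interpolation),
 * while the same pair between two non-fragment stages resolves to the
 * consumer's highp.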
1316 */
1317
1318 static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1320 {
1321 if (producer == GLSL_PRECISION_NONE)
1322 return consumer;
1323 else if (consumer == GLSL_PRECISION_NONE)
1324 return producer;
1325 else
1326 return fs ? MAX2(producer, consumer) : consumer;
1327 }
1328
1329 static nir_variable *
find_consumer_variable(const nir_shader *consumer,
1331 const nir_variable *producer_var)
1332 {
1333 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) {
1334 if (var->data.location == producer_var->data.location &&
1335 var->data.location_frac == producer_var->data.location_frac)
1336 return var;
1337 }
1338 return NULL;
1339 }
1340
1341 void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1343 {
1344 bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1345
1346 nir_foreach_shader_out_variable(producer_var, producer) {
1347 /* Skip if the slot is not assigned */
1348 if (producer_var->data.location < 0)
1349 continue;
1350
1351 nir_variable *consumer_var = find_consumer_variable(consumer,
1352 producer_var);
1353
1354 /* Skip if the variable will be eliminated */
1355 if (!consumer_var)
1356 continue;
1357
1358 /* Now we have a pair of variables. Let's pick the smaller precision. */
1359 unsigned precision_1 = producer_var->data.precision;
1360 unsigned precision_2 = consumer_var->data.precision;
1361 unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1362
1363 /* Propagate the new precision */
1364 producer_var->data.precision = consumer_var->data.precision = minimum;
1365 }
1366 }
1367
1368 bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1370 {
1371 /* TODO: Add support for more shader stage combinations */
1372 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1373 (producer->info.stage != MESA_SHADER_VERTEX &&
1374 producer->info.stage != MESA_SHADER_TESS_EVAL))
1375 return false;
1376
1377 bool progress = false;
1378
1379 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1380
1381 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1382
1383 /* If we find a store in the last block of the producer we can be sure this
1384 * is the only possible value for this output.
1385 */
1386 nir_block *last_block = nir_impl_last_block(impl);
1387 nir_foreach_instr_reverse(instr, last_block) {
1388 if (instr->type != nir_instr_type_intrinsic)
1389 continue;
1390
1391 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1392
1393 if (intr->intrinsic != nir_intrinsic_store_deref)
1394 continue;
1395
1396 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1397 if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1398 continue;
1399
1400 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1401 if (!can_replace_varying(out_var))
1402 continue;
1403
1404 nir_def *ssa = intr->src[1].ssa;
1405 if (ssa->parent_instr->type == nir_instr_type_load_const) {
1406 progress |= replace_varying_input_by_constant_load(consumer, intr);
1407 continue;
1408 }
1409
1410 nir_scalar uni_scalar;
1411 if (consumer->options->max_varying_expression_cost >= 2 &&
1412 is_direct_uniform_load(ssa, &uni_scalar)) {
1413 progress |= replace_varying_input_by_uniform_load(consumer, intr,
1414 &uni_scalar);
1415 continue;
1416 }
1417
1418 struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1419 if (entry) {
1420 progress |= replace_duplicate_input(consumer,
1421 (nir_variable *)entry->data,
1422 intr);
1423 } else {
1424 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1425 if (in_var) {
1426 _mesa_hash_table_insert(varying_values, ssa, in_var);
1427 }
1428 }
1429 }
1430
1431 _mesa_hash_table_destroy(varying_values, NULL);
1432
1433 return progress;
1434 }
1435
1436 /* TODO any better helper somewhere to sort a list? */
1437
1438 static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1440 {
1441 nir_foreach_variable_in_list(var, var_list) {
1442 /* Use the `per_primitive` bool to sort per-primitive variables
1443 * to the end of the list, so they get the last driver locations
1444 * by nir_assign_io_var_locations.
1445 *
1446 * This is done because AMD HW requires that per-primitive outputs
1447 * are the last params.
1448 * In the future we can add an option for this, if needed by other HW.
1449 */
1450 if (new_var->data.per_primitive < var->data.per_primitive ||
1451 (new_var->data.per_primitive == var->data.per_primitive &&
1452 (var->data.location > new_var->data.location ||
1453 (var->data.location == new_var->data.location &&
1454 var->data.location_frac > new_var->data.location_frac)))) {
1455 exec_node_insert_node_before(&var->node, &new_var->node);
1456 return;
1457 }
1458 }
1459 exec_list_push_tail(var_list, &new_var->node);
1460 }
1461
1462 static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
1464 struct exec_list *sorted_list)
1465 {
1466 exec_list_make_empty(sorted_list);
1467 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1468 exec_node_remove(&var->node);
1469 insert_sorted(sorted_list, var);
1470 }
1471 }
1472
1473 void
nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode)
1475 {
1476 struct exec_list vars;
1477
1478 sort_varyings(shader, mode, &vars);
1479 exec_list_append(&shader->variables, &vars);
1480 }
1481
1482 void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1484 unsigned *size, gl_shader_stage stage)
1485 {
1486 unsigned location = 0;
1487 unsigned assigned_locations[VARYING_SLOT_TESS_MAX][2];
1488 uint64_t processed_locs[2] = { 0 };
1489
1490 struct exec_list io_vars;
1491 sort_varyings(shader, mode, &io_vars);
1492
1493 int ASSERTED last_loc = 0;
1494 bool ASSERTED last_per_prim = false;
1495 bool last_partial = false;
1496 nir_foreach_variable_in_list(var, &io_vars) {
1497 const struct glsl_type *type = var->type;
1498 if (nir_is_arrayed_io(var, stage)) {
1499 assert(glsl_type_is_array(type));
1500 type = glsl_get_array_element(type);
1501 }
1502
1503 int base;
1504 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1505 base = VERT_ATTRIB_GENERIC0;
1506 else if (var->data.mode == nir_var_shader_out &&
1507 stage == MESA_SHADER_FRAGMENT)
1508 base = FRAG_RESULT_DATA0;
1509 else
1510 base = VARYING_SLOT_VAR0;
1511
1512 unsigned var_size, driver_size;
1513 if (var->data.compact) {
1514 /* If we are inside a partial compact,
1515 * don't allow another compact to be in this slot
1516 * if it starts at component 0.
1517 */
1518 if (last_partial && var->data.location_frac == 0) {
1519 location++;
1520 }
1521
1522 /* compact variables must be arrays of scalars */
1523 assert(!var->data.per_view);
1524 assert(glsl_type_is_array(type));
1525 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1526 unsigned start = 4 * location + var->data.location_frac;
1527 unsigned end = start + glsl_get_length(type);
1528 var_size = driver_size = end / 4 - location;
1529 last_partial = end % 4 != 0;
1530 } else {
1531 /* Compact variables bypass the normal varying compacting pass,
1532 * which means they cannot be in the same vec4 slot as a normal
1533 * variable. If part of the current slot is taken up by a compact
1534 * variable, we need to go to the next one.
1535 */
1536 if (last_partial) {
1537 location++;
1538 last_partial = false;
1539 }
1540
1541 var_size = glsl_count_attribute_slots(type, false);
1542 if (var->data.per_view &&
1543 shader->options->per_view_unique_driver_locations) {
1544 /* per-view variables have an extra array dimension, which is
1545 * ignored when counting user-facing slots (var->data.location),
1546 * but *not* with driver slots (var->data.driver_location). That
1547 * is, each user slot maps to multiple driver slots. */
1548 const struct glsl_type *array_type = var->type;
1549 driver_size = glsl_count_attribute_slots(array_type, false);
1550 } else {
1551 driver_size = var_size;
1552 }
1553 }
1554
1555 /* Builtins don't allow component packing so we only need to worry about
1556 * user defined varyings sharing the same location.
1557 */
1558 bool processed = false;
1559 if (var->data.location >= base) {
1560 unsigned glsl_location = var->data.location - base;
1561
1562 for (unsigned i = 0; i < var_size; i++) {
1563 if (processed_locs[var->data.index] &
1564 ((uint64_t)1 << (glsl_location + i)))
1565 processed = true;
1566 else
1567 processed_locs[var->data.index] |=
1568 ((uint64_t)1 << (glsl_location + i));
1569 }
1570 }
1571
      /* Because component packing allows varyings to share the same location,
       * we may already have processed this location.
       */
1575 if (processed) {
1576 /* TODO handle overlapping per-view variables */
1577 assert(!var->data.per_view);
1578 unsigned driver_location = assigned_locations[var->data.location][var->data.index];
1579 var->data.driver_location = driver_location;
1580
         /* An array may be packed such that it crosses multiple other arrays
          * or variables, so we need to make sure we have allocated the
          * elements consecutively if the previously processed var was shorter
          * than the current array we are processing.
1585 *
1586 * NOTE: The code below assumes the var list is ordered in ascending
1587 * location order, but per-vertex/per-primitive outputs may be
1588 * grouped separately.
1589 */
1590 assert(last_loc <= var->data.location ||
1591 last_per_prim != var->data.per_primitive);
1592 last_loc = var->data.location;
1593 last_per_prim = var->data.per_primitive;
1594 unsigned last_slot_location = driver_location + var_size;
1595 if (last_slot_location > location) {
1596 unsigned num_unallocated_slots = last_slot_location - location;
1597 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1598 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1599 assigned_locations[var->data.location + i][var->data.index] = location;
1600 location++;
1601 }
1602 }
1603 continue;
1604 }
1605
1606 for (unsigned i = 0; i < var_size; i++) {
1607 assigned_locations[var->data.location + i][var->data.index] = location + i;
1608 }
1609
1610 var->data.driver_location = location;
1611 location += driver_size;
1612 }
1613
1614 if (last_partial)
1615 location++;
1616
1617 exec_list_append(&shader->variables, &io_vars);
1618 *size = location;
1619 }
1620