/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24 #include "compiler/nir/nir_builder.h"
25 #include "compiler.h"
26
/* Divergent attribute access is undefined behaviour. To avoid divergence,
 * lower to an if-chain like:
 *
 *    value = 0;
 *    if (lane == 0)
 *       value = ld()
 *    else if (lane == 1)
 *       value = ld()
 *    ...
 *    else if (lane == MAX_LANE)
 *       value = ld()
 */
39
40 static bool
bi_lower_divergent_indirects_impl(nir_builder * b,nir_intrinsic_instr * intr,void * data)41 bi_lower_divergent_indirects_impl(nir_builder *b, nir_intrinsic_instr *intr,
42 void *data)
43 {
44 gl_shader_stage stage = b->shader->info.stage;
45 nir_src *offset;
46
47 /* Not all indirect access needs this workaround */
48 switch (intr->intrinsic) {
49 case nir_intrinsic_load_input:
50 case nir_intrinsic_load_interpolated_input:
51 case nir_intrinsic_load_attribute_pan:
52 /* Attributes and varyings */
53 offset = nir_get_io_offset_src(intr);
54 break;
55
56 case nir_intrinsic_store_output:
57 /* Varyings only */
58 if (stage == MESA_SHADER_FRAGMENT)
59 return false;
60
61 offset = nir_get_io_offset_src(intr);
62 break;
63
64 case nir_intrinsic_store_per_view_output:
65 assert(stage == MESA_SHADER_VERTEX);
66 assert(!nir_src_is_divergent(&intr->src[1]));
67 offset = nir_get_io_offset_src(intr);
68 break;
69
70 case nir_intrinsic_image_texel_address:
71 case nir_intrinsic_image_load:
72 case nir_intrinsic_image_store:
73 /* Any image access */
74 offset = &intr->src[0];
75 break;
76 default:
77 return false;
78 }
79
80 if (!nir_src_is_divergent(offset))
81 return false;
82
83 /* This indirect does need it */
84
85 b->cursor = nir_before_instr(&intr->instr);
86 nir_def *lane = nir_load_subgroup_invocation(b);
87 unsigned *lanes = data;
88
89 /* Write zero in a funny way to bypass lower_load_const_to_scalar */
90 bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
91 unsigned size = has_dest ? intr->def.bit_size : 32;
92 nir_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
93 nir_def *zeroes[4] = {zero, zero, zero, zero};
94 nir_def *res =
95 has_dest ? nir_vec(b, zeroes, intr->def.num_components) : NULL;
96
97 for (unsigned i = 0; i < (*lanes); ++i) {
98 nir_push_if(b, nir_ieq_imm(b, lane, i));
99
100 nir_instr *c = nir_instr_clone(b->shader, &intr->instr);
101 nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
102 nir_builder_instr_insert(b, c);
103 nir_pop_if(b, NULL);
104
105 if (has_dest) {
106 nir_def *c_ssa = &c_intr->def;
107 res = nir_if_phi(b, c_ssa, res);
108 }
109 }
110
111 if (has_dest)
112 nir_def_rewrite_uses(&intr->def, res);
113
114 nir_instr_remove(&intr->instr);
115 return true;
116 }
117
118 bool
bi_lower_divergent_indirects(nir_shader * shader,unsigned lanes)119 bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes)
120 {
121 return nir_shader_intrinsics_pass(shader, bi_lower_divergent_indirects_impl,
122 nir_metadata_none, &lanes);
123 }
124