1 /*
2 * Copyright © 2023 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
/*
 * Lower non-uniform load_barycentric_at_sample messages to the interpolator.
 *
 * This is pretty much identical to what nir_lower_non_uniform_access() does.
 * We do it here because otherwise GCM would undo this optimization. Also, we
 * can assume divergence analysis results are available here.
 */
31
32 #include "intel_nir.h"
33 #include "compiler/nir/nir_builder.h"
34
35 static bool
intel_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder * b,nir_instr * instr,void * cb_data)36 intel_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
37 nir_instr *instr,
38 void *cb_data)
39 {
40 if (instr->type != nir_instr_type_intrinsic)
41 return false;
42
43 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
44 if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
45 return false;
46
47 if (nir_src_is_always_uniform(intrin->src[0]) ||
48 !nir_src_is_divergent(&intrin->src[0]))
49 return false;
50
51 if (intrin->def.parent_instr->pass_flags != 0)
52 return false;
53
54 nir_def *sample_id = intrin->src[0].ssa;
55
56 b->cursor = nir_instr_remove(&intrin->instr);
57
58 nir_push_loop(b);
59 {
60 nir_def *first_sample_id = nir_read_first_invocation(b, sample_id);
61
62 nir_push_if(b, nir_ieq(b, sample_id, first_sample_id));
63 {
64 nir_builder_instr_insert(b, &intrin->instr);
65 intrin->def.parent_instr->pass_flags = 1;
66
67 nir_src_rewrite(&intrin->src[0], first_sample_id);
68
69 nir_jump(b, nir_jump_break);
70 }
71 }
72
73 return true;
74 }
75
76 static bool
intel_nir_lower_non_uniform_interpolated_input_instr(nir_builder * b,nir_instr * instr,void * cb_data)77 intel_nir_lower_non_uniform_interpolated_input_instr(nir_builder *b,
78 nir_instr *instr,
79 void *cb_data)
80 {
81 if (instr->type != nir_instr_type_intrinsic)
82 return false;
83
84 nir_intrinsic_instr *load_ii = nir_instr_as_intrinsic(instr);
85 if (load_ii->intrinsic != nir_intrinsic_load_interpolated_input)
86 return false;
87
88 assert(load_ii->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic);
89
90 nir_intrinsic_instr *bary =
91 nir_instr_as_intrinsic(load_ii->src[0].ssa->parent_instr);
92 if (bary->intrinsic != nir_intrinsic_load_barycentric_at_sample)
93 return false;
94
95 if (nir_src_is_always_uniform(bary->src[0]) ||
96 !nir_src_is_divergent(&bary->src[0]))
97 return false;
98
99 nir_def *sample_id = bary->src[0].ssa;
100
101 b->cursor = nir_instr_remove(&load_ii->instr);
102
103 nir_push_loop(b);
104 {
105 nir_def *first_sample_id = nir_read_first_invocation(b, sample_id);
106
107 nir_push_if(b, nir_ieq(b, sample_id, first_sample_id));
108 {
109 nir_def *new_bary = nir_load_barycentric_at_sample(
110 b, bary->def.bit_size, first_sample_id,
111 .interp_mode = nir_intrinsic_interp_mode(bary));
112
113 /* Set pass_flags so that the other lowering pass won't try to also
114 * lower this new load_barycentric_at_sample.
115 */
116 new_bary->parent_instr->pass_flags = 1;
117
118 nir_builder_instr_insert(b, &load_ii->instr);
119
120 nir_src_rewrite(&load_ii->src[0], new_bary);
121
122 nir_jump(b, nir_jump_break);
123 }
124 }
125
126 return true;
127 }
128
129 bool
intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader * nir)130 intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir)
131 {
132 bool progress;
133
134 nir_shader_clear_pass_flags(nir);
135
136 progress = nir_shader_instructions_pass(
137 nir,
138 intel_nir_lower_non_uniform_interpolated_input_instr,
139 nir_metadata_none,
140 NULL);
141
142 progress = nir_shader_instructions_pass(
143 nir,
144 intel_nir_lower_non_uniform_barycentric_at_sample_instr,
145 nir_metadata_none,
146 NULL) || progress;
147
148 return progress;
149 }
150