1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019-2021 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "pan_device.h"
26 #include "pan_shader.h"
27 #include "pan_format.h"
28
29 #if PAN_ARCH <= 5
30 #include "panfrost/midgard/midgard_compile.h"
31 #else
32 #include "panfrost/bifrost/bifrost_compile.h"
33 #endif
34
35 const nir_shader_compiler_options *
GENX(pan_shader_get_compiler_options)36 GENX(pan_shader_get_compiler_options)(void)
37 {
38 #if PAN_ARCH >= 6
39 return &bifrost_nir_options;
40 #else
41 return &midgard_nir_options;
42 #endif
43 }
44
45 static enum pipe_format
varying_format(nir_alu_type t,unsigned ncomps)46 varying_format(nir_alu_type t, unsigned ncomps)
47 {
48 #define VARYING_FORMAT(ntype, nsz, ptype, psz) \
49 { \
50 .type = nir_type_ ## ntype ## nsz, \
51 .formats = { \
52 PIPE_FORMAT_R ## psz ## _ ## ptype, \
53 PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
54 PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
55 PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \
56 } \
57 }
58
59 static const struct {
60 nir_alu_type type;
61 enum pipe_format formats[4];
62 } conv[] = {
63 VARYING_FORMAT(float, 32, FLOAT, 32),
64 VARYING_FORMAT(int, 32, SINT, 32),
65 VARYING_FORMAT(uint, 32, UINT, 32),
66 VARYING_FORMAT(float, 16, FLOAT, 16),
67 VARYING_FORMAT(int, 16, SINT, 16),
68 VARYING_FORMAT(uint, 16, UINT, 16),
69 VARYING_FORMAT(int, 8, SINT, 8),
70 VARYING_FORMAT(uint, 8, UINT, 8),
71 VARYING_FORMAT(bool, 32, UINT, 32),
72 VARYING_FORMAT(bool, 16, UINT, 16),
73 VARYING_FORMAT(bool, 8, UINT, 8),
74 VARYING_FORMAT(bool, 1, UINT, 8),
75 };
76 #undef VARYING_FORMAT
77
78 assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
79
80 for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
81 if (conv[i].type == t)
82 return conv[i].formats[ncomps - 1];
83 }
84
85 return PIPE_FORMAT_NONE;
86 }
87
88 static void
collect_varyings(nir_shader * s,nir_variable_mode varying_mode,struct pan_shader_varying * varyings,unsigned * varying_count)89 collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
90 struct pan_shader_varying *varyings,
91 unsigned *varying_count)
92 {
93 *varying_count = 0;
94
95 unsigned comps[PAN_MAX_VARYINGS] = { 0 };
96
97 nir_foreach_variable_with_modes(var, s, varying_mode) {
98 unsigned loc = var->data.driver_location;
99 const struct glsl_type *column =
100 glsl_without_array_or_matrix(var->type);
101 unsigned chan = glsl_get_components(column);
102
103 /* If we have a fractional location added, we need to increase the size
104 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
105 * We could do better but this is an edge case as it is, normally
106 * packed varyings will be aligned.
107 */
108 chan += var->data.location_frac;
109 comps[loc] = MAX2(comps[loc], chan);
110 }
111
112 nir_foreach_variable_with_modes(var, s, varying_mode) {
113 unsigned loc = var->data.driver_location;
114 unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
115 const struct glsl_type *column =
116 glsl_without_array_or_matrix(var->type);
117 enum glsl_base_type base_type = glsl_get_base_type(column);
118 unsigned chan = comps[loc];
119
120 nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
121 type = nir_alu_type_get_base_type(type);
122
123 /* Can't do type conversion since GLSL IR packs in funny ways */
124 if (PAN_ARCH >= 6 && var->data.interpolation == INTERP_MODE_FLAT)
125 type = nir_type_uint;
126
127 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
128 * will saturate instead of wrap which is not conformant, so we need to
129 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
130 * the intended behaviour.
131 */
132 if (type == nir_type_float &&
133 (var->data.precision == GLSL_PRECISION_MEDIUM ||
134 var->data.precision == GLSL_PRECISION_LOW) &&
135 !s->info.has_transform_feedback_varyings) {
136 type |= 16;
137 } else {
138 type |= 32;
139 }
140
141 enum pipe_format format = varying_format(type, chan);
142 assert(format != PIPE_FORMAT_NONE);
143
144 for (int c = 0; c < sz; ++c) {
145 assert(loc + c < PAN_MAX_VARYINGS);
146 varyings[loc + c].location = var->data.location + c;
147 varyings[loc + c].format = format;
148 }
149
150 *varying_count = MAX2(*varying_count, loc + sz);
151 }
152 }
153
#if PAN_ARCH >= 6
/* Maps the NIR ALU type of a blend input to the corresponding register file
 * format. A type of 0 (render target not in use) maps to 0; any type other
 * than the 16/32-bit float, int and uint types is treated as unreachable.
 */
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
        /* Render target not in use */
        if (nir_type == 0)
                return 0;

        switch (nir_type) {
        /* 32-bit types */
        case nir_type_float32:
                return MALI_REGISTER_FILE_FORMAT_F32;
        case nir_type_int32:
                return MALI_REGISTER_FILE_FORMAT_I32;
        case nir_type_uint32:
                return MALI_REGISTER_FILE_FORMAT_U32;

        /* 16-bit types */
        case nir_type_float16:
                return MALI_REGISTER_FILE_FORMAT_F16;
        case nir_type_int16:
                return MALI_REGISTER_FILE_FORMAT_I16;
        case nir_type_uint16:
                return MALI_REGISTER_FILE_FORMAT_U16;

        default:
                unreachable("Unsupported blend shader type for NIR alu type");
                return 0;
        }
}
#endif
179
180 void
GENX(pan_shader_compile)181 GENX(pan_shader_compile)(nir_shader *s,
182 struct panfrost_compile_inputs *inputs,
183 struct util_dynarray *binary,
184 struct pan_shader_info *info)
185 {
186 memset(info, 0, sizeof(*info));
187
188 #if PAN_ARCH >= 6
189 bifrost_compile_shader_nir(s, inputs, binary, info);
190 #else
191 for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) {
192 enum pipe_format fmt = inputs->rt_formats[i];
193 unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;
194
195 if (wb_fmt < MALI_COLOR_FORMAT_R8)
196 inputs->raw_fmt_mask |= BITFIELD_BIT(i);
197 }
198
199 midgard_compile_shader_nir(s, inputs, binary, info);
200 #endif
201
202 info->stage = s->info.stage;
203 info->contains_barrier = s->info.uses_memory_barrier ||
204 s->info.uses_control_barrier;
205 info->separable = s->info.separate_shader;
206
207 switch (info->stage) {
208 case MESA_SHADER_VERTEX:
209 info->attribute_count = util_bitcount64(s->info.inputs_read);
210
211 #if PAN_ARCH <= 5
212 bool vertex_id = BITSET_TEST(s->info.system_values_read,
213 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
214 if (vertex_id)
215 info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
216
217 bool instance_id = BITSET_TEST(s->info.system_values_read,
218 SYSTEM_VALUE_INSTANCE_ID);
219 if (instance_id)
220 info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
221 #endif
222
223 info->vs.writes_point_size =
224 s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
225 collect_varyings(s, nir_var_shader_out, info->varyings.output,
226 &info->varyings.output_count);
227 break;
228 case MESA_SHADER_FRAGMENT:
229 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
230 info->fs.writes_depth = true;
231 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
232 info->fs.writes_stencil = true;
233 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
234 info->fs.writes_coverage = true;
235
236 info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0;
237 info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0;
238
239 /* EXT_shader_framebuffer_fetch requires per-sample */
240 info->fs.sample_shading = s->info.fs.uses_sample_shading ||
241 info->fs.outputs_read;
242
243 info->fs.can_discard = s->info.fs.uses_discard;
244 info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;
245 info->fs.early_fragment_tests = s->info.fs.early_fragment_tests;
246
247 /* List of reasons we need to execute frag shaders when things
248 * are masked off */
249
250 info->fs.sidefx = s->info.writes_memory ||
251 s->info.fs.uses_discard ||
252 s->info.fs.uses_demote;
253
254 /* With suitable ZSA/blend, is early-z possible? */
255 info->fs.can_early_z =
256 !info->fs.sidefx &&
257 !info->fs.writes_depth &&
258 !info->fs.writes_stencil &&
259 !info->fs.writes_coverage;
260
261 /* Similiarly with suitable state, is FPK possible? */
262 info->fs.can_fpk =
263 !info->fs.writes_depth &&
264 !info->fs.writes_stencil &&
265 !info->fs.writes_coverage &&
266 !info->fs.can_discard &&
267 !info->fs.outputs_read;
268
269 info->fs.reads_frag_coord =
270 (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
271 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
272 info->fs.reads_point_coord =
273 s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
274 info->fs.reads_face =
275 (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
276 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
277 info->fs.reads_sample_id =
278 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
279 info->fs.reads_sample_pos =
280 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);
281 info->fs.reads_sample_mask_in =
282 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
283 info->fs.reads_helper_invocation =
284 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);
285 collect_varyings(s, nir_var_shader_in, info->varyings.input,
286 &info->varyings.input_count);
287 break;
288 case MESA_SHADER_COMPUTE:
289 info->wls_size = s->info.shared_size;
290 break;
291 default:
292 unreachable("Unknown shader state");
293 }
294
295 info->outputs_written = s->info.outputs_written;
296
297 /* Sysvals have dedicated UBO */
298 if (info->sysvals.sysval_count)
299 info->ubo_count = MAX2(s->info.num_ubos + 1, inputs->sysval_ubo + 1);
300 else
301 info->ubo_count = s->info.num_ubos;
302
303 info->attribute_count += util_last_bit(s->info.images_used);
304 info->writes_global = s->info.writes_memory;
305
306 info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used);
307
308 #if PAN_ARCH >= 6
309 /* This is "redundant" information, but is needed in a draw-time hot path */
310 for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) {
311 info->bifrost.blend[i].format =
312 bifrost_blend_type_from_nir(info->bifrost.blend[i].type);
313 }
314 #endif
315 }
316