• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Alyssa Rosenzweig
3  * Copyright (C) 2019-2021 Collabora, Ltd.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "pan_device.h"
26 #include "pan_shader.h"
27 #include "pan_format.h"
28 
29 #if PAN_ARCH <= 5
30 #include "panfrost/midgard/midgard_compile.h"
31 #else
32 #include "panfrost/bifrost/bifrost_compile.h"
33 #endif
34 
35 const nir_shader_compiler_options *
GENX(pan_shader_get_compiler_options)36 GENX(pan_shader_get_compiler_options)(void)
37 {
38 #if PAN_ARCH >= 6
39         return &bifrost_nir_options;
40 #else
41         return &midgard_nir_options;
42 #endif
43 }
44 
45 #if PAN_ARCH <= 7
46 static enum pipe_format
varying_format(nir_alu_type t,unsigned ncomps)47 varying_format(nir_alu_type t, unsigned ncomps)
48 {
49 #define VARYING_FORMAT(ntype, nsz, ptype, psz) \
50         { \
51                 .type = nir_type_ ## ntype ## nsz, \
52                 .formats = { \
53                         PIPE_FORMAT_R ## psz ## _ ## ptype, \
54                         PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
55                         PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
56                         PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz  ## A ## psz ## _ ## ptype, \
57                 } \
58         }
59 
60         static const struct {
61                 nir_alu_type type;
62                 enum pipe_format formats[4];
63         } conv[] = {
64                 VARYING_FORMAT(float, 32, FLOAT, 32),
65                 VARYING_FORMAT(int, 32, SINT, 32),
66                 VARYING_FORMAT(uint, 32, UINT, 32),
67                 VARYING_FORMAT(float, 16, FLOAT, 16),
68                 VARYING_FORMAT(int, 16, SINT, 16),
69                 VARYING_FORMAT(uint, 16, UINT, 16),
70                 VARYING_FORMAT(int, 8, SINT, 8),
71                 VARYING_FORMAT(uint, 8, UINT, 8),
72                 VARYING_FORMAT(bool, 32, UINT, 32),
73                 VARYING_FORMAT(bool, 16, UINT, 16),
74                 VARYING_FORMAT(bool, 8, UINT, 8),
75                 VARYING_FORMAT(bool, 1, UINT, 8),
76         };
77 #undef VARYING_FORMAT
78 
79         assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
80 
81         for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
82                 if (conv[i].type == t)
83                         return conv[i].formats[ncomps - 1];
84         }
85 
86         return PIPE_FORMAT_NONE;
87 }
88 
89 static void
collect_varyings(nir_shader * s,nir_variable_mode varying_mode,struct pan_shader_varying * varyings,unsigned * varying_count)90 collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
91                  struct pan_shader_varying *varyings,
92                  unsigned *varying_count)
93 {
94         *varying_count = 0;
95 
96         unsigned comps[PAN_MAX_VARYINGS] = { 0 };
97 
98         nir_foreach_variable_with_modes(var, s, varying_mode) {
99                 unsigned loc = var->data.driver_location;
100                 const struct glsl_type *column =
101                         glsl_without_array_or_matrix(var->type);
102                 unsigned chan = glsl_get_components(column);
103 
104                 /* If we have a fractional location added, we need to increase the size
105                  * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
106                  * We could do better but this is an edge case as it is, normally
107                  * packed varyings will be aligned.
108                  */
109                 chan += var->data.location_frac;
110                 comps[loc] = MAX2(comps[loc], chan);
111         }
112 
113         nir_foreach_variable_with_modes(var, s, varying_mode) {
114                 unsigned loc = var->data.driver_location;
115                 unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
116                 const struct glsl_type *column =
117                         glsl_without_array_or_matrix(var->type);
118                 enum glsl_base_type base_type = glsl_get_base_type(column);
119                 unsigned chan = comps[loc];
120 
121                 nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
122                 type = nir_alu_type_get_base_type(type);
123 
124                 /* Can't do type conversion since GLSL IR packs in funny ways */
125                 if (PAN_ARCH >= 6 && var->data.interpolation == INTERP_MODE_FLAT)
126                         type = nir_type_uint;
127 
128                 /* Point size is handled specially on Valhall (with malloc
129                  * IDVS).. probably though this entire linker should be bypassed
130                  * for Valhall in the future.
131                  */
132                 if (PAN_ARCH >= 9 && var->data.location == VARYING_SLOT_PSIZ)
133                         continue;
134 
135                 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
136                  * will saturate instead of wrap which is not conformant, so we need to
137                  * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
138                  * the intended behaviour.
139                  */
140                 if (type == nir_type_float &&
141                     (var->data.precision == GLSL_PRECISION_MEDIUM ||
142                      var->data.precision == GLSL_PRECISION_LOW) &&
143                     !s->info.has_transform_feedback_varyings) {
144                         type |= 16;
145                 } else {
146                         type |= 32;
147                 }
148 
149                 enum pipe_format format = varying_format(type, chan);
150                 assert(format != PIPE_FORMAT_NONE);
151 
152                 for (int c = 0; c < sz; ++c) {
153                         assert(loc + c < PAN_MAX_VARYINGS);
154                         varyings[loc + c].location = var->data.location + c;
155                         varyings[loc + c].format = format;
156                 }
157 
158                 *varying_count = MAX2(*varying_count, loc + sz);
159         }
160 }
161 #endif
162 
163 #if PAN_ARCH >= 6
164 static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)165 bifrost_blend_type_from_nir(nir_alu_type nir_type)
166 {
167         switch(nir_type) {
168         case 0: /* Render target not in use */
169                 return 0;
170         case nir_type_float16:
171                 return MALI_REGISTER_FILE_FORMAT_F16;
172         case nir_type_float32:
173                 return MALI_REGISTER_FILE_FORMAT_F32;
174         case nir_type_int32:
175                 return MALI_REGISTER_FILE_FORMAT_I32;
176         case nir_type_uint32:
177                 return MALI_REGISTER_FILE_FORMAT_U32;
178         case nir_type_int16:
179                 return MALI_REGISTER_FILE_FORMAT_I16;
180         case nir_type_uint16:
181                 return MALI_REGISTER_FILE_FORMAT_U16;
182         default:
183                 unreachable("Unsupported blend shader type for NIR alu type");
184                 return 0;
185         }
186 }
187 #endif
188 
189 void
GENX(pan_shader_compile)190 GENX(pan_shader_compile)(nir_shader *s,
191                          struct panfrost_compile_inputs *inputs,
192                          struct util_dynarray *binary,
193                          struct pan_shader_info *info)
194 {
195         memset(info, 0, sizeof(*info));
196 
197 #if PAN_ARCH >= 6
198         bifrost_compile_shader_nir(s, inputs, binary, info);
199 #else
200         for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) {
201                 enum pipe_format fmt = inputs->rt_formats[i];
202                 unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;
203 
204                 if (wb_fmt < MALI_COLOR_FORMAT_R8)
205                         inputs->raw_fmt_mask |= BITFIELD_BIT(i);
206         }
207 
208         midgard_compile_shader_nir(s, inputs, binary, info);
209 #endif
210 
211         info->stage = s->info.stage;
212         info->contains_barrier = s->info.uses_memory_barrier ||
213                                  s->info.uses_control_barrier;
214         info->separable = s->info.separate_shader;
215 
216         switch (info->stage) {
217         case MESA_SHADER_VERTEX:
218                 info->attributes_read = s->info.inputs_read;
219                 info->attributes_read_count = util_bitcount64(info->attributes_read);
220                 info->attribute_count = info->attributes_read_count;
221 
222 #if PAN_ARCH <= 5
223                 bool vertex_id = BITSET_TEST(s->info.system_values_read,
224                                              SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
225                 if (vertex_id)
226                         info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);
227 
228                 bool instance_id = BITSET_TEST(s->info.system_values_read,
229                                                SYSTEM_VALUE_INSTANCE_ID);
230                 if (instance_id)
231                         info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
232 #endif
233 
234                 info->vs.writes_point_size =
235                         s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
236 
237 #if PAN_ARCH >= 9
238                 info->varyings.output_count =
239                         util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
240 #else
241                 collect_varyings(s, nir_var_shader_out, info->varyings.output,
242                                  &info->varyings.output_count);
243 #endif
244                 break;
245         case MESA_SHADER_FRAGMENT:
246                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
247                         info->fs.writes_depth = true;
248                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
249                         info->fs.writes_stencil = true;
250                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
251                         info->fs.writes_coverage = true;
252 
253                 info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0;
254                 info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0;
255                 info->fs.sample_shading = s->info.fs.uses_sample_shading;
256 
257                 info->fs.can_discard = s->info.fs.uses_discard;
258                 info->fs.early_fragment_tests = s->info.fs.early_fragment_tests;
259 
260                 /* List of reasons we need to execute frag shaders when things
261                  * are masked off */
262 
263                 info->fs.sidefx = s->info.writes_memory ||
264                                   s->info.fs.uses_discard ||
265                                   s->info.fs.uses_demote;
266 
267                 /* With suitable ZSA/blend, is early-z possible? */
268                 info->fs.can_early_z =
269                         !info->fs.sidefx &&
270                         !info->fs.writes_depth &&
271                         !info->fs.writes_stencil &&
272                         !info->fs.writes_coverage;
273 
274                 /* Similiarly with suitable state, is FPK possible? */
275                 info->fs.can_fpk =
276                         !info->fs.writes_depth &&
277                         !info->fs.writes_stencil &&
278                         !info->fs.writes_coverage &&
279                         !info->fs.can_discard &&
280                         !info->fs.outputs_read;
281 
282                 /* Requires the same hardware guarantees, so grouped as one bit
283                  * in the hardware.
284                  */
285                 info->contains_barrier |= s->info.fs.needs_quad_helper_invocations;
286 
287                 info->fs.reads_frag_coord =
288                         (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
289                         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
290                 info->fs.reads_point_coord =
291                         s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
292                 info->fs.reads_face =
293                         (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
294                         BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
295 #if PAN_ARCH >= 9
296                 info->varyings.output_count =
297                         util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0);
298 #else
299                 collect_varyings(s, nir_var_shader_in, info->varyings.input,
300                                  &info->varyings.input_count);
301 #endif
302                 break;
303         case MESA_SHADER_COMPUTE:
304                 info->wls_size = s->info.shared_size;
305                 break;
306         default:
307                 unreachable("Unknown shader state");
308         }
309 
310         info->outputs_written = s->info.outputs_written;
311 
312         /* Sysvals have dedicated UBO */
313         info->ubo_count = s->info.num_ubos;
314         if (info->sysvals.sysval_count && inputs->fixed_sysval_ubo < 0)
315                 info->ubo_count++;
316 
317         info->attribute_count += BITSET_LAST_BIT(s->info.images_used);
318         info->writes_global = s->info.writes_memory;
319 
320         info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used);
321 
322 #if PAN_ARCH >= 6
323         /* This is "redundant" information, but is needed in a draw-time hot path */
324         for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) {
325                 info->bifrost.blend[i].format =
326                         bifrost_blend_type_from_nir(info->bifrost.blend[i].type);
327         }
328 #endif
329 }
330