1 /*
2 * © Copyright 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019-2020 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include "pan_bo.h"
30 #include "pan_context.h"
31 #include "pan_util.h"
32 #include "panfrost-quirks.h"
33
34 #include "compiler/nir/nir.h"
35 #include "nir/tgsi_to_nir.h"
36 #include "midgard/midgard_compile.h"
37 #include "bifrost/bifrost_compile.h"
38 #include "util/u_dynarray.h"
39 #include "util/u_upload_mgr.h"
40
41 #include "tgsi/tgsi_dump.h"
42
43 static void
pan_prepare_midgard_props(struct panfrost_shader_state * state,gl_shader_stage stage)44 pan_prepare_midgard_props(struct panfrost_shader_state *state,
45 gl_shader_stage stage)
46 {
47 pan_prepare(&state->properties, RENDERER_PROPERTIES);
48 state->properties.uniform_buffer_count = state->ubo_count;
49 state->properties.midgard.uniform_count = state->uniform_count;
50 state->properties.midgard.shader_has_side_effects = state->writes_global;
51
52 /* TODO: Select the appropriate mode. Suppresing inf/nan works around
53 * some bugs in gles2 apps (eg glmark2's terrain scene) but isn't
54 * conformant on gles3 */
55 state->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_SUPPRESSED;
56
57 /* For fragment shaders, work register count, early-z, reads at draw-time */
58
59 if (stage != MESA_SHADER_FRAGMENT)
60 state->properties.midgard.work_register_count = state->work_reg_count;
61 }
62
63 static void
pan_prepare_bifrost_props(struct panfrost_shader_state * state,gl_shader_stage stage)64 pan_prepare_bifrost_props(struct panfrost_shader_state *state,
65 gl_shader_stage stage)
66 {
67
68 switch (stage) {
69 case MESA_SHADER_VERTEX:
70 pan_prepare(&state->properties, RENDERER_PROPERTIES);
71 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
72 state->properties.uniform_buffer_count = state->ubo_count;
73
74 pan_prepare(&state->preload, PRELOAD);
75 state->preload.uniform_count = state->uniform_count;
76 state->preload.vertex.vertex_id = true;
77 state->preload.vertex.instance_id = true;
78 break;
79 case MESA_SHADER_FRAGMENT:
80 pan_prepare(&state->properties, RENDERER_PROPERTIES);
81 /* Early-Z set at draw-time */
82 if (state->writes_depth || state->writes_stencil) {
83 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
84 state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
85 } else {
86 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
87 state->properties.bifrost.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
88 }
89 state->properties.uniform_buffer_count = state->ubo_count;
90 state->properties.bifrost.shader_modifies_coverage = state->can_discard;
91
92 pan_prepare(&state->preload, PRELOAD);
93 state->preload.uniform_count = state->uniform_count;
94 state->preload.fragment.fragment_position = state->reads_frag_coord;
95 state->preload.fragment.coverage = true;
96 state->preload.fragment.primitive_flags = state->reads_face;
97 break;
98 default:
99 unreachable("TODO");
100 }
101 }
102
103 static void
pan_upload_shader_descriptor(struct panfrost_context * ctx,struct panfrost_shader_state * state)104 pan_upload_shader_descriptor(struct panfrost_context *ctx,
105 struct panfrost_shader_state *state)
106 {
107 const struct panfrost_device *dev = pan_device(ctx->base.screen);
108 struct mali_state_packed *out;
109
110 u_upload_alloc(ctx->state_uploader, 0, MALI_RENDERER_STATE_LENGTH, MALI_RENDERER_STATE_LENGTH,
111 &state->upload.offset, &state->upload.rsrc, (void **) &out);
112
113 pan_pack(out, RENDERER_STATE, cfg) {
114 cfg.shader = state->shader;
115 cfg.properties = state->properties;
116
117 if (dev->quirks & IS_BIFROST)
118 cfg.preload = state->preload;
119 }
120
121 u_upload_unmap(ctx->state_uploader);
122 }
123
124 static unsigned
pan_format_from_nir_base(nir_alu_type base)125 pan_format_from_nir_base(nir_alu_type base)
126 {
127 switch (base) {
128 case nir_type_int:
129 return MALI_FORMAT_SINT;
130 case nir_type_uint:
131 case nir_type_bool:
132 return MALI_FORMAT_UINT;
133 case nir_type_float:
134 return MALI_CHANNEL_FLOAT;
135 default:
136 unreachable("Invalid base");
137 }
138 }
139
140 static unsigned
pan_format_from_nir_size(nir_alu_type base,unsigned size)141 pan_format_from_nir_size(nir_alu_type base, unsigned size)
142 {
143 if (base == nir_type_float) {
144 switch (size) {
145 case 16: return MALI_FORMAT_SINT;
146 case 32: return MALI_FORMAT_UNORM;
147 default:
148 unreachable("Invalid float size for format");
149 }
150 } else {
151 switch (size) {
152 case 1:
153 case 8: return MALI_CHANNEL_8;
154 case 16: return MALI_CHANNEL_16;
155 case 32: return MALI_CHANNEL_32;
156 default:
157 unreachable("Invalid int size for format");
158 }
159 }
160 }
161
162 static enum mali_format
pan_format_from_glsl(const struct glsl_type * type,unsigned precision,unsigned frac)163 pan_format_from_glsl(const struct glsl_type *type, unsigned precision, unsigned frac)
164 {
165 const struct glsl_type *column = glsl_without_array_or_matrix(type);
166 enum glsl_base_type glsl_base = glsl_get_base_type(column);
167 nir_alu_type t = nir_get_nir_type_for_glsl_base_type(glsl_base);
168 unsigned chan = glsl_get_components(column);
169
170 /* If we have a fractional location added, we need to increase the size
171 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
172 * We could do better but this is an edge case as it is, normally
173 * packed varyings will be aligned. */
174 chan += frac;
175
176 assert(chan >= 1 && chan <= 4);
177
178 unsigned base = nir_alu_type_get_base_type(t);
179 unsigned size = nir_alu_type_get_type_size(t);
180
181 /* Demote to fp16 where possible. int16 varyings are TODO as the hw
182 * will saturate instead of wrap which is not conformant, so we need to
183 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
184 * the intended behaviour */
185
186 bool is_16 = (precision == GLSL_PRECISION_MEDIUM)
187 || (precision == GLSL_PRECISION_LOW);
188
189 if (is_16 && base == nir_type_float)
190 size = 16;
191 else
192 size = 32;
193
194 return pan_format_from_nir_base(base) |
195 pan_format_from_nir_size(base, size) |
196 MALI_NR_CHANNELS(chan);
197 }
198
199 static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)200 bifrost_blend_type_from_nir(nir_alu_type nir_type)
201 {
202 switch(nir_type) {
203 case 0: /* Render target not in use */
204 return 0;
205 case nir_type_float16:
206 return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
207 case nir_type_float32:
208 return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
209 case nir_type_int32:
210 return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
211 case nir_type_uint32:
212 return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
213 case nir_type_int16:
214 return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
215 case nir_type_uint16:
216 return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
217 default:
218 unreachable("Unsupported blend shader type for NIR alu type");
219 return 0;
220 }
221 }
222
/*
 * Compile a shader given as NIR or TGSI for the given stage, upload the
 * binary to a GPU buffer, and fill out `state` with everything the
 * command-stream code needs at draw time: varying/attribute counts and
 * formats, sysvals, flags derived from shader_info, and the packed
 * renderer-state descriptor (uploaded now for non-fragment stages,
 * deferred to draw time for fragment).
 *
 * Ownership: the input `ir` is not consumed (NIR is cloned); the cloned
 * NIR and the compiler's program struct are freed before returning.
 * `outputs_written`, if non-NULL, receives s->info.outputs_written.
 */
void
panfrost_shader_compile(struct panfrost_context *ctx,
                        enum pipe_shader_ir ir_type,
                        const void *ir,
                        gl_shader_stage stage,
                        struct panfrost_shader_state *state,
                        uint64_t *outputs_written)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        nir_shader *s;

        /* Both paths produce a NIR shader we own (ralloc'd against NULL) */
        if (ir_type == PIPE_SHADER_IR_NIR) {
                s = nir_shader_clone(NULL, ir);
        } else {
                assert (ir_type == PIPE_SHADER_IR_TGSI);
                s = tgsi_to_nir(ir, ctx->base.screen, false);
        }

        s->info.stage = stage;

        /* Call out to Midgard compiler given the above NIR */
        struct panfrost_compile_inputs inputs = {
                .gpu_id = dev->gpu_id,
                .shaderdb = !!(dev->debug & PAN_DBG_PRECOMPILE),
        };

        /* Render-target formats feed blend-shader generation */
        memcpy(inputs.rt_formats, state->rt_formats, sizeof(inputs.rt_formats));

        panfrost_program *program;

        if (dev->quirks & IS_BIFROST)
                program = bifrost_compile_shader_nir(NULL, s, &inputs);
        else
                program = midgard_compile_shader_nir(NULL, s, &inputs);

        /* Prepare the compiled binary for upload */
        mali_ptr shader = 0;
        unsigned attribute_count = 0, varying_count = 0;
        int size = program->compiled.size;

        /* Empty shaders (size == 0) get no BO; `shader` stays 0 and is
         * patched below for Midgard */
        if (size) {
                state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
                memcpy(state->bo->ptr.cpu, program->compiled.data, size);
                shader = state->bo->ptr.gpu;
        }

        /* Midgard needs the first tag on the bottom nibble */

        if (!(dev->quirks & IS_BIFROST)) {
                /* If size = 0, we tag as "end-of-shader" */

                if (size)
                        shader |= program->first_tag;
                else
                        shader = 0x1;
        }

        /* Sysvals are prefix uniforms; record them for draw-time upload */
        state->sysval_count = program->sysval_count;
        memcpy(state->sysval, program->sysvals, sizeof(state->sysval[0]) * state->sysval_count);

        bool vertex_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
        bool instance_id = BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);

        state->writes_global = s->info.writes_memory;

        switch (stage) {
        case MESA_SHADER_VERTEX:
                attribute_count = util_bitcount64(s->info.inputs_read);
                varying_count = util_bitcount64(s->info.outputs_written);

                /* gl_VertexID / gl_InstanceID are fed through fixed
                 * attribute slots, so grow the count to cover them */
                if (vertex_id)
                        attribute_count = MAX2(attribute_count, PAN_VERTEX_ID + 1);

                if (instance_id)
                        attribute_count = MAX2(attribute_count, PAN_INSTANCE_ID + 1);

                break;
        case MESA_SHADER_FRAGMENT:
                /* Record GPU addresses of blend-shader return sites so the
                 * blend shaders can be patched in at draw time. Addresses
                 * are low-32-bit and must be 8-byte aligned.
                 * NOTE(review): a nonzero offset implies size != 0, so
                 * state->bo is valid here -- relies on the compiler never
                 * emitting blend returns for an empty shader. */
                for (unsigned i = 0; i < ARRAY_SIZE(state->blend_ret_addrs); i++) {
                        if (!program->blend_ret_offsets[i])
                                continue;

                        state->blend_ret_addrs[i] = (state->bo->ptr.gpu & UINT32_MAX) +
                                                    program->blend_ret_offsets[i];
                        assert(!(state->blend_ret_addrs[i] & 0x7));
                }
                varying_count = util_bitcount64(s->info.inputs_read);
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
                        state->writes_depth = true;
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
                        state->writes_stencil = true;

                /* gl_LastFragColor reads alias reads of render target 0 */
                uint64_t outputs_read = s->info.outputs_read;
                if (outputs_read & BITFIELD64_BIT(FRAG_RESULT_COLOR))
                        outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0);

                state->outputs_read = outputs_read >> FRAG_RESULT_DATA0;

                /* List of reasons we need to execute frag shaders when things
                 * are masked off */

                state->fs_sidefx =
                        s->info.writes_memory ||
                        s->info.fs.uses_discard ||
                        s->info.fs.uses_demote;
                break;
        case MESA_SHADER_COMPUTE:
                /* TODO: images */
                state->shared_size = s->info.cs.shared_size;
                break;
        default:
                unreachable("Unknown shader state");
        }

        /* NOTE(review): s->info.fs / s->info.cs are members of a union in
         * shader_info; these fragment fields are read for every stage and
         * are meaningless (though harmless in practice) outside fragment
         * shaders -- confirm downstream users only consult them for FS. */
        state->can_discard = s->info.fs.uses_discard;
        state->helper_invocations = s->info.fs.needs_helper_invocations;
        state->stack_size = program->tls_size;

        /* gl_FragCoord / gl_FrontFacing may surface either as varyings or
         * as system values depending on lowering, so check both */
        state->reads_frag_coord = (s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||
                                  BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
        state->reads_point_coord = s->info.inputs_read & (1 << VARYING_SLOT_PNTC);
        state->reads_face = (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||
                            BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
        state->writes_point_size = s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);

        if (outputs_written)
                *outputs_written = s->info.outputs_written;

        /* Separate as primary uniform count is truncated. Sysvals are prefix
         * uniforms */
        state->uniform_count = MIN2(s->num_uniforms + program->sysval_count, program->uniform_cutoff);
        state->work_reg_count = program->work_register_count;

        /* Bifrost blend shaders need the register format of each RT */
        if (dev->quirks & IS_BIFROST)
                for (unsigned i = 0; i < ARRAY_SIZE(state->blend_types); i++)
                        state->blend_types[i] = bifrost_blend_type_from_nir(program->blend_types[i]);

        /* Record the varying mapping for the command stream's bookkeeping */

        nir_variable_mode varying_mode =
                stage == MESA_SHADER_VERTEX ? nir_var_shader_out : nir_var_shader_in;

        nir_foreach_variable_with_modes(var, s, varying_mode) {
                unsigned loc = var->data.driver_location;
                unsigned sz = glsl_count_attribute_slots(var->type, FALSE);

                /* Multi-slot variables (matrices/arrays) occupy consecutive
                 * driver locations, all with the same format */
                for (int c = 0; c < sz; ++c) {
                        state->varyings_loc[loc + c] = var->data.location + c;
                        state->varyings[loc + c] = pan_format_from_glsl(var->type,
                                var->data.precision, var->data.location_frac);
                }
        }

        /* Needed for linkage */
        state->attribute_count = attribute_count;
        state->varying_count = varying_count;
        state->ubo_count = s->info.num_ubos + 1; /* off-by-one for uniforms */

        /* Prepare the descriptors at compile-time */
        state->shader.shader = shader;
        state->shader.attribute_count = attribute_count;
        state->shader.varying_count = varying_count;
        state->shader.texture_count = s->info.num_textures;
        /* One sampler per texture: the driver uses paired texture/sampler
         * descriptors */
        state->shader.sampler_count = s->info.num_textures;

        if (dev->quirks & IS_BIFROST)
                pan_prepare_bifrost_props(state, stage);
        else
                pan_prepare_midgard_props(state, stage);

        state->properties.stencil_from_shader = state->writes_stencil;
        state->properties.shader_contains_barrier = state->helper_invocations;
        state->properties.depth_source = state->writes_depth ?
                                         MALI_DEPTH_SOURCE_SHADER :
                                         MALI_DEPTH_SOURCE_FIXED_FUNCTION;

        /* Fragment descriptors depend on draw state, so they are packed at
         * draw time instead of here */
        if (stage != MESA_SHADER_FRAGMENT)
                pan_upload_shader_descriptor(ctx, state);

        ralloc_free(program);

        /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
         * a NULL context */
        ralloc_free(s);
}
409