1 /*
2 * Copyright (c) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
32
33 #include "pipe/p_state.h"
34
35 #include "lima_screen.h"
36 #include "lima_context.h"
37 #include "lima_job.h"
38 #include "lima_program.h"
39 #include "lima_bo.h"
40 #include "lima_format.h"
41
42 #include "ir/lima_ir.h"
43
44 static const nir_shader_compiler_options vs_nir_options = {
45 .lower_ffma16 = true,
46 .lower_ffma32 = true,
47 .lower_ffma64 = true,
48 .lower_fpow = true,
49 .lower_ffract = true,
50 .lower_fdiv = true,
51 .lower_fmod = true,
52 .lower_fsqrt = true,
53 .lower_sub = true,
54 .lower_flrp32 = true,
55 .lower_flrp64 = true,
56 /* could be implemented by clamp */
57 .lower_fsat = true,
58 .lower_bitops = true,
59 .lower_rotate = true,
60 .lower_sincos = true,
61 .lower_fceil = true,
62 };
63
64 static const nir_shader_compiler_options fs_nir_options = {
65 .lower_ffma16 = true,
66 .lower_ffma32 = true,
67 .lower_ffma64 = true,
68 .lower_fpow = true,
69 .lower_fdiv = true,
70 .lower_fmod = true,
71 .lower_sub = true,
72 .lower_flrp32 = true,
73 .lower_flrp64 = true,
74 .lower_fsign = true,
75 .lower_rotate = true,
76 .lower_fdot = true,
77 .lower_fdph = true,
78 .lower_bitops = true,
79 .lower_vector_cmp = true,
80 };
81
82 const void *
lima_program_get_compiler_options(enum pipe_shader_type shader)83 lima_program_get_compiler_options(enum pipe_shader_type shader)
84 {
85 switch (shader) {
86 case PIPE_SHADER_VERTEX:
87 return &vs_nir_options;
88 case PIPE_SHADER_FRAGMENT:
89 return &fs_nir_options;
90 default:
91 return NULL;
92 }
93 }
94
/* Size callback passed to nir_lower_io: reports the attribute slot count of
 * a GLSL type. The bindless argument is part of the callback signature but
 * is not consulted here.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   int slots = glsl_count_attribute_slots(type, false);
   return slots;
}
100
101 void
lima_program_optimize_vs_nir(struct nir_shader * s)102 lima_program_optimize_vs_nir(struct nir_shader *s)
103 {
104 bool progress;
105
106 NIR_PASS_V(s, nir_lower_viewport_transform);
107 NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
108 NIR_PASS_V(s, nir_lower_io,
109 nir_var_shader_in | nir_var_shader_out, type_size, 0);
110 NIR_PASS_V(s, nir_lower_load_const_to_scalar);
111 NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
112 NIR_PASS_V(s, nir_lower_io_to_scalar,
113 nir_var_shader_in|nir_var_shader_out);
114
115 do {
116 progress = false;
117
118 NIR_PASS_V(s, nir_lower_vars_to_ssa);
119 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
120 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
121 NIR_PASS(progress, s, nir_copy_prop);
122 NIR_PASS(progress, s, nir_opt_remove_phis);
123 NIR_PASS(progress, s, nir_opt_dce);
124 NIR_PASS(progress, s, nir_opt_dead_cf);
125 NIR_PASS(progress, s, nir_opt_cse);
126 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
127 NIR_PASS(progress, s, nir_opt_algebraic);
128 NIR_PASS(progress, s, lima_nir_lower_ftrunc);
129 NIR_PASS(progress, s, nir_opt_constant_folding);
130 NIR_PASS(progress, s, nir_opt_undef);
131 NIR_PASS(progress, s, nir_opt_loop_unroll,
132 nir_var_shader_in |
133 nir_var_shader_out |
134 nir_var_function_temp);
135 } while (progress);
136
137 NIR_PASS_V(s, nir_lower_int_to_float);
138 /* int_to_float pass generates ftrunc, so lower it */
139 NIR_PASS(progress, s, lima_nir_lower_ftrunc);
140 NIR_PASS_V(s, nir_lower_bool_to_float);
141
142 NIR_PASS_V(s, nir_copy_prop);
143 NIR_PASS_V(s, nir_opt_dce);
144 NIR_PASS_V(s, nir_lower_locals_to_regs);
145 NIR_PASS_V(s, nir_convert_from_ssa, true);
146 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
147 nir_sweep(s);
148 }
149
150 static bool
lima_alu_to_scalar_filter_cb(const nir_instr * instr,const void * data)151 lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
152 {
153 if (instr->type != nir_instr_type_alu)
154 return false;
155
156 nir_alu_instr *alu = nir_instr_as_alu(instr);
157 switch (alu->op) {
158 case nir_op_frcp:
159 case nir_op_frsq:
160 case nir_op_flog2:
161 case nir_op_fexp2:
162 case nir_op_fsqrt:
163 case nir_op_fsin:
164 case nir_op_fcos:
165 return true;
166 default:
167 break;
168 }
169
170 /* nir vec4 fcsel assumes that each component of the condition will be
171 * used to select the same component from the two options, but Utgard PP
172 * has only 1 component condition. If all condition components are not the
173 * same we need to lower it to scalar.
174 */
175 switch (alu->op) {
176 case nir_op_bcsel:
177 case nir_op_fcsel:
178 break;
179 default:
180 return false;
181 }
182
183 int num_components = nir_dest_num_components(alu->dest.dest);
184
185 uint8_t swizzle = alu->src[0].swizzle[0];
186
187 for (int i = 1; i < num_components; i++)
188 if (alu->src[0].swizzle[i] != swizzle)
189 return true;
190
191 return false;
192 }
193
194 void
lima_program_optimize_fs_nir(struct nir_shader * s,struct nir_lower_tex_options * tex_options)195 lima_program_optimize_fs_nir(struct nir_shader *s,
196 struct nir_lower_tex_options *tex_options)
197 {
198 bool progress;
199
200 NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
201 NIR_PASS_V(s, nir_lower_io,
202 nir_var_shader_in | nir_var_shader_out, type_size, 0);
203 NIR_PASS_V(s, nir_lower_regs_to_ssa);
204 NIR_PASS_V(s, nir_lower_tex, tex_options);
205
206 do {
207 progress = false;
208 NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
209 } while (progress);
210
211 do {
212 progress = false;
213
214 NIR_PASS_V(s, nir_lower_vars_to_ssa);
215 NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
216 NIR_PASS(progress, s, nir_copy_prop);
217 NIR_PASS(progress, s, nir_opt_remove_phis);
218 NIR_PASS(progress, s, nir_opt_dce);
219 NIR_PASS(progress, s, nir_opt_dead_cf);
220 NIR_PASS(progress, s, nir_opt_cse);
221 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
222 NIR_PASS(progress, s, nir_opt_algebraic);
223 NIR_PASS(progress, s, nir_opt_constant_folding);
224 NIR_PASS(progress, s, nir_opt_undef);
225 NIR_PASS(progress, s, nir_opt_loop_unroll,
226 nir_var_shader_in |
227 nir_var_shader_out |
228 nir_var_function_temp);
229 NIR_PASS(progress, s, lima_nir_split_load_input);
230 } while (progress);
231
232 NIR_PASS_V(s, nir_lower_int_to_float);
233 NIR_PASS_V(s, nir_lower_bool_to_float);
234
235 /* Some ops must be lowered after being converted from int ops,
236 * so re-run nir_opt_algebraic after int lowering. */
237 do {
238 progress = false;
239 NIR_PASS(progress, s, nir_opt_algebraic);
240 } while (progress);
241
242 /* Must be run after optimization loop */
243 NIR_PASS_V(s, lima_nir_scale_trig);
244
245 /* Lower modifiers */
246 NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
247 NIR_PASS_V(s, nir_copy_prop);
248 NIR_PASS_V(s, nir_opt_dce);
249
250 NIR_PASS_V(s, nir_lower_locals_to_regs);
251 NIR_PASS_V(s, nir_convert_from_ssa, true);
252 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
253
254 NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
255 NIR_PASS_V(s, nir_lower_vec_to_movs);
256
257 NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
258 NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
259 NIR_PASS_V(s, lima_nir_duplicate_load_consts);
260
261 nir_sweep(s);
262 }
263
264 static bool
lima_fs_compile_shader(struct lima_context * ctx,struct lima_fs_shader_state * fs,struct nir_lower_tex_options * tex_options)265 lima_fs_compile_shader(struct lima_context *ctx,
266 struct lima_fs_shader_state *fs,
267 struct nir_lower_tex_options *tex_options)
268 {
269 struct lima_screen *screen = lima_screen(ctx->base.screen);
270 nir_shader *nir = nir_shader_clone(fs, fs->base.ir.nir);
271
272 lima_program_optimize_fs_nir(nir, tex_options);
273
274 if (lima_debug & LIMA_DEBUG_PP)
275 nir_print_shader(nir, stdout);
276
277 if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) {
278 ralloc_free(nir);
279 return false;
280 }
281
282 fs->uses_discard = nir->info.fs.uses_discard;
283 ralloc_free(nir);
284
285 return true;
286 }
287
288 static void *
lima_create_fs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)289 lima_create_fs_state(struct pipe_context *pctx,
290 const struct pipe_shader_state *cso)
291 {
292 struct lima_context *ctx = lima_context(pctx);
293 struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
294
295 if (!so)
296 return NULL;
297
298 nir_shader *nir;
299 if (cso->type == PIPE_SHADER_IR_NIR)
300 /* The backend takes ownership of the NIR shader on state
301 * creation.
302 */
303 nir = cso->ir.nir;
304 else {
305 assert(cso->type == PIPE_SHADER_IR_TGSI);
306
307 nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
308 }
309
310 so->base.type = PIPE_SHADER_IR_NIR;
311 so->base.ir.nir = nir;
312
313 uint8_t identity[4] = { PIPE_SWIZZLE_X,
314 PIPE_SWIZZLE_Y,
315 PIPE_SWIZZLE_Z,
316 PIPE_SWIZZLE_W };
317
318 struct nir_lower_tex_options tex_options = {
319 .lower_txp = ~0u,
320 .swizzle_result = 0,
321 };
322
323 /* Initialize with identity swizzles. That should suffice for most shaders */
324 for (int i = 0; i < PIPE_MAX_SAMPLERS; i++)
325 memcpy(so->swizzles[i], identity, 4);
326
327 if (!lima_fs_compile_shader(ctx, so, &tex_options)) {
328 ralloc_free(so);
329 return NULL;
330 }
331
332 return so;
333 }
334
335 static void
lima_bind_fs_state(struct pipe_context * pctx,void * hwcso)336 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
337 {
338 struct lima_context *ctx = lima_context(pctx);
339
340 ctx->fs = hwcso;
341 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
342 }
343
344 static void
lima_delete_fs_state(struct pipe_context * pctx,void * hwcso)345 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
346 {
347 struct lima_fs_shader_state *so = hwcso;
348
349 if (so->bo)
350 lima_bo_unreference(so->bo);
351
352 ralloc_free(so->base.ir.nir);
353 ralloc_free(so);
354 }
355
356 bool
lima_update_vs_state(struct lima_context * ctx)357 lima_update_vs_state(struct lima_context *ctx)
358 {
359 struct lima_vs_shader_state *vs = ctx->vs;
360 if (!vs->bo) {
361 struct lima_screen *screen = lima_screen(ctx->base.screen);
362 vs->bo = lima_bo_create(screen, vs->shader_size, 0);
363 if (!vs->bo) {
364 fprintf(stderr, "lima: create vs shader bo fail\n");
365 return false;
366 }
367
368 memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
369 ralloc_free(vs->shader);
370 vs->shader = NULL;
371 }
372
373 return true;
374 }
375
376 bool
lima_update_fs_state(struct lima_context * ctx)377 lima_update_fs_state(struct lima_context *ctx)
378 {
379 struct lima_fs_shader_state *fs = ctx->fs;
380 struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
381 struct nir_lower_tex_options tex_options = {
382 .lower_txp = ~0u,
383 .swizzle_result = 0,
384 };
385 bool needs_recompile = false;
386
387 /* Check if texture formats has changed since last compilation.
388 * If it has we need to recompile shader.
389 */
390 if (((ctx->dirty & LIMA_CONTEXT_DIRTY_TEXTURES) &&
391 lima_tex->num_samplers &&
392 lima_tex->num_textures)) {
393 uint8_t identity[4] = { PIPE_SWIZZLE_X,
394 PIPE_SWIZZLE_Y,
395 PIPE_SWIZZLE_Z,
396 PIPE_SWIZZLE_W };
397 for (int i = 0; i < lima_tex->num_samplers; i++) {
398 struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]);
399 struct pipe_resource *prsc = texture->base.texture;
400 const uint8_t *swizzle = lima_format_get_texel_swizzle(prsc->format);
401 if (memcmp(fs->swizzles[i], swizzle, 4)) {
402 needs_recompile = true;
403 memcpy(fs->swizzles[i], swizzle, 4);
404 }
405
406 for (int j = 0; j < 4; j++)
407 tex_options.swizzles[i][j] = swizzle[j];
408
409 if (memcmp(swizzle, identity, 4))
410 tex_options.swizzle_result |= (1 << i);
411 }
412
413 /* Fill rest with identity swizzle */
414 for (int i = lima_tex->num_samplers; i < PIPE_MAX_SAMPLERS; i++)
415 memcpy(fs->swizzles[i], identity, 4);
416 }
417
418 if (needs_recompile) {
419 if (fs->bo) {
420 lima_bo_unreference(fs->bo);
421 fs->bo = NULL;
422 }
423
424 if (!lima_fs_compile_shader(ctx, fs, &tex_options))
425 return false;
426 }
427
428 if (!fs->bo) {
429 struct lima_screen *screen = lima_screen(ctx->base.screen);
430 fs->bo = lima_bo_create(screen, fs->shader_size, 0);
431 if (!fs->bo) {
432 fprintf(stderr, "lima: create fs shader bo fail\n");
433 return false;
434 }
435
436 memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
437 ralloc_free(fs->shader);
438 fs->shader = NULL;
439 }
440
441 struct lima_job *job = lima_job_get(ctx);
442 job->pp_max_stack_size = MAX2(job->pp_max_stack_size, ctx->fs->stack_size);
443
444 return true;
445 }
446
447 static void *
lima_create_vs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)448 lima_create_vs_state(struct pipe_context *pctx,
449 const struct pipe_shader_state *cso)
450 {
451 struct lima_context *ctx = lima_context(pctx);
452 struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
453
454 if (!so)
455 return NULL;
456
457 nir_shader *nir;
458 if (cso->type == PIPE_SHADER_IR_NIR)
459 nir = cso->ir.nir;
460 else {
461 assert(cso->type == PIPE_SHADER_IR_TGSI);
462
463 nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
464 }
465
466 lima_program_optimize_vs_nir(nir);
467
468 if (lima_debug & LIMA_DEBUG_GP)
469 nir_print_shader(nir, stdout);
470
471 if (!gpir_compile_nir(so, nir, &ctx->debug)) {
472 ralloc_free(so);
473 return NULL;
474 }
475
476 ralloc_free(nir);
477
478 return so;
479 }
480
481 static void
lima_bind_vs_state(struct pipe_context * pctx,void * hwcso)482 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
483 {
484 struct lima_context *ctx = lima_context(pctx);
485
486 ctx->vs = hwcso;
487 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
488 }
489
490 static void
lima_delete_vs_state(struct pipe_context * pctx,void * hwcso)491 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
492 {
493 struct lima_vs_shader_state *so = hwcso;
494
495 if (so->bo)
496 lima_bo_unreference(so->bo);
497
498 ralloc_free(so);
499 }
500
501 void
lima_program_init(struct lima_context * ctx)502 lima_program_init(struct lima_context *ctx)
503 {
504 ctx->base.create_fs_state = lima_create_fs_state;
505 ctx->base.bind_fs_state = lima_bind_fs_state;
506 ctx->base.delete_fs_state = lima_delete_fs_state;
507
508 ctx->base.create_vs_state = lima_create_vs_state;
509 ctx->base.bind_vs_state = lima_bind_vs_state;
510 ctx->base.delete_vs_state = lima_delete_vs_state;
511 }
512