/*
 * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
 * Copyright (C) 2019-2022 Collabora, Ltd.
 * Copyright (C) 2019 Red Hat Inc.
 * Copyright (C) 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 *
 */

#include "pan_shader.h"
#include "nir/tgsi_to_nir.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_builder.h"
#include "nir_serialize.h"
#include "pan_bo.h"
#include "pan_context.h"

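/* Allocate an uncompiled shader CSO and compute the SHA-1 of its NIR, used as
 * the disk cache key for all variants compiled from it.
 */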
static struct panfrost_uncompiled_shader *
panfrost_alloc_shader(const nir_shader *nir)
{
   struct panfrost_uncompiled_shader *so =
      rzalloc(NULL, struct panfrost_uncompiled_shader);

   simple_mtx_init(&so->lock, mtx_plain);
   util_dynarray_init(&so->variants, so);

   so->nir = nir;

   /* Serialize the NIR to a binary blob that we can hash for the disk
    * cache. Drop unnecessary information (like variable names) so the
    * serialized NIR is smaller, and also to let us detect more isomorphic
    * shaders when hashing, increasing cache hits.
    */
   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, true);
   _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
   blob_finish(&blob);

   return so;
}

static struct panfrost_compiled_shader *
panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
{
   return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
}

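/* This pass only runs when the FS key enables line smoothing, so the
 * load_poly_line_smooth_enabled sysval can be folded to a constant true.
 */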
static bool
lower_load_poly_line_smooth_enabled(nir_builder *b, nir_intrinsic_instr *intrin,
                                    void *data)
{
   if (intrin->intrinsic != nir_intrinsic_load_poly_line_smooth_enabled)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);
   nir_def_replace(&intrin->def, nir_imm_true(b));
   return true;
}

/* From the OpenGL 4.6 spec 14.3.1:
 *
 *    If MULTISAMPLE is disabled, multisample rasterization of all primitives
 *    is equivalent to single-sample (fragment-center) rasterization, except
 *    that the fragment coverage value is set to full coverage.
 *
 * So always use the original sample mask when multisample is disabled. */
static bool
lower_sample_mask_writes(nir_builder *b, nir_intrinsic_instr *intrin,
                         void *data)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return false;

   if (nir_intrinsic_io_semantics(intrin).location != FRAG_RESULT_SAMPLE_MASK)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *orig = nir_load_sample_mask(b);
   nir_def *new = nir_b32csel(b, nir_load_multisampled_pan(b),
                              intrin->src[0].ssa, orig);
   nir_src_rewrite(&intrin->src[0], new);

   return true;
}

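/* Compile a single variant: clone the uncompiled NIR, apply the key-dependent
 * lowering passes, and hand the result to the per-generation backend compiler.
 */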
static void
panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
                        struct util_debug_callback *dbg,
                        struct panfrost_shader_key *key, unsigned req_local_mem,
                        unsigned fixed_varying_mask,
                        struct panfrost_shader_binary *out)
{
   struct panfrost_device *dev = pan_device(&screen->base);

   nir_shader *s = nir_shader_clone(NULL, ir);

   /* While graphics shaders are preprocessed at CSO create time, compute
    * kernels are not preprocessed until they're cloned here, since the driver
    * does not get ownership of the NIR from compute CSOs. Compute CSOs call
    * this function at create time, so preprocessing still happens at CSO
    * create time either way.
    */
   if (gl_shader_stage_is_compute(s->info.stage))
      pan_shader_preprocess(s, panfrost_device_gpu_id(dev));

   struct panfrost_compile_inputs inputs = {
      .debug = dbg,
      .gpu_id = panfrost_device_gpu_id(dev),
   };

   /* Lower this early so the backends don't have to worry about it */
   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
   } else if (s->info.stage == MESA_SHADER_VERTEX) {
      inputs.fixed_varying_mask = fixed_varying_mask;

      /* No IDVS for internal XFB shaders */
      inputs.no_idvs = s->info.has_transform_feedback_varyings;

      if (s->info.has_transform_feedback_varyings) {
         NIR_PASS(_, s, nir_io_add_const_offset_to_base,
                  nir_var_shader_in | nir_var_shader_out);
         NIR_PASS(_, s, nir_io_add_intrinsic_xfb_info);
         NIR_PASS(_, s, pan_lower_xfb);
      }
   }

   util_dynarray_init(&out->binary, NULL);

   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      if (key->fs.nr_cbufs_for_fragcolor) {
         NIR_PASS(_, s, panfrost_nir_remove_fragcolor_stores,
                  key->fs.nr_cbufs_for_fragcolor);
      }

      if (key->fs.sprite_coord_enable) {
         NIR_PASS(_, s, nir_lower_texcoord_replace_late,
                  key->fs.sprite_coord_enable,
                  true /* point coord is sysval */);
      }

      if (key->fs.clip_plane_enable) {
         NIR_PASS(_, s, nir_lower_clip_fs, key->fs.clip_plane_enable,
                  false, true);
      }

      if (key->fs.line_smooth) {
         NIR_PASS(_, s, nir_lower_poly_line_smooth, 16);
         NIR_PASS(_, s, nir_shader_intrinsics_pass,
                  lower_load_poly_line_smooth_enabled,
                  nir_metadata_control_flow, key);
         NIR_PASS(_, s, nir_lower_alu);
      }

      NIR_PASS(_, s, nir_shader_intrinsics_pass,
               lower_sample_mask_writes, nir_metadata_control_flow, NULL);
   }

   if (dev->arch <= 5 && s->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, s, pan_lower_framebuffer, key->fs.rt_formats,
               pan_raw_format_mask_midgard(key->fs.rt_formats), 0,
               panfrost_device_gpu_id(dev) < 0x700);
   }

   if (s->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS(_, s, pan_nir_lower_static_noperspective,
               key->vs.noperspective_varyings);

   NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals);

   /* Lower resource indices */
   NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);

   screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);

   assert(req_local_mem >= out->info.wls_size);
   out->info.wls_size = req_local_mem;

   /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
    * a NULL context
    */
   ralloc_free(s);
}

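/* Look up a variant in the disk cache, compiling it if missing, then upload
 * the binary and (where the architecture allows) its descriptors to GPU
 * memory.
 */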
static void
panfrost_shader_get(struct pipe_screen *pscreen,
                    struct panfrost_pool *shader_pool,
                    struct panfrost_pool *desc_pool,
                    struct panfrost_uncompiled_shader *uncompiled,
                    struct util_debug_callback *dbg,
                    struct panfrost_compiled_shader *state,
                    unsigned req_local_mem)
{
   struct panfrost_screen *screen = pan_screen(pscreen);
   struct panfrost_device *dev = pan_device(pscreen);

   struct panfrost_shader_binary res = {0};

   /* Try to retrieve the variant from the disk cache. If that fails,
    * compile a new variant and store it in the disk cache for later reuse.
    */
   if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled,
                                     &state->key, &res)) {
      panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
                              req_local_mem, uncompiled->fixed_varying_mask,
                              &res);

      panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key,
                                &res);
   }

   state->info = res.info;
   state->sysvals = res.sysvals;

   if (res.binary.size) {
      state->bin = panfrost_pool_take_ref(
         shader_pool,
         pan_pool_upload_aligned(&shader_pool->base, res.binary.data,
                                 res.binary.size, 128));
   }

   util_dynarray_fini(&res.binary);

   /* Don't upload RSDs for fragment shaders, since they need draw-time
    * merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
    * shader program descriptors on Valhall, which can be preuploaded even
    * for fragment shaders. */
   bool upload =
      !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
   screen->vtbl.prepare_shader(state, desc_pool, upload);

   panfrost_analyze_sysvals(state);
}

static void
panfrost_build_vs_key(struct panfrost_context *ctx,
                      struct panfrost_vs_key *key,
                      struct panfrost_uncompiled_shader *uncompiled)
{
   struct panfrost_uncompiled_shader *fs = ctx->uncompiled[MESA_SHADER_FRAGMENT];

   assert(fs != NULL && "too early");
   key->noperspective_varyings = fs->noperspective_varyings;
}

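/* The fragment shader key captures the rasterizer and framebuffer state that
 * must be baked into the compiled code: point sprites, user clip planes, line
 * smoothing, render target formats on Midgard, and the linked vertex shader's
 * fixed varyings on Valhall.
 */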
static void
panfrost_build_fs_key(struct panfrost_context *ctx,
                      struct panfrost_fs_key *key,
                      struct panfrost_uncompiled_shader *uncompiled)
{
   const nir_shader *nir = uncompiled->nir;

   struct panfrost_device *dev = pan_device(ctx->base.screen);
   struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
   struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
   struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];

   /* gl_FragColor lowering needs the number of colour buffers */
   if (uncompiled->fragcolor_lowered) {
      key->nr_cbufs_for_fragcolor = fb->nr_cbufs;
   }

   /* Point sprite lowering needed on Bifrost and newer */
   if (dev->arch >= 6 && rast && ctx->active_prim == MESA_PRIM_POINTS) {
      key->sprite_coord_enable = rast->sprite_coord_enable;
   }

   /* User clip plane lowering needed everywhere */
   if (rast) {
      key->clip_plane_enable = rast->clip_plane_enable;

      if (u_reduced_prim(ctx->active_prim) == MESA_PRIM_LINES)
         key->line_smooth = rast->line_smooth;
   }

   if (dev->arch <= 5) {
      u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
         enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;

         if ((fb->nr_cbufs > i) && fb->cbufs[i])
            fmt = fb->cbufs[i]->format;

         if (panfrost_blendable_formats_v6[fmt].internal)
            fmt = PIPE_FORMAT_NONE;

         key->rt_formats[i] = fmt;
      }
   }

   /* Funny desktop GL varying lowering on Valhall */
   if (dev->arch >= 9) {
      assert(vs != NULL && "too early");
      key->fixed_varying_mask = vs->fixed_varying_mask;
   }
}

static void
panfrost_build_key(struct panfrost_context *ctx,
                   struct panfrost_shader_key *key,
                   struct panfrost_uncompiled_shader *uncompiled)
{
   const nir_shader *nir = uncompiled->nir;

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      panfrost_build_vs_key(ctx, &key->vs, uncompiled);
      break;
   case MESA_SHADER_FRAGMENT:
      panfrost_build_fs_key(ctx, &key->fs, uncompiled);
      break;
   default:
      break;
   }
}

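/* Compile a new variant for the given key and append it to the CSO's variant
 * list. The caller must hold the uncompiled shader's lock, or otherwise
 * guarantee exclusive access (as at CSO create time).
 */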
static struct panfrost_compiled_shader *
panfrost_new_variant_locked(struct panfrost_context *ctx,
                            struct panfrost_uncompiled_shader *uncompiled,
                            struct panfrost_shader_key *key)
{
   struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);

   *prog = (struct panfrost_compiled_shader){
      .key = *key,
      .stream_output = uncompiled->stream_output,
   };

   panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
                       &ctx->base.debug, prog, 0);

   prog->earlyzs = pan_earlyzs_analyze(&prog->info);

   return prog;
}

static void
panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso,
                           enum pipe_shader_type type)
{
   struct panfrost_context *ctx = pan_context(pctx);
   ctx->uncompiled[type] = hwcso;
   ctx->prog[type] = NULL;

   ctx->dirty |= PAN_DIRTY_TLS_SIZE;
   ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;

   if (hwcso)
      panfrost_update_shader_variant(ctx, type);
}

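/* Select the compiled variant matching the current context state, compiling
 * one on demand if no existing variant matches the key.
 */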
void
panfrost_update_shader_variant(struct panfrost_context *ctx,
                               enum pipe_shader_type type)
{
   /* No shader variants for compute */
   if (type == PIPE_SHADER_COMPUTE)
      return;

   /* We need linking information, defer this */
   if ((type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX]) ||
       (type == PIPE_SHADER_VERTEX && !ctx->uncompiled[PIPE_SHADER_FRAGMENT]))
      return;

   /* Also defer, happens with GALLIUM_HUD */
   if (!ctx->uncompiled[type])
      return;

   /* Match the appropriate variant */
   struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
   struct panfrost_compiled_shader *compiled = NULL;

   simple_mtx_lock(&uncompiled->lock);

   struct panfrost_shader_key key = {0};
   panfrost_build_key(ctx, &key, uncompiled);

   util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
                         so) {
      if (memcmp(&key, &so->key, sizeof(key)) == 0) {
         compiled = so;
         break;
      }
   }

   if (compiled == NULL)
      compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);

   ctx->prog[type] = compiled;

   simple_mtx_unlock(&uncompiled->lock);
}

static void
panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
{
   panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);

   /* Fragment shaders are linked with vertex shaders */
   struct panfrost_context *ctx = pan_context(pctx);
   panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
}

static void
panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
{
   panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);

   /* Vertex shaders are linked with fragment shaders */
   struct panfrost_context *ctx = pan_context(pctx);
   panfrost_update_shader_variant(ctx, PIPE_SHADER_VERTEX);
}

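/* Create a graphics shader CSO. The NIR is preprocessed once here; per-key
 * lowering happens when each variant is compiled. A default variant (and the
 * transform feedback program, if needed) is compiled immediately as a
 * precompile.
 */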
static void *
panfrost_create_shader_state(struct pipe_context *pctx,
                             const struct pipe_shader_state *cso)
{
   nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
                        ? tgsi_to_nir(cso->tokens, pctx->screen, false)
                        : cso->ir.nir;

   struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);

   /* The driver gets ownership of the nir_shader for graphics. The NIR is
    * ralloc'd. Free the NIR when we free the uncompiled shader.
    */
   ralloc_steal(so, nir);

   so->stream_output = cso->stream_output;
   so->nir = nir;

   /* Fix linkage early */
   if (so->nir->info.stage == MESA_SHADER_VERTEX) {
      so->fixed_varying_mask =
         (so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
         ~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
   }

   /* gl_FragColor needs to be lowered before lowering I/O, so do that now */
   if (nir->info.stage == MESA_SHADER_FRAGMENT &&
       nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {

      NIR_PASS(_, nir, nir_lower_fragcolor,
               nir->info.fs.color_is_dual_source ? 1 : 8);
      so->fragcolor_lowered = true;
   }

   /* Then run the suite of lowering and optimization, including I/O lowering */
   struct panfrost_device *dev = pan_device(pctx->screen);
   pan_shader_preprocess(nir, panfrost_device_gpu_id(dev));

   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      so->noperspective_varyings =
         pan_nir_collect_noperspective_varyings_fs(nir);

   /* Vertex shaders get passed images through the vertex attribute descriptor
    * array. We need to add an offset to all image intrinsics so they point
    * to the right attribute.
    */
   if (nir->info.stage == MESA_SHADER_VERTEX && dev->arch <= 7) {
      NIR_PASS(_, nir, pan_lower_image_index,
               util_bitcount64(nir->info.inputs_read));
   }

   /* If this shader uses transform feedback, compile the transform
    * feedback program. This is a special shader variant.
    */
   struct panfrost_context *ctx = pan_context(pctx);

   if (so->nir->xfb_info) {
      so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
      so->xfb->key.vs.is_xfb = true;

      panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so,
                          &ctx->base.debug, so->xfb, 0);

      /* Since transform feedback is handled via the transform
       * feedback program, the original program no longer uses XFB
       */
      nir->info.has_transform_feedback_varyings = false;
   }

   /* Compile the program with a default key. Shader keys (fragment state,
    * noperspective varyings) may require additional variants at draw time,
    * but the default key will work most of the time.
    */
   struct panfrost_shader_key key = {0};

   /* gl_FragColor lowering needs the number of colour buffers on desktop
    * GL, where it acts as an implicit broadcast to all colour buffers.
    *
    * However, gl_FragColor is a legacy feature, so assume that if
    * gl_FragColor is used, there is only a single render target. The
    * implicit broadcast is neither especially useful nor required by GLES.
    */
   if (so->fragcolor_lowered)
      key.fs.nr_cbufs_for_fragcolor = 1;

   /* Creating a CSO is single-threaded, so it's ok to use the
    * locked function without explicitly taking the lock. Creating a
    * default variant acts as a precompile.
    */
   panfrost_new_variant_locked(ctx, so, &key);

   return so;
}

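/* Common destructor for graphics and compute CSOs: drop the BO references
 * held by every variant (and the XFB program, if any) before freeing the CSO,
 * which also frees any NIR still ralloc'd against it.
 */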
static void
panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
{
   struct panfrost_uncompiled_shader *cso =
      (struct panfrost_uncompiled_shader *)so;

   util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
      panfrost_bo_unreference(so->bin.bo);
      panfrost_bo_unreference(so->state.bo);
      panfrost_bo_unreference(so->linkage.bo);
   }

   if (cso->xfb) {
      panfrost_bo_unreference(cso->xfb->bin.bo);
      panfrost_bo_unreference(cso->xfb->state.bo);
      panfrost_bo_unreference(cso->xfb->linkage.bo);
      free(cso->xfb);
   }

   simple_mtx_destroy(&cso->lock);

   ralloc_free(so);
}

/*
 * Create a compute CSO. As compute kernels do not require variants, they are
 * precompiled, creating both the uncompiled and compiled shaders now.
 */
static void *
panfrost_create_compute_state(struct pipe_context *pctx,
                              const struct pipe_compute_state *cso)
{
   struct panfrost_context *ctx = pan_context(pctx);
   struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
   struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
   memset(v, 0, sizeof *v);

   assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");

   panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so,
                       &ctx->base.debug, v, cso->static_shared_mem);

   /* The NIR becomes invalid after this. For compute kernels, we never
    * need to access it again. Don't keep a dangling pointer around.
    */
   ralloc_free((void *)so->nir);
   so->nir = NULL;

   return so;
}

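/* Compute shaders have exactly one variant (precompiled at create time), so
 * binding just selects the first entry of the variant array.
 */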
static void
panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
{
   struct panfrost_context *ctx = pan_context(pipe);
   struct panfrost_uncompiled_shader *uncompiled = cso;

   ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;

   ctx->prog[PIPE_SHADER_COMPUTE] =
      uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
}

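/* Report per-kernel limits derived from the compiled variant: maximum thread
 * count (a function of register usage), private (TLS) memory size, and the
 * hardware subgroup size.
 */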
static void
panfrost_get_compute_state_info(struct pipe_context *pipe, void *cso,
                                struct pipe_compute_state_object_info *info)
{
   struct panfrost_device *dev = pan_device(pipe->screen);
   struct panfrost_uncompiled_shader *uncompiled = cso;
   struct panfrost_compiled_shader *cs =
      util_dynarray_begin(&uncompiled->variants);

   info->max_threads = panfrost_compute_max_thread_count(
      &dev->kmod.props, cs->info.work_reg_count);
   info->private_memory = cs->info.tls_size;
   info->simd_sizes = pan_subgroup_size(dev->arch);
   info->preferred_simd_size = info->simd_sizes;
}

void
panfrost_shader_context_init(struct pipe_context *pctx)
{
   pctx->create_vs_state = panfrost_create_shader_state;
   pctx->delete_vs_state = panfrost_delete_shader_state;
   pctx->bind_vs_state = panfrost_bind_vs_state;

   pctx->create_fs_state = panfrost_create_shader_state;
   pctx->delete_fs_state = panfrost_delete_shader_state;
   pctx->bind_fs_state = panfrost_bind_fs_state;

   pctx->create_compute_state = panfrost_create_compute_state;
   pctx->bind_compute_state = panfrost_bind_compute_state;
   pctx->get_compute_state_info = panfrost_get_compute_state_info;
   pctx->delete_compute_state = panfrost_delete_shader_state;
}