• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
3  * Copyright (C) 2019-2022 Collabora, Ltd.
4  * Copyright (C) 2019 Red Hat Inc.
5  * Copyright (C) 2018 Alyssa Rosenzweig
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *
26  * Authors (Collabora):
27  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
28  *
29  */
30 
31 #include "pan_shader.h"
32 #include "nir/tgsi_to_nir.h"
33 #include "util/u_memory.h"
34 #include "util/u_prim.h"
35 #include "nir_builder.h"
36 #include "nir_serialize.h"
37 #include "pan_bo.h"
38 #include "pan_context.h"
39 
40 static struct panfrost_uncompiled_shader *
panfrost_alloc_shader(const nir_shader * nir)41 panfrost_alloc_shader(const nir_shader *nir)
42 {
43    struct panfrost_uncompiled_shader *so =
44       rzalloc(NULL, struct panfrost_uncompiled_shader);
45 
46    simple_mtx_init(&so->lock, mtx_plain);
47    util_dynarray_init(&so->variants, so);
48 
49    so->nir = nir;
50 
51    /* Serialize the NIR to a binary blob that we can hash for the disk
52     * cache. Drop unnecessary information (like variable names) so the
53     * serialized NIR is smaller, and also to let us detect more isomorphic
54     * shaders when hashing, increasing cache hits.
55     */
56    struct blob blob;
57    blob_init(&blob);
58    nir_serialize(&blob, nir, true);
59    _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
60    blob_finish(&blob);
61 
62    return so;
63 }
64 
65 static struct panfrost_compiled_shader *
panfrost_alloc_variant(struct panfrost_uncompiled_shader * so)66 panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
67 {
68    return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
69 }
70 
71 static bool
lower_load_poly_line_smooth_enabled(nir_builder * b,nir_intrinsic_instr * intrin,void * data)72 lower_load_poly_line_smooth_enabled(nir_builder *b, nir_intrinsic_instr *intrin,
73                                     void *data)
74 {
75    if (intrin->intrinsic != nir_intrinsic_load_poly_line_smooth_enabled)
76       return false;
77 
78    b->cursor = nir_before_instr(&intrin->instr);
79    nir_def_replace(&intrin->def, nir_imm_true(b));
80    return true;
81 }
82 
83 /* From the OpenGL 4.6 spec 14.3.1:
84  *
85  *    If MULTISAMPLE is disabled, multisample rasterization of all primitives
86  *    is equivalent to single-sample (fragment-center) rasterization, except
87  *    that the fragment coverage value is set to full coverage.
88  *
89  * So always use the original sample mask when multisample is disabled */
90 static bool
lower_sample_mask_writes(nir_builder * b,nir_intrinsic_instr * intrin,void * data)91 lower_sample_mask_writes(nir_builder *b, nir_intrinsic_instr *intrin,
92                          void *data)
93 {
94    if (intrin->intrinsic != nir_intrinsic_store_output)
95       return false;
96 
97    if (nir_intrinsic_io_semantics(intrin).location != FRAG_RESULT_SAMPLE_MASK)
98       return false;
99 
100    b->cursor = nir_before_instr(&intrin->instr);
101 
102    nir_def *orig = nir_load_sample_mask(b);
103    nir_def *new = nir_b32csel(b, nir_load_multisampled_pan(b),
104                                intrin->src[0].ssa, orig);
105    nir_src_rewrite(&intrin->src[0], new);
106 
107    return true;
108 }
109 
110 static void
panfrost_shader_compile(struct panfrost_screen * screen,const nir_shader * ir,struct util_debug_callback * dbg,struct panfrost_shader_key * key,unsigned req_local_mem,unsigned fixed_varying_mask,struct panfrost_shader_binary * out)111 panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
112                         struct util_debug_callback *dbg,
113                         struct panfrost_shader_key *key, unsigned req_local_mem,
114                         unsigned fixed_varying_mask,
115                         struct panfrost_shader_binary *out)
116 {
117    struct panfrost_device *dev = pan_device(&screen->base);
118 
119    nir_shader *s = nir_shader_clone(NULL, ir);
120 
121    /* While graphics shaders are preprocessed at CSO create time, compute
122     * kernels are not preprocessed until they're cloned since the driver does
123     * not get ownership of the NIR from compute CSOs. Do this preprocessing now.
124     * Compute CSOs call this function during create time, so preprocessing
125     * happens at CSO create time regardless.
126     */
127    if (gl_shader_stage_is_compute(s->info.stage))
128       pan_shader_preprocess(s, panfrost_device_gpu_id(dev));
129 
130    struct panfrost_compile_inputs inputs = {
131       .debug = dbg,
132       .gpu_id = panfrost_device_gpu_id(dev),
133    };
134 
135    if (dev->arch >= 9)
136       /* Use LD_VAR_BUF for varying lookups. */
137       inputs.valhall.use_ld_var_buf = true;
138 
139    /* Lower this early so the backends don't have to worry about it */
140    if (s->info.stage == MESA_SHADER_FRAGMENT) {
141       inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
142    } else if (s->info.stage == MESA_SHADER_VERTEX) {
143       inputs.fixed_varying_mask = fixed_varying_mask;
144 
145       /* No IDVS for internal XFB shaders */
146       inputs.no_idvs = s->info.has_transform_feedback_varyings;
147 
148       if (s->info.has_transform_feedback_varyings) {
149          NIR_PASS(_, s, nir_io_add_const_offset_to_base,
150                   nir_var_shader_in | nir_var_shader_out);
151          NIR_PASS(_, s, nir_io_add_intrinsic_xfb_info);
152          NIR_PASS(_, s, pan_lower_xfb);
153       }
154    }
155 
156    util_dynarray_init(&out->binary, NULL);
157 
158    if (s->info.stage == MESA_SHADER_FRAGMENT) {
159       if (key->fs.nr_cbufs_for_fragcolor) {
160          NIR_PASS(_, s, panfrost_nir_remove_fragcolor_stores,
161                   key->fs.nr_cbufs_for_fragcolor);
162       }
163 
164       if (key->fs.sprite_coord_enable) {
165          NIR_PASS(_, s, nir_lower_texcoord_replace_late,
166                   key->fs.sprite_coord_enable,
167                   true /* point coord is sysval */);
168       }
169 
170       if (key->fs.clip_plane_enable) {
171          NIR_PASS(_, s, nir_lower_clip_fs, key->fs.clip_plane_enable,
172                   false, true);
173       }
174 
175       if (key->fs.line_smooth) {
176          NIR_PASS(_, s, nir_lower_poly_line_smooth, 16);
177          NIR_PASS(_, s, nir_shader_intrinsics_pass,
178                   lower_load_poly_line_smooth_enabled,
179                   nir_metadata_control_flow, key);
180          NIR_PASS(_, s, nir_lower_alu);
181       }
182 
183       NIR_PASS(_, s, nir_shader_intrinsics_pass,
184                lower_sample_mask_writes, nir_metadata_control_flow, NULL);
185    }
186 
187    if (dev->arch <= 5 && s->info.stage == MESA_SHADER_FRAGMENT) {
188       NIR_PASS(_, s, pan_lower_framebuffer, key->fs.rt_formats,
189                pan_raw_format_mask_midgard(key->fs.rt_formats), 0,
190                panfrost_device_gpu_id(dev) < 0x700);
191    }
192 
193    if (s->info.stage == MESA_SHADER_VERTEX)
194       NIR_PASS(_, s, pan_nir_lower_static_noperspective,
195                key->vs.noperspective_varyings);
196 
197    NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals);
198 
199    /* Lower resource indices */
200    NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);
201 
202    screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
203 
204    assert(req_local_mem >= out->info.wls_size);
205    out->info.wls_size = req_local_mem;
206 
207    /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
208     * a NULL context
209     */
210    ralloc_free(s);
211 }
212 
213 static void
panfrost_shader_get(struct pipe_screen * pscreen,struct panfrost_pool * shader_pool,struct panfrost_pool * desc_pool,struct panfrost_uncompiled_shader * uncompiled,struct util_debug_callback * dbg,struct panfrost_compiled_shader * state,unsigned req_local_mem)214 panfrost_shader_get(struct pipe_screen *pscreen,
215                     struct panfrost_pool *shader_pool,
216                     struct panfrost_pool *desc_pool,
217                     struct panfrost_uncompiled_shader *uncompiled,
218                     struct util_debug_callback *dbg,
219                     struct panfrost_compiled_shader *state,
220                     unsigned req_local_mem)
221 {
222    struct panfrost_screen *screen = pan_screen(pscreen);
223    struct panfrost_device *dev = pan_device(pscreen);
224 
225    struct panfrost_shader_binary res = {0};
226 
227    /* Try to retrieve the variant from the disk cache. If that fails,
228     * compile a new variant and store in the disk cache for later reuse.
229     */
230    if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled,
231                                      &state->key, &res)) {
232       panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
233                               req_local_mem, uncompiled->fixed_varying_mask,
234                               &res);
235 
236       panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key,
237                                 &res);
238    }
239 
240    state->info = res.info;
241    state->sysvals = res.sysvals;
242 
243    if (res.binary.size) {
244       state->bin = panfrost_pool_take_ref(
245          shader_pool,
246          pan_pool_upload_aligned(&shader_pool->base, res.binary.data,
247                                  res.binary.size, 128));
248    }
249 
250    util_dynarray_fini(&res.binary);
251 
252    /* Don't upload RSD for fragment shaders since they need draw-time
253     * merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
254     * shader program descriptors on Valhall, which can be preuploaded even
255     * for fragment shaders. */
256    bool upload =
257       !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
258    screen->vtbl.prepare_shader(state, desc_pool, upload);
259 
260    panfrost_analyze_sysvals(state);
261 }
262 
263 static void
panfrost_build_vs_key(struct panfrost_context * ctx,struct panfrost_vs_key * key,struct panfrost_uncompiled_shader * uncompiled)264 panfrost_build_vs_key(struct panfrost_context *ctx,
265                       struct panfrost_vs_key *key,
266                       struct panfrost_uncompiled_shader *uncompiled)
267 {
268    struct panfrost_uncompiled_shader *fs = ctx->uncompiled[MESA_SHADER_FRAGMENT];
269 
270    assert(fs != NULL && "too early");
271    key->noperspective_varyings = fs->noperspective_varyings;
272 }
273 
274 static void
panfrost_build_fs_key(struct panfrost_context * ctx,struct panfrost_fs_key * key,struct panfrost_uncompiled_shader * uncompiled)275 panfrost_build_fs_key(struct panfrost_context *ctx,
276                       struct panfrost_fs_key *key,
277                       struct panfrost_uncompiled_shader *uncompiled)
278 {
279    const nir_shader *nir = uncompiled->nir;
280 
281    struct panfrost_device *dev = pan_device(ctx->base.screen);
282    struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
283    struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
284    struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
285 
286    /* gl_FragColor lowering needs the number of colour buffers */
287    if (uncompiled->fragcolor_lowered) {
288       key->nr_cbufs_for_fragcolor = fb->nr_cbufs;
289    }
290 
291    /* Point sprite lowering needed on Bifrost and newer */
292    if (dev->arch >= 6 && rast && ctx->active_prim == MESA_PRIM_POINTS) {
293       key->sprite_coord_enable = rast->sprite_coord_enable;
294    }
295 
296    /* User clip plane lowering needed everywhere */
297    if (rast) {
298       key->clip_plane_enable = rast->clip_plane_enable;
299 
300       if (u_reduced_prim(ctx->active_prim) == MESA_PRIM_LINES)
301          key->line_smooth = rast->line_smooth;
302    }
303 
304    if (dev->arch <= 5) {
305       u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
306          enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
307 
308          if ((fb->nr_cbufs > i) && fb->cbufs[i])
309             fmt = fb->cbufs[i]->format;
310 
311          if (panfrost_blendable_formats_v6[fmt].internal)
312             fmt = PIPE_FORMAT_NONE;
313 
314          key->rt_formats[i] = fmt;
315       }
316    }
317 
318    /* Funny desktop GL varying lowering on Valhall */
319    if (dev->arch >= 9) {
320       assert(vs != NULL && "too early");
321       key->fixed_varying_mask = vs->fixed_varying_mask;
322    }
323 }
324 
325 static void
panfrost_build_key(struct panfrost_context * ctx,struct panfrost_shader_key * key,struct panfrost_uncompiled_shader * uncompiled)326 panfrost_build_key(struct panfrost_context *ctx,
327                    struct panfrost_shader_key *key,
328                    struct panfrost_uncompiled_shader *uncompiled)
329 {
330    const nir_shader *nir = uncompiled->nir;
331 
332    switch (nir->info.stage) {
333    case MESA_SHADER_VERTEX:
334       panfrost_build_vs_key(ctx, &key->vs, uncompiled);
335       break;
336    case MESA_SHADER_FRAGMENT:
337       panfrost_build_fs_key(ctx, &key->fs, uncompiled);
338       break;
339    default:
340       break;
341    }
342 }
343 
344 static struct panfrost_compiled_shader *
panfrost_new_variant_locked(struct panfrost_context * ctx,struct panfrost_uncompiled_shader * uncompiled,struct panfrost_shader_key * key)345 panfrost_new_variant_locked(struct panfrost_context *ctx,
346                             struct panfrost_uncompiled_shader *uncompiled,
347                             struct panfrost_shader_key *key)
348 {
349    struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
350 
351    *prog = (struct panfrost_compiled_shader){
352       .key = *key,
353       .stream_output = uncompiled->stream_output,
354    };
355 
356    panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
357                        &ctx->base.debug, prog, 0);
358 
359    prog->earlyzs = pan_earlyzs_analyze(&prog->info);
360 
361    return prog;
362 }
363 
364 static void
panfrost_bind_shader_state(struct pipe_context * pctx,void * hwcso,enum pipe_shader_type type)365 panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso,
366                            enum pipe_shader_type type)
367 {
368    struct panfrost_context *ctx = pan_context(pctx);
369    ctx->uncompiled[type] = hwcso;
370    ctx->prog[type] = NULL;
371 
372    ctx->dirty |= PAN_DIRTY_TLS_SIZE;
373    ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;
374 
375    if (hwcso)
376       panfrost_update_shader_variant(ctx, type);
377 }
378 
379 void
panfrost_update_shader_variant(struct panfrost_context * ctx,enum pipe_shader_type type)380 panfrost_update_shader_variant(struct panfrost_context *ctx,
381                                enum pipe_shader_type type)
382 {
383    /* No shader variants for compute */
384    if (type == PIPE_SHADER_COMPUTE)
385       return;
386 
387    /* We need linking information, defer this */
388    if ((type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX]) ||
389        (type == PIPE_SHADER_VERTEX && !ctx->uncompiled[PIPE_SHADER_FRAGMENT]))
390       return;
391 
392    /* Also defer, happens with GALLIUM_HUD */
393    if (!ctx->uncompiled[type])
394       return;
395 
396    /* Match the appropriate variant */
397    struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
398    struct panfrost_compiled_shader *compiled = NULL;
399 
400    simple_mtx_lock(&uncompiled->lock);
401 
402    struct panfrost_shader_key key = {0};
403    panfrost_build_key(ctx, &key, uncompiled);
404 
405    util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
406                          so) {
407       if (memcmp(&key, &so->key, sizeof(key)) == 0) {
408          compiled = so;
409          break;
410       }
411    }
412 
413    if (compiled == NULL)
414       compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);
415 
416    ctx->prog[type] = compiled;
417 
418    simple_mtx_unlock(&uncompiled->lock);
419 }
420 
421 static void
panfrost_bind_vs_state(struct pipe_context * pctx,void * hwcso)422 panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
423 {
424    panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
425 
426    /* Fragment shaders are linked with vertex shaders */
427    struct panfrost_context *ctx = pan_context(pctx);
428    panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
429 }
430 
431 static void
panfrost_bind_fs_state(struct pipe_context * pctx,void * hwcso)432 panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
433 {
434    panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
435 
436    /* Vertex shaders are linked with fragment shaders */
437    struct panfrost_context *ctx = pan_context(pctx);
438    panfrost_update_shader_variant(ctx, PIPE_SHADER_VERTEX);
439 }
440 
441 static void *
panfrost_create_shader_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)442 panfrost_create_shader_state(struct pipe_context *pctx,
443                              const struct pipe_shader_state *cso)
444 {
445    nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
446                         ? tgsi_to_nir(cso->tokens, pctx->screen, false)
447                         : cso->ir.nir;
448 
449    struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
450 
451    /* The driver gets ownership of the nir_shader for graphics. The NIR is
452     * ralloc'd. Free the NIR when we free the uncompiled shader.
453     */
454    ralloc_steal(so, nir);
455 
456    so->stream_output = cso->stream_output;
457    so->nir = nir;
458 
459    /* Fix linkage early */
460    if (so->nir->info.stage == MESA_SHADER_VERTEX) {
461       so->fixed_varying_mask =
462          (so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
463          ~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
464    }
465 
466    /* gl_FragColor needs to be lowered before lowering I/O, do that now */
467    if (nir->info.stage == MESA_SHADER_FRAGMENT &&
468        nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
469 
470       NIR_PASS(_, nir, nir_lower_fragcolor,
471                nir->info.fs.color_is_dual_source ? 1 : 8);
472       so->fragcolor_lowered = true;
473    }
474 
475    /* Then run the suite of lowering and optimization, including I/O lowering */
476    struct panfrost_device *dev = pan_device(pctx->screen);
477    pan_shader_preprocess(nir, panfrost_device_gpu_id(dev));
478 
479    if (nir->info.stage == MESA_SHADER_FRAGMENT)
480       so->noperspective_varyings =
481          pan_nir_collect_noperspective_varyings_fs(nir);
482 
483    /* Vertex shaders get passed images through the vertex attribute descriptor
484     * array. We need to add an offset to all image intrinsics so they point
485     * to the right attribute.
486     */
487    if (nir->info.stage == MESA_SHADER_VERTEX && dev->arch <= 7) {
488       NIR_PASS(_, nir, pan_lower_image_index,
489                util_bitcount64(nir->info.inputs_read));
490    }
491 
492    /* If this shader uses transform feedback, compile the transform
493     * feedback program. This is a special shader variant.
494     */
495    struct panfrost_context *ctx = pan_context(pctx);
496 
497    if (so->nir->xfb_info) {
498       so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
499       so->xfb->key.vs.is_xfb = true;
500 
501       panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so,
502                           &ctx->base.debug, so->xfb, 0);
503 
504       /* Since transform feedback is handled via the transform
505        * feedback program, the original program no longer uses XFB
506        */
507       nir->info.has_transform_feedback_varyings = false;
508    }
509 
510    /* Compile the program. We don't use vertex shader keys, so there will
511     * be no further vertex shader variants. We do have fragment shader
512     * keys, but we can still compile with a default key that will work most
513     * of the time.
514     */
515    struct panfrost_shader_key key = {0};
516 
517    /* gl_FragColor lowering needs the number of colour buffers on desktop
518     * GL, where it acts as an implicit broadcast to all colour buffers.
519     *
520     * However, gl_FragColor is a legacy feature, so assume that if
521     * gl_FragColor is used, there is only a single render target. The
522     * implicit broadcast is neither especially useful nor required by GLES.
523     */
524    if (so->fragcolor_lowered)
525       key.fs.nr_cbufs_for_fragcolor = 1;
526 
527    /* Creating a CSO is single-threaded, so it's ok to use the
528     * locked function without explicitly taking the lock. Creating a
529     * default variant acts as a precompile.
530     */
531    panfrost_new_variant_locked(ctx, so, &key);
532 
533    return so;
534 }
535 
536 static void
panfrost_delete_shader_state(struct pipe_context * pctx,void * so)537 panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
538 {
539    struct panfrost_uncompiled_shader *cso =
540       (struct panfrost_uncompiled_shader *)so;
541 
542    util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
543       panfrost_bo_unreference(so->bin.bo);
544       panfrost_bo_unreference(so->state.bo);
545       panfrost_bo_unreference(so->linkage.bo);
546    }
547 
548    if (cso->xfb) {
549       panfrost_bo_unreference(cso->xfb->bin.bo);
550       panfrost_bo_unreference(cso->xfb->state.bo);
551       panfrost_bo_unreference(cso->xfb->linkage.bo);
552       free(cso->xfb);
553    }
554 
555    simple_mtx_destroy(&cso->lock);
556 
557    ralloc_free(so);
558 }
559 
560 /*
561  * Create a compute CSO. As compute kernels do not require variants, they are
562  * precompiled, creating both the uncompiled and compiled shaders now.
563  */
564 static void *
panfrost_create_compute_state(struct pipe_context * pctx,const struct pipe_compute_state * cso)565 panfrost_create_compute_state(struct pipe_context *pctx,
566                               const struct pipe_compute_state *cso)
567 {
568    struct panfrost_context *ctx = pan_context(pctx);
569    struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
570    struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
571    memset(v, 0, sizeof *v);
572 
573    assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
574 
575    panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so,
576                        &ctx->base.debug, v, cso->static_shared_mem);
577 
578    /* The NIR becomes invalid after this. For compute kernels, we never
579     * need to access it again. Don't keep a dangling pointer around.
580     */
581    ralloc_free((void *)so->nir);
582    so->nir = NULL;
583 
584    return so;
585 }
586 
587 static void
panfrost_bind_compute_state(struct pipe_context * pipe,void * cso)588 panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
589 {
590    struct panfrost_context *ctx = pan_context(pipe);
591    struct panfrost_uncompiled_shader *uncompiled = cso;
592 
593    ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;
594 
595    ctx->prog[PIPE_SHADER_COMPUTE] =
596       uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
597 }
598 
599 static void
panfrost_get_compute_state_info(struct pipe_context * pipe,void * cso,struct pipe_compute_state_object_info * info)600 panfrost_get_compute_state_info(struct pipe_context *pipe, void *cso,
601                                 struct pipe_compute_state_object_info *info)
602 {
603    struct panfrost_device *dev = pan_device(pipe->screen);
604    struct panfrost_uncompiled_shader *uncompiled = cso;
605    struct panfrost_compiled_shader *cs =
606       util_dynarray_begin(&uncompiled->variants);
607 
608    info->max_threads = panfrost_compute_max_thread_count(
609       &dev->kmod.props, cs->info.work_reg_count);
610    info->private_memory = cs->info.tls_size;
611    info->simd_sizes = pan_subgroup_size(dev->arch);
612    info->preferred_simd_size = info->simd_sizes;
613 }
614 
615 void
panfrost_shader_context_init(struct pipe_context * pctx)616 panfrost_shader_context_init(struct pipe_context *pctx)
617 {
618    pctx->create_vs_state = panfrost_create_shader_state;
619    pctx->delete_vs_state = panfrost_delete_shader_state;
620    pctx->bind_vs_state = panfrost_bind_vs_state;
621 
622    pctx->create_fs_state = panfrost_create_shader_state;
623    pctx->delete_fs_state = panfrost_delete_shader_state;
624    pctx->bind_fs_state = panfrost_bind_fs_state;
625 
626    pctx->create_compute_state = panfrost_create_compute_state;
627    pctx->bind_compute_state = panfrost_bind_compute_state;
628    pctx->get_compute_state_info = panfrost_get_compute_state_info;
629    pctx->delete_compute_state = panfrost_delete_shader_state;
630 }
631