/*
 * Copyright © 2014 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3/ir3_nir.h"

/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
 * ir3 generations; the only difference is small details in the actual
 * CP_LOAD_STATE packets (which is handled inside the generation
 * specific ctx->emit_const(_bo)() fxns)
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions:
 */

static bool is_stateobj(struct fd_ringbuffer *ring);

static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);

static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);

static void
emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t size,
                struct pipe_resource *buffer)
{
   struct fd_resource *rsc = fd_resource(buffer);
   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}

static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);
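
/* A per-generation .c file that uses this header is expected to look
 * roughly like the following sketch (hypothetical, names illustrative):
 *
 *    static bool is_stateobj(struct fd_ringbuffer *ring) { ... }
 *    static void emit_const_user(...) { ... payload inline in the packet ... }
 *    static void emit_const_bo(...)   { ... payload sourced from a BO ... }
 *    static void emit_const_ptrs(...) { ... table of BO addresses ... }
 *
 *    #include "ir3_const.h"   /* this header */
 */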

static void
emit_const_asserts(struct fd_ringbuffer *ring,
                   const struct ir3_shader_variant *v, uint32_t regid,
                   uint32_t sizedwords)
{
   assert((v->type == MESA_SHADER_VERTEX) ||
          !v->compiler->load_shader_consts_via_preamble);
   assert((regid % 4) == 0);
   assert((sizedwords % 4) == 0);
   assert(regid + sizedwords <= v->constlen * 4);
}

static void
ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   /* when we emit const state via ring (IB2) we need a WFI, but when
    * it is emitted via stateobj, we don't
    */
   if (is_stateobj(ring))
      return;

   fd_wfi(batch, ring);
}

/**
 * Indirectly calculates size of cmdstream needed for ir3_emit_user_consts().
 * Returns number of packets, and total size of all the payload.
 *
 * The value can be a worst-case, i.e. some shader variants may not read all
 * consts, etc.
 *
 * Returns size in dwords.
 */
static inline void
ir3_user_consts_size(const struct ir3_ubo_analysis_state *state, unsigned *packets,
                     unsigned *size)
{
   *packets = *size = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      if (state->range[i].start < state->range[i].end) {
         *size += state->range[i].end - state->range[i].start;
         (*packets)++;
      }
   }
}
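
/* A hypothetical caller sizing a ringbuffer/stateobj from those numbers
 * might do something like the following (illustrative only; the per-packet
 * header overhead and the exact units are generation specific):
 *
 *    unsigned packets, size;
 *    ir3_user_consts_size(&ir3_const_state(v)->ubo_state, &packets, &size);
 *    ring = fd_submit_new_ringbuffer(submit, (size + 4 * packets) * 4,
 *                                    FD_RINGBUFFER_STREAMING);
 */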

/**
 * Uploads the referenced subranges of the nir constant_data to the hardware's
 * constant buffer.
 */
static inline void
ir3_emit_constant_data(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      unsigned ubo = state->range[i].ubo.block;
      if (ubo != const_state->consts_ubo.idx)
         continue;

      uint32_t size = state->range[i].end - state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      emit_const_bo(ring, v, state->range[i].offset / 4,
                    v->info.constant_data_offset + state->range[i].start,
                    size / 4, v->bo);
   }
}

/**
 * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
 * outside of these ranges will be done using full UBO accesses in the
 * shader).
 */
static inline void
ir3_emit_user_consts(const struct ir3_shader_variant *v,
                     struct fd_ringbuffer *ring,
                     struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      assert(!state->range[i].ubo.bindless);
      unsigned ubo = state->range[i].ubo.block;
      if (!(constbuf->enabled_mask & (1 << ubo)) ||
          ubo == const_state->consts_ubo.idx) {
         continue;
      }
      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];

      uint32_t size = state->range[i].end - state->range[i].start;
      uint32_t offset = cb->buffer_offset + state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      /* things should be aligned to vec4: */
      assert((state->range[i].offset % 16) == 0);
      assert((size % 16) == 0);
      assert((offset % 16) == 0);
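      /* e.g. (illustrative) a range with offset=64 and size=128 bytes lands
       * at dword regid 16 (const reg c4) and covers 32 dwords (8 vec4):
       */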

      if (cb->user_buffer) {
         uint8_t *p = (uint8_t *)cb->user_buffer;
         p += state->range[i].start;
         emit_const_user(ring, v, state->range[i].offset / 4, size / 4, (uint32_t *)p);
      } else {
         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
                         cb->buffer);
      }
   }
}

static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_UBO_PTRS].offset_vec4;

   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
    * const state and ldg:
    */
   if (ctx->screen->gen >= 6)
      return;

   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_UBO_PTRS,
                            v->constlen)) {
      uint32_t params = const_state->num_ubos;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
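         /* consts_ubo is the special UBO index the compiler uses for the
          * shader's own constant_data, which lives in the shader BO:
          */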
         if (i == const_state->consts_ubo.idx) {
            bos[i] = v->bo;
            offsets[i] = v->info.constant_data_offset;
            continue;
         }

         struct pipe_constant_buffer *cb = &constbuf->cb[i];

         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
          * them to a buffer now, and save it in the constbuf so that we
          * don't have to reupload until they get changed.
          */
         if (cb->user_buffer) {
            struct pipe_context *pctx = &ctx->base;
            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
            cb->user_buffer = NULL;
         }

         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
            offsets[i] = cb->buffer_offset;
            bos[i] = fd_resource(cb->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
ir3_emit_image_dims(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring,
                    struct fd_shaderimg_stateobj *si)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_IMAGE_DIMS].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_IMAGE_DIMS,
                            v->constlen)) {
      uint32_t dims[align(const_state->image_dims.count, 4)];
      unsigned mask = const_state->image_dims.mask;

      while (mask) {
         struct pipe_image_view *img;
         struct fd_resource *rsc;
         unsigned index = u_bit_scan(&mask);
         unsigned off = const_state->image_dims.off[index];

         img = &si->si[index];
         rsc = fd_resource(img->resource);

         dims[off + 0] = util_format_get_blocksize(img->format);
         if (img->resource->target != PIPE_BUFFER) {
            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
            /* note for 2d/cube/etc images, even if re-interpreted
             * as a different color format, the pixel size should
             * be the same, so use original dimensions for y and z
             * stride:
             */
            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
            /* see corresponding logic in fd_resource_offset(): */
            if (rsc->layout.layer_first) {
               dims[off + 2] = rsc->layout.layer_size;
            } else {
               dims[off + 2] = slice->size0;
            }
         } else {
            /* For buffer-backed images, the log2 of the format's
             * bytes-per-pixel is placed on the 2nd slot. This is useful
             * when emitting image_size instructions, for which we need
             * to divide by bpp for image buffers. Since the bpp
             * can only be power-of-two, the division is implemented
             * as a SHR, and for that it is handy to have the log2 of
             * bpp as a constant. (log2 = first-set-bit - 1)
             */
            dims[off + 1] = ffs(dims[off + 0]) - 1;
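            /* e.g. (illustrative) for a 4 byte/texel RGBA8 buffer image
             * this gives dims[off + 1] = 2, so the shader can turn a byte
             * offset into a texel index with ">> 2".
             */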
         }
      }
      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);

      emit_const_user(ring, v, offset * 4, size, dims);
   }
}

static inline void
ir3_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->allocs.max_const_offset_vec4;
   int size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;
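   /* e.g. (illustrative) with base = 10 vec4, 4 vec4 of immediates, and
    * constlen = 12, only the first 2 vec4 of immediates are emitted.
    */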

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, const_state->immediates);

   /* NIR constant data has the same lifetime as immediates, so upload it
    * now, too.
    */
   ir3_emit_constant_data(v, ring);
}

static inline void
ir3_emit_link_map(const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(consumer);
   if (!ir3_const_can_upload(&const_state->allocs,
                             IR3_CONST_ALLOC_PRIMITIVE_MAP,
                             consumer->constlen))
      return;

   uint32_t base =
      const_state->allocs.consts[IR3_CONST_ALLOC_PRIMITIVE_MAP].offset_vec4;
   int size = DIV_ROUND_UP(consumer->input_size, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, consumer->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, consumer, base, size, producer->output_loc);
}

/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
           struct fd_ringbuffer *ring)
{
   /* streamout addresses after driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_TFBO].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_TFBO,
                            v->constlen)) {
      struct fd_streamout_stateobj *so = &ctx->streamout;
      const struct ir3_stream_output_info *info = &v->stream_output;
      uint32_t params = 4;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         struct pipe_stream_output_target *target = so->targets[i];

         if (target) {
            offsets[i] =
               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
            bos[i] = fd_resource(target->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
emit_common_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   enum pipe_shader_type t) assert_dt
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];

   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
    * if we emit any of it we need to emit all.  This is because
    * we are using the same state-group-id each time for uniform
    * state, and if previous update is never evaluated (due to no
    * visible primitives in the current tile) then the new stateobj
    * completely replaces the old one.
    *
    * Possibly if we split up different parts of the const state to
    * different state-objects we could avoid this.
    */
   if (dirty && is_stateobj(ring))
      dirty = (enum fd_dirty_shader_state)~0;

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
      struct fd_constbuf_stateobj *constbuf;
      bool shader_dirty;

      constbuf = &ctx->constbuf[t];
      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

      ring_wfi(ctx->batch, ring);

      ir3_emit_user_consts(v, ring, constbuf);
      ir3_emit_ubos(ctx, v, ring, constbuf);
      if (shader_dirty)
         ir3_emit_immediates(v, ring);
   }

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
      ring_wfi(ctx->batch, ring);
      ir3_emit_image_dims(ctx->screen, v, ring, si);
   }
}

/* emit kernel params */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_KERNEL_PARAMS].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_KERNEL_PARAMS,
                            v->constlen)) {
      ring_wfi(ctx->batch, ring);
      emit_const_user(ring, v, offset * 4,
                      align(v->cs.req_input_mem, 4),
                      (uint32_t *)info->input);
   }
}

static inline struct ir3_driver_params_vs
ir3_build_driver_params_vs(struct fd_context *ctx,
                           const struct pipe_draw_info *info,
                           const struct pipe_draw_start_count_bias *draw,
                           uint32_t draw_id, bool needs_ucp)
   assert_dt
{
   struct ir3_driver_params_vs vertex_params = {
      .draw_id = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
      .vtxid_base = info->index_size ? draw->index_bias : draw->start,
      .instid_base = info->start_instance,
      .vtxcnt_max = ctx->streamout.max_tf_vtx,
      .is_indexed_draw = info->index_size != 0 ? ~0 : 0,
   };
   if (needs_ucp) {
      struct pipe_clip_state *ucp = &ctx->ucp;
      for (unsigned i = 0; i < ARRAY_SIZE(vertex_params.ucp); i++) {
         vertex_params.ucp[i].x = fui(ucp->ucp[i][0]);
         vertex_params.ucp[i].y = fui(ucp->ucp[i][1]);
         vertex_params.ucp[i].z = fui(ucp->ucp[i][2]);
         vertex_params.ucp[i].w = fui(ucp->ucp[i][3]);
      }
   }
   return vertex_params;
}

static inline void
ir3_emit_driver_params(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring, struct fd_context *ctx,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct ir3_driver_params_vs *vertex_params)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;

   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
    * plane. However a binning pass may drop even some of these, so limit to
    * program max.
    */
   const uint32_t vertex_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(vertex_params_size <= dword_sizeof(*vertex_params));

   /* For indirect draws we need to copy VTXID_BASE from the indirect-draw
    * parameters buffer, which is annoying and means we can't easily emit
    * these consts in the cmdstream, so we need to copy them to a bo:
    */
   if (indirect && v->vtxid_base != INVALID_REG) {
      uint32_t vertex_params_area = align(vertex_params_size, 16);
      struct pipe_resource *vertex_params_rsc =
         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
                            PIPE_USAGE_STREAM, vertex_params_area * 4);
      unsigned src_off = indirect->offset;
      void *ptr;

      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
      memcpy(ptr, vertex_params, vertex_params_size * 4);

      if (info->index_size) {
         /* indexed draw, index_bias is 4th field: */
         src_off += 3 * 4;
      } else {
         /* non-indexed draw, start is 3rd field: */
         src_off += 2 * 4;
      }
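
      /* For reference, the indirect parameters follow the standard
       * GL/Vulkan indirect-draw layout, so the source dword selected
       * above is:
       *
       *    non-indexed: { count, instance_count, start, start_instance }
       *    indexed:     { count, instance_count, start, index_bias, start_instance }
       */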

      /* copy index_bias or start from draw params: */
      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                              src_off, 1);

      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                      vertex_params_rsc);

      pipe_resource_reference(&vertex_params_rsc, NULL);
   } else {
      emit_const_user(ring, v, offset * 4, vertex_params_size, (uint32_t *)vertex_params);
   }

   /* if needed, emit stream-out buffer addresses: */
   if (vertex_params->vtxcnt_max > 0) {
      emit_tfbos(ctx, v, ring);
   }
}

static inline struct ir3_driver_params_tcs
ir3_build_driver_params_tcs(struct fd_context *ctx)
   assert_dt
{
   return (struct ir3_driver_params_tcs) {
      .default_outer_level_x = fui(ctx->default_outer_level[0]),
      .default_outer_level_y = fui(ctx->default_outer_level[1]),
      .default_outer_level_z = fui(ctx->default_outer_level[2]),
      .default_outer_level_w = fui(ctx->default_outer_level[3]),
      .default_inner_level_x = fui(ctx->default_inner_level[0]),
      .default_inner_level_y = fui(ctx->default_inner_level[1]),
   };
}

static inline void
ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring,
                          struct fd_context *ctx)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   if (!ir3_const_can_upload(&const_state->allocs,
                             IR3_CONST_ALLOC_DRIVER_PARAMS,
                             v->constlen))
      return;

   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;
   struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);

   const uint32_t hs_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(hs_params_size <= dword_sizeof(hs_params));

   emit_const_user(ring, v, offset * 4, hs_params_size, (uint32_t *)&hs_params);
}

static inline void
ir3_emit_vs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->type == MESA_SHADER_VERTEX);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);

   /* emit driver params every time: */
   if (info && v->need_driver_params) {
      ring_wfi(ctx->batch, ring);

      struct ir3_driver_params_vs p =
         ir3_build_driver_params_vs(ctx, info, draw, 0, v->key.ucp_enables);

      ir3_emit_driver_params(v, ring, ctx, info, indirect, &p);
   }
}

static inline void
ir3_emit_fs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
   assert(v->type == MESA_SHADER_FRAGMENT);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}

static inline struct ir3_driver_params_cs
ir3_build_driver_params_cs(const struct ir3_shader_variant *v,
                           const struct pipe_grid_info *info)
{
   return (struct ir3_driver_params_cs) {
      .num_work_groups_x = info->grid[0],
      .num_work_groups_y = info->grid[1],
      .num_work_groups_z = info->grid[2],
      .work_dim = info->work_dim,
      .base_group_x = info->grid_base[0],
      .base_group_y = info->grid_base[1],
      .base_group_z = info->grid_base[2],
      .subgroup_size = v->info.subgroup_size,
      .local_group_size_x = info->block[0],
      .local_group_size_y = info->block[1],
      .local_group_size_z = info->block[2],
      .subgroup_id_shift = util_logbase2(v->info.subgroup_size),
      .workgroup_id_x = 0, // TODO
      .workgroup_id_y = 0, // TODO
      .workgroup_id_z = 0, // TODO
   };
}

static inline void
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring, struct fd_context *ctx,
                          const struct pipe_grid_info *info)
   assert_dt
{
   emit_kernel_params(ctx, v, ring, info);

   /* a3xx/a4xx can inject these directly */
   if (ctx->screen->gen <= 4)
      return;

   /* emit compute-shader driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;
   uint32_t size =
      align(MIN2(const_state->num_driver_params, (v->constlen - offset) * 4), 16);

   if (size > 0 &&
       ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_DRIVER_PARAMS,
                            v->constlen)) {
      ring_wfi(ctx->batch, ring);

      struct ir3_driver_params_cs compute_params = ir3_build_driver_params_cs(v, info);

      if (info->indirect) {
         struct pipe_resource *buffer = NULL;
         unsigned buffer_offset;

         u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
                       16, &compute_params, &buffer_offset, &buffer);

         /* Copy the indirect params into the driver param buffer.  The layout
          * of the indirect buffer should match the first three fields of
          * compute_params:
          */
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_x) == 0);
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_y) == 4);
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_z) == 8);

         ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
                                 info->indirect_offset, 3);

         emit_const_prsc(ring, v, offset * 4, buffer_offset, size, buffer);

         pipe_resource_reference(&buffer, NULL);
      } else {
         emit_const_user(ring, v, offset * 4, size, (uint32_t *)&compute_params);
      }
   }
}

/* emit compute-shader consts: */
static inline void
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info) assert_dt
{
   assert(gl_shader_stage_is_compute(v->type));

   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);

   ir3_emit_cs_driver_params(v, ring, ctx, info);
}