/* NOTE: code-viewer navigation chrome removed from scraped copy
 * (Home / Line# / Scopes# / Navigate / Raw / Download).
 */
1 /*
2  * Copyright © 2016 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #define FD_BO_NO_HARDPIN 1
8 
9 #include "fd6_barrier.h"
10 #include "fd6_const.h"
11 #include "fd6_compute.h"
12 #include "fd6_pack.h"
13 
14 #define emit_const_user fd6_emit_const_user
15 #define emit_const_bo   fd6_emit_const_bo
16 #include "ir3_const.h"
17 
/* Emit a CP_LOAD_STATE6 binding a single driver-internal UBO descriptor
 * (GPU address + size) at UBO slot 'base' for the stage of variant 'v'.
 *
 * base:          UBO table index to bind
 * sizedwords:    size of the buffer contents, in dwords
 * buffer_offset: byte offset of the data within 'bo'
 */
static inline void
fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                    int base, uint32_t sizedwords, unsigned buffer_offset,
                    struct fd_bo *bo)
{
   enum a6xx_state_block block = fd6_stage2shadersb(v->type);

   /* base == ubo idx */
   OUT_PKT7(ring, fd6_stage2opcode(v->type), 5);
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(base) |
            CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
            CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
            CP_LOAD_STATE6_0_STATE_BLOCK(block) |
            CP_LOAD_STATE6_0_NUM_UNIT(1));
   /* Descriptor payload is inline (SS6_DIRECT), so EXT_SRC_ADDR is unused: */
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   /* The UBO descriptor is a 64b word: buffer address in the low bits with
    * the size (in vec4 units) packed into the high dword via A6XX_UBO_1_SIZE.
    */
   int size_vec4s = DIV_ROUND_UP(sizedwords, 4);
   OUT_RELOC(ring, bo, buffer_offset,
             ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
}
39 
/* A helper to upload driver-params to a UBO, for the case where constants are
 * loaded by shader preamble rather than ST6_CONSTANTS
 *
 * base:       UBO slot index; a negative value means the variant has no slot
 *             allocated for this data and the upload is skipped.
 * sizedwords: payload size in dwords (0 also skips the upload).
 */
static void
fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
                           const struct ir3_shader_variant *v, int base,
                           uint32_t sizedwords, const void *dwords)
{
   struct pipe_context *pctx = &ctx->base;

   /* Only the a7xx preamble-const path should reach here: */
   assert(ctx->screen->info->chip >= 7 && ctx->screen->info->a7xx.load_shader_consts_via_preamble);

   if (!sizedwords || (base < 0))
      return;

   unsigned buffer_offset;
   struct pipe_resource *buffer = NULL;
   u_upload_data(pctx->const_uploader, 0, sizedwords * sizeof(uint32_t),
                 16, dwords,  &buffer_offset, &buffer);
   if (!buffer)
      return;  /* nothing good will come of this.. */

   /* The backing BO may otherwise not be tracked by the resource, as
    * this allocation happens outside of the context of batch resource
    * tracking.
    */
   fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);

   fd6_emit_driver_ubo(ring, v, base, sizedwords, buffer_offset,
                       fd_resource(buffer)->bo);

   /* Drop our reference; the ringbuffer's BO attachment keeps the memory
    * alive for the submit.
    */
   pipe_resource_reference(&buffer, NULL);
}
73 
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 *
 * Emits the constants inline in the cmdstream (SS6_DIRECT).  GEOM vs FRAG
 * opcode selection depends on the shader stage; the packet contents are
 * otherwise identical.
 */
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   /* NOTE we cheat a bit here, since we know mesa is aligning
    * the size of the user buffer to 16 bytes.  And we want to
    * cut cycles in a hot path.
    */
   uint32_t align_sz = align(sizedwords, 4);

   if (fd6_geom_stage(v->type)) {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                          .state_src = SS6_DIRECT,
                          .state_block = fd6_stage2shadersb(v->type),
                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
         CP_LOAD_STATE6_1(),
         CP_LOAD_STATE6_2());
   } else {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                          .state_src = SS6_DIRECT,
                          .state_block = fd6_stage2shadersb(v->type),
                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
         CP_LOAD_STATE6_1(),
         CP_LOAD_STATE6_2());
   }
}
109 
/* Like fd6_emit_const_user(), but the const values are sourced from a BO
 * (SS6_INDIRECT) instead of being written inline into the cmdstream.
 *
 * regid:  base const register (in dword units; dst_off is in vec4 units)
 * offset: byte offset of the const data within 'bo'
 */
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 4;
   /* NOTE(review): presumably a HW alignment requirement for indirect
    * const loads (dst_off and num_unit in groups of 4 vec4s) -- the
    * asserts below encode it but the source of the constraint isn't
    * visible here.
    */
   assert(dst_off % 4 == 0);
   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   if (fd6_geom_stage(v->type)) {
      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   } else {
      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   }
}
138 
/* Hook required by ir3_const.h: on a6xx, const state is always built into
 * stateobj ringbuffers rather than emitted inline, so this is always true.
 */
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   (void)ring;
   return true;
}
144 
/* Hook required by ir3_const.h.  The pointer-table const path is not used
 * on a6xx (consts are emitted via CP_LOAD_STATE6 instead), so reaching
 * this is a driver bug.
 */
static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}
152 
153 static void
wait_mem_writes(struct fd_context * ctx)154 wait_mem_writes(struct fd_context *ctx)
155 {
156    ctx->batch->barrier |= FD6_WAIT_MEM_WRITES | FD6_INVALIDATE_CACHE | FD6_WAIT_FOR_IDLE;
157 }
158 
/* Emit the primitive-param driver constants for one geometry-pipeline stage,
 * either into the primitive-param UBO (a7xx preamble path) or directly as
 * user consts.
 */
template <chip CHIP>
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                       struct fd_context *ctx, uint32_t *params, int num_params)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);

   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      int base = const_state->primitive_param_ubo.idx;

      /* Handles base < 0 (no UBO slot allocated) internally: */
      fd6_upload_emit_driver_ubo(ctx, ring, v, base, num_params, params);
   } else if (ir3_const_can_upload(&const_state->allocs,
                                   IR3_CONST_ALLOC_PRIMITIVE_PARAM,
                                   v->constlen)) {
      const unsigned regid =
         const_state->allocs.consts[IR3_CONST_ALLOC_PRIMITIVE_PARAM].offset_vec4;
      /* Clamp against the variant's constlen; skip if there is no room
       * left for the params:
       */
      int size = MIN2(1 + regid, v->constlen) - regid;
      if (size > 0)
         fd6_emit_const_user(ring, v, regid * 4, num_params, params);
   }
}
180 
/* Build a streaming stateobj with the primitive-param constants for each
 * active geometry-pipeline stage (VS, and HS/DS and/or GS when present).
 *
 * NOTE(review): emit->gs is dereferenced when emit->hs is NULL, so this
 * presumably is only called when tess and/or GS is active -- confirm at
 * the call site.
 */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
    * size is dwords, since that's what LDG/STG use.
    */
   unsigned num_vertices = emit->hs
                              ? ctx->patch_vertices
                              : emit->gs->gs.vertices_in;

   uint32_t vs_params[4] = {
      emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
      emit->vs->output_size * 4,                /* vs vertex stride */
      0, 0};

   emit_stage_tess_consts<CHIP>(constobj, emit->vs, emit->ctx, vs_params, ARRAY_SIZE(vs_params));

   if (emit->hs) {
      /* tess_bo layout: tess factors at the start, per-patch params after
       * FD6_TESS_FACTOR_SIZE bytes.
       */
      struct fd_bo *tess_bo = ctx->screen->tess_bo;
      int64_t tess_factor_iova = fd_bo_get_iova(tess_bo);
      int64_t tess_param_iova = tess_factor_iova + FD6_TESS_FACTOR_SIZE;

      fd_ringbuffer_attach_bo(constobj, tess_bo);

      uint32_t hs_params[8] = {
         emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
         emit->vs->output_size * 4,                /* vs vertex stride */
         emit->hs->output_size,
         ctx->patch_vertices,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      emit_stage_tess_consts<CHIP>(constobj, emit->hs, emit->ctx,
                                   hs_params, ARRAY_SIZE(hs_params));

      /* If a GS follows, the DS primitive stride is based on the GS's
       * input vertex count:
       */
      if (emit->gs)
         num_vertices = emit->gs->gs.vertices_in;

      uint32_t ds_params[8] = {
         emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
         emit->ds->output_size * 4,                /* ds vertex stride */
         emit->hs->output_size,                    /* hs vertex stride (dwords) */
         emit->hs->tess.tcs_vertices_out,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      emit_stage_tess_consts<CHIP>(constobj, emit->ds, emit->ctx,
                                   ds_params,  ARRAY_SIZE(ds_params));
   }

   if (emit->gs) {
      /* The GS reads the output of whichever stage precedes it: */
      const struct ir3_shader_variant *prev;
      if (emit->ds)
         prev = emit->ds;
      else
         prev = emit->vs;

      uint32_t gs_params[4] = {
         prev->output_size * num_vertices * 4, /* ds primitive stride */
         prev->output_size * 4,                /* ds vertex stride */
         0,
         0,
      };

      num_vertices = emit->gs->gs.vertices_in;
      emit_stage_tess_consts<CHIP>(constobj, emit->gs, emit->ctx,
                                   gs_params, ARRAY_SIZE(gs_params));
   }

   return constobj;
}
FD_GENX(fd6_build_tess_consts);
264 
/* Emit the app UBO descriptor table (one 64b address+size descriptor per
 * UBO) for a stage, as a single ST6_UBO CP_LOAD_STATE6.
 */
static void
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
              struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   int num_ubos = const_state->num_app_ubos;

   if (!num_ubos)
      return;

   /* 3 dwords of CP_LOAD_STATE6 header plus 2 dwords per UBO descriptor: */
   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   for (int i = 0; i < num_ubos; i++) {
      struct pipe_constant_buffer *cb = &constbuf->cb[i];

      if (cb->buffer) {
         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
      } else {
         /* Unbound slot: poison address tagged with the slot index, and
          * a zero size so any access is out-of-bounds:
          */
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
      }
   }
}
297 
298 template <chip CHIP>
299 unsigned
fd6_user_consts_cmdstream_size(const struct ir3_shader_variant * v)300 fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
301 {
302    if (!v)
303       return 0;
304 
305    const struct ir3_const_state *const_state = ir3_const_state(v);
306    const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
307    unsigned packets, size;
308 
309    if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble) {
310       packets = 0;
311       size = 0;
312    } else {
313       /* pre-calculate size required for userconst stateobj: */
314       ir3_user_consts_size(ubo_state, &packets, &size);
315    }
316 
317    /* also account for UBO addresses: */
318    packets += 1;
319    size += 2 * const_state->num_app_ubos;
320 
321    unsigned sizedwords = (4 * packets) + size;
322    return sizedwords * 4;
323 }
324 FD_GENX(fd6_user_consts_cmdstream_size);
325 
326 template <chip CHIP>
327 static void
emit_user_consts(const struct ir3_shader_variant * v,struct fd_ringbuffer * ring,struct fd_constbuf_stateobj * constbuf)328 emit_user_consts(const struct ir3_shader_variant *v,
329                  struct fd_ringbuffer *ring,
330                  struct fd_constbuf_stateobj *constbuf)
331 {
332    fd6_emit_ubos(v, ring, constbuf);
333 
334    if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble)
335       return;
336 
337    ir3_emit_user_consts(v, ring, constbuf);
338 }
339 
340 template <chip CHIP, fd6_pipeline_type PIPELINE>
341 struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit * emit)342 fd6_build_user_consts(struct fd6_emit *emit)
343 {
344    struct fd_context *ctx = emit->ctx;
345    unsigned sz = emit->prog->user_consts_cmdstream_size;
346 
347    struct fd_ringbuffer *constobj =
348       fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
349 
350    emit_user_consts<CHIP>(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
351 
352    if (PIPELINE == HAS_TESS_GS) {
353       if (emit->hs) {
354          emit_user_consts<CHIP>(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
355          emit_user_consts<CHIP>(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
356       }
357       if (emit->gs) {
358          emit_user_consts<CHIP>(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
359       }
360    }
361    emit_user_consts<CHIP>(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
362 
363    return constobj;
364 }
365 template struct fd_ringbuffer * fd6_build_user_consts<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
366 template struct fd_ringbuffer * fd6_build_user_consts<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
367 template struct fd_ringbuffer * fd6_build_user_consts<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
368 template struct fd_ringbuffer * fd6_build_user_consts<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
369 
370 template <chip CHIP>
371 static inline void
emit_driver_params(const struct ir3_shader_variant * v,struct fd_ringbuffer * dpconstobj,struct fd_context * ctx,const struct pipe_draw_info * info,const struct pipe_draw_indirect_info * indirect,const struct ir3_driver_params_vs * vertex_params)372 emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpconstobj,
373                    struct fd_context *ctx, const struct pipe_draw_info *info,
374                    const struct pipe_draw_indirect_info *indirect,
375                    const struct ir3_driver_params_vs *vertex_params)
376 {
377    if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
378       const struct ir3_const_state *const_state = ir3_const_state(v);
379       int base = const_state->driver_params_ubo.idx;
380 
381       fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
382                                  dword_sizeof(*vertex_params),
383                                  vertex_params);
384    } else {
385       ir3_emit_driver_params(v, dpconstobj, ctx, info, indirect, vertex_params);
386    }
387 }
388 
389 template <chip CHIP>
390 static inline void
emit_hs_driver_params(const struct ir3_shader_variant * v,struct fd_ringbuffer * dpconstobj,struct fd_context * ctx)391 emit_hs_driver_params(const struct ir3_shader_variant *v,
392                       struct fd_ringbuffer *dpconstobj,
393                       struct fd_context *ctx)
394 {
395    if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
396       const struct ir3_const_state *const_state = ir3_const_state(v);
397       struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);
398       int base = const_state->driver_params_ubo.idx;
399 
400       fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
401                                  dword_sizeof(hs_params),
402                                  &hs_params);
403    } else {
404       ir3_emit_hs_driver_params(v, dpconstobj, ctx);
405    }
406 }
407 
/* Build a streaming stateobj holding the per-draw driver params for all
 * geometry stages.  Returns NULL (and clears fd6_ctx->has_dp_state) if no
 * stage needs driver params.
 */
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned num_dp = emit->prog->num_driver_params;
   unsigned num_ubo_dp;

   if (CHIP == A6XX) {
      assert(!emit->prog->num_ubo_driver_params);
      /* Make it easier for compiler to see that this path isn't used on a6xx: */
      num_ubo_dp = 0;
   } else {
      num_ubo_dp = emit->prog->num_ubo_driver_params;
   }

   if (!num_dp && !num_ubo_dp) {
      fd6_ctx->has_dp_state = false;
      return NULL;
   }

   /* User-clip-plane params are needed if any active stage uses UCPs: */
   bool needs_ucp = !!emit->vs->key.ucp_enables;

   if (PIPELINE == HAS_TESS_GS) {
      needs_ucp |= emit->gs && emit->gs->key.ucp_enables;
      needs_ucp |= emit->hs && emit->hs->key.ucp_enables;
      needs_ucp |= emit->ds && emit->ds->key.ucp_enables;
   }

   struct ir3_driver_params_vs p =
      ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id, needs_ucp);

   /* Worst-case cmdstream size across all stages: */
   unsigned size_dwords =
      num_dp * (4 + dword_sizeof(p)) + /* 4dw PKT7 header */
      num_ubo_dp * 6;                  /* 6dw per UBO descriptor */

   struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
         ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);

   /* VS still works the old way*/
   if (emit->vs->need_driver_params) {
      ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p);
   }

   if (PIPELINE == HAS_TESS_GS) {
      if (emit->gs && emit->gs->need_driver_params) {
         emit_driver_params<CHIP>(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
      }

      if (emit->hs && emit->hs->need_driver_params) {
         emit_hs_driver_params<CHIP>(emit->hs, dpconstobj, ctx);
      }

      if (emit->ds && emit->ds->need_driver_params) {
         emit_driver_params<CHIP>(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
      }
   }

   /* Indirect draws read their params from memory, so make sure prior
    * writes to that memory have landed:
    */
   if (emit->indirect)
      wait_mem_writes(ctx);

   fd6_ctx->has_dp_state = true;

   return dpconstobj;
}
474 
475 template struct fd_ringbuffer * fd6_build_driver_params<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
476 template struct fd_ringbuffer * fd6_build_driver_params<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
477 template struct fd_ringbuffer * fd6_build_driver_params<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
478 template struct fd_ringbuffer * fd6_build_driver_params<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
479 
/* Emit compute-shader driver params (grid size, etc), either via the
 * driver-params UBO (a7xx preamble path) or the legacy const emit.  For
 * indirect dispatch on the UBO path, the indirect grid size is copied
 * into the UBO on the GPU.
 */
template <chip CHIP>
void
fd6_emit_cs_driver_params(struct fd_context *ctx,
                          struct fd_ringbuffer *ring,
                          struct fd6_compute_state *cs,
                          const struct pipe_grid_info *info)
{
   /* info->input not handled in the UBO path.  I believe this was only
    * ever used by clover
    */
   assert(!info->input);

   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      const struct ir3_const_state *const_state = ir3_const_state(cs->v);
      struct ir3_driver_params_cs compute_params =
         ir3_build_driver_params_cs(cs->v, info);
      int base = const_state->driver_params_ubo.idx;

      /* No UBO slot allocated for driver params: */
      if (base < 0)
         return;

      struct pipe_resource *buffer = NULL;
      unsigned buffer_offset;

      u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
                     16, &compute_params,  &buffer_offset, &buffer);

      if (info->indirect) {
         /* Copy indirect params into UBO: */
         ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
                                 info->indirect_offset, 3);

         wait_mem_writes(ctx);
      } else {
         /* NOTE(review): the explicit attach only happens on the direct
          * path; presumably mem_to_mem() tracks the BO itself in the
          * indirect case -- confirm.
          */
         fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
      }

      fd6_emit_driver_ubo(ring, cs->v, base, dword_sizeof(compute_params),
                          buffer_offset, fd_resource(buffer)->bo);

      pipe_resource_reference(&buffer, NULL);
   } else {
      ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
      if (info->indirect)
         wait_mem_writes(ctx);
   }
}
FD_GENX(fd6_emit_cs_driver_params);
528 
529 template <chip CHIP>
530 void
fd6_emit_cs_user_consts(struct fd_context * ctx,struct fd_ringbuffer * ring,struct fd6_compute_state * cs)531 fd6_emit_cs_user_consts(struct fd_context *ctx,
532                         struct fd_ringbuffer *ring,
533                         struct fd6_compute_state *cs)
534 {
535    emit_user_consts<CHIP>(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
536 }
537 FD_GENX(fd6_emit_cs_user_consts);
538 
/* Emit a variant's compile-time constant data: if the variant has a consts
 * UBO slot, bind the constant-data section of the shader BO as that UBO;
 * then emit immediates as regular consts, unless the a7xx preamble loads
 * inline uniforms itself (via ldg.k).
 */
template <chip CHIP>
void
fd6_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);

   if (const_state->consts_ubo.idx >= 0) {
      int sizedwords = DIV_ROUND_UP(v->constant_data_size, 4);

      /* constant_data lives in the shader BO, after the instructions: */
      fd6_emit_driver_ubo(ring, v, const_state->consts_ubo.idx, sizedwords,
                          v->info.constant_data_offset, v->bo);
   }

   if (CHIP == A7XX && v->compiler->load_inline_uniforms_via_preamble_ldgk)
      return;

   ir3_emit_immediates(v, ring);
}
FD_GENX(fd6_emit_immediates);
559 
560 template <chip CHIP>
561 void
fd6_emit_link_map(struct fd_context * ctx,const struct ir3_shader_variant * producer,const struct ir3_shader_variant * consumer,struct fd_ringbuffer * ring)562 fd6_emit_link_map(struct fd_context *ctx,
563                   const struct ir3_shader_variant *producer,
564                   const struct ir3_shader_variant *consumer,
565                   struct fd_ringbuffer *ring)
566 {
567    if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
568       const struct ir3_const_state *const_state = ir3_const_state(consumer);
569       int base = const_state->primitive_map_ubo.idx;
570       uint32_t size = ALIGN(consumer->input_size, 4);
571 
572       fd6_upload_emit_driver_ubo(ctx, ring, consumer, base, size, producer->output_loc);
573    } else {
574       ir3_emit_link_map(producer, consumer, ring);
575    }
576 }
577 FD_GENX(fd6_emit_link_map);
578