/*
 * Copyright © 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#define FD_BO_NO_HARDPIN 1

#include "fd6_barrier.h"
#include "fd6_const.h"
#include "fd6_compute.h"
#include "fd6_pack.h"

#define emit_const_user fd6_emit_const_user
#define emit_const_bo fd6_emit_const_bo
#include "ir3_const.h"
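
/* Emit a single UBO descriptor (GPU address plus size, in units of vec4)
 * via CP_LOAD_STATE6, binding the BO at the given UBO index for the shader
 * stage of the given variant.
 */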
static inline void
fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                    int base, uint32_t sizedwords, unsigned buffer_offset,
                    struct fd_bo *bo)
{
   enum a6xx_state_block block = fd6_stage2shadersb(v->type);

   /* base == ubo idx */
   OUT_PKT7(ring, fd6_stage2opcode(v->type), 5);
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(base) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(block) |
                     CP_LOAD_STATE6_0_NUM_UNIT(1));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   int size_vec4s = DIV_ROUND_UP(sizedwords, 4);
   OUT_RELOC(ring, bo, buffer_offset,
             ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
}

/* A helper to upload driver-params to a UBO, for the case where constants
 * are loaded by the shader preamble rather than ST6_CONSTANTS.
 */
static void
fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
                           const struct ir3_shader_variant *v, int base,
                           uint32_t sizedwords, const void *dwords)
{
   struct pipe_context *pctx = &ctx->base;

   assert(ctx->screen->info->chip >= 7 &&
          ctx->screen->info->a7xx.load_shader_consts_via_preamble);

   if (!sizedwords || (base < 0))
      return;

   unsigned buffer_offset;
   struct pipe_resource *buffer = NULL;
   u_upload_data(pctx->const_uploader, 0, sizedwords * sizeof(uint32_t),
                 16, dwords, &buffer_offset, &buffer);
   if (!buffer)
      return; /* nothing good will come of this.. */

   /* The backing BO may otherwise not be tracked by the resource, as
    * this allocation happens outside of the context of batch resource
    * tracking.
    */
   fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);

   fd6_emit_driver_ubo(ring, v, base, sizedwords, buffer_offset,
                       fd_resource(buffer)->bo);

   pipe_resource_reference(&buffer, NULL);
}

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   /* NOTE we cheat a bit here, since we know mesa is aligning
    * the size of the user buffer to 16 bytes.  And we want to
    * cut cycles in a hot path.
    */
   uint32_t align_sz = align(sizedwords, 4);

   if (fd6_geom_stage(v->type)) {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(),
                 CP_LOAD_STATE6_2());
   } else {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(),
                 CP_LOAD_STATE6_2());
   }
}
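
/* Like fd6_emit_const_user(), but sources the constant values indirectly
 * from a BO (SS6_INDIRECT) rather than inlining them into the cmdstream.
 */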
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 4;
   assert(dst_off % 4 == 0);
   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   if (fd6_geom_stage(v->type)) {
      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   } else {
      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   }
}
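
/* Stubs expected by ir3_const.h: on a6xx consts are always built into
 * stateobj rings, and indirect const ptrs are never emitted.
 */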
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return true;
}

static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}
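
/* Mark the batch as needing a WAIT_MEM_WRITES (plus cache invalidate and
 * wait-for-idle), so that buffers written by the GPU, e.g. indirect draw
 * params, are visible before the CP consumes them.
 */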
static void
wait_mem_writes(struct fd_context *ctx)
{
   ctx->batch->barrier |= FD6_WAIT_MEM_WRITES | FD6_INVALIDATE_CACHE |
                          FD6_WAIT_FOR_IDLE;
}
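
/* Emit the primitive-param consts for a single geometry-pipeline stage,
 * either via a driver-params UBO (a7xx preamble path) or as user consts.
 */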
template <chip CHIP>
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                       struct fd_context *ctx, uint32_t *params, int num_params)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);

   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      int base = const_state->primitive_param_ubo.idx;

      fd6_upload_emit_driver_ubo(ctx, ring, v, base, num_params, params);
   } else if (ir3_const_can_upload(&const_state->allocs,
                                   IR3_CONST_ALLOC_PRIMITIVE_PARAM,
                                   v->constlen)) {
      const unsigned regid =
         const_state->allocs.consts[IR3_CONST_ALLOC_PRIMITIVE_PARAM].offset_vec4;
      int size = MIN2(1 + regid, v->constlen) - regid;
      if (size > 0)
         fd6_emit_const_user(ring, v, regid * 4, num_params, params);
   }
}
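
/* Build a streaming stateobj with the VS/HS/DS/GS primitive params used
 * for tessellation and geometry shading.
 */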
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
    * size is in dwords, since that's what LDG/STG use.
    */
   unsigned num_vertices = emit->hs
      ? ctx->patch_vertices
      : emit->gs->gs.vertices_in;

   uint32_t vs_params[4] = {
      emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
      emit->vs->output_size * 4,                /* vs vertex stride */
      0, 0};

   emit_stage_tess_consts<CHIP>(constobj, emit->vs, emit->ctx, vs_params,
                                ARRAY_SIZE(vs_params));

   if (emit->hs) {
      struct fd_bo *tess_bo = ctx->screen->tess_bo;
      int64_t tess_factor_iova = fd_bo_get_iova(tess_bo);
      int64_t tess_param_iova = tess_factor_iova + FD6_TESS_FACTOR_SIZE;

      fd_ringbuffer_attach_bo(constobj, tess_bo);

      uint32_t hs_params[8] = {
         emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
         emit->vs->output_size * 4,                /* vs vertex stride */
         emit->hs->output_size,
         ctx->patch_vertices,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      emit_stage_tess_consts<CHIP>(constobj, emit->hs, emit->ctx,
                                   hs_params, ARRAY_SIZE(hs_params));

      if (emit->gs)
         num_vertices = emit->gs->gs.vertices_in;

      uint32_t ds_params[8] = {
         emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
         emit->ds->output_size * 4,                /* ds vertex stride */
         emit->hs->output_size,                    /* hs vertex stride (dwords) */
         emit->hs->tess.tcs_vertices_out,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      emit_stage_tess_consts<CHIP>(constobj, emit->ds, emit->ctx,
                                   ds_params, ARRAY_SIZE(ds_params));
   }

   if (emit->gs) {
      const struct ir3_shader_variant *prev;
      if (emit->ds)
         prev = emit->ds;
      else
         prev = emit->vs;

      uint32_t gs_params[4] = {
         prev->output_size * num_vertices * 4, /* prev stage primitive stride */
         prev->output_size * 4,                /* prev stage vertex stride */
         0,
         0,
      };

      num_vertices = emit->gs->gs.vertices_in;
      emit_stage_tess_consts<CHIP>(constobj, emit->gs, emit->ctx,
                                   gs_params, ARRAY_SIZE(gs_params));
   }

   return constobj;
}
FD_GENX(fd6_build_tess_consts);
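
/* Emit the table of app UBO descriptors (address + size) for a shader
 * stage with a single CP_LOAD_STATE6 packet.
 */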
static void
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
              struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   int num_ubos = const_state->num_app_ubos;

   if (!num_ubos)
      return;

   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   for (int i = 0; i < num_ubos; i++) {
      struct pipe_constant_buffer *cb = &constbuf->cb[i];

      if (cb->buffer) {
         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
      } else {
         /* Unbound slot still consumes a descriptor; emit an obviously
          * bogus address so misuse is easier to spot:
          */
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
      }
   }
}
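
/* Worst-case cmdstream size (in bytes) needed for a variant's user-consts
 * stateobj, used to size the ringbuffer before emitting.
 */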
template <chip CHIP>
unsigned
fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
{
   if (!v)
      return 0;

   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
   unsigned packets, size;

   if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble) {
      packets = 0;
      size = 0;
   } else {
      /* pre-calculate size required for userconst stateobj: */
      ir3_user_consts_size(ubo_state, &packets, &size);
   }

   /* also account for UBO addresses: */
   packets += 1;
   size += 2 * const_state->num_app_ubos;

   unsigned sizedwords = (4 * packets) + size;
   return sizedwords * 4; /* convert dwords to bytes */
}
FD_GENX(fd6_user_consts_cmdstream_size);
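
/* Emit a stage's UBO descriptors, plus (on paths that don't load consts via
 * the shader preamble) the user consts lowered from UBOs.
 */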
template <chip CHIP>
static void
emit_user_consts(const struct ir3_shader_variant *v,
                 struct fd_ringbuffer *ring,
                 struct fd_constbuf_stateobj *constbuf)
{
   fd6_emit_ubos(v, ring, constbuf);

   if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble)
      return;

   ir3_emit_user_consts(v, ring, constbuf);
}
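
/* Build a streaming stateobj with the user consts and UBO descriptors for
 * every active shader stage.
 */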
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   unsigned sz = emit->prog->user_consts_cmdstream_size;

   struct fd_ringbuffer *constobj =
      fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);

   emit_user_consts<CHIP>(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);

   if (PIPELINE == HAS_TESS_GS) {
      if (emit->hs) {
         emit_user_consts<CHIP>(emit->hs, constobj,
                                &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
         emit_user_consts<CHIP>(emit->ds, constobj,
                                &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
      }
      if (emit->gs) {
         emit_user_consts<CHIP>(emit->gs, constobj,
                                &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
      }
   }
   emit_user_consts<CHIP>(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);

   return constobj;
}
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
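
/* Emit VS/GS/DS driver params, either into a driver-params UBO (a7xx
 * preamble path) or as regular consts.
 */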
template <chip CHIP>
static inline void
emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpconstobj,
                   struct fd_context *ctx, const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct ir3_driver_params_vs *vertex_params)
{
   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      const struct ir3_const_state *const_state = ir3_const_state(v);
      int base = const_state->driver_params_ubo.idx;

      fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
                                 dword_sizeof(*vertex_params),
                                 vertex_params);
   } else {
      ir3_emit_driver_params(v, dpconstobj, ctx, info, indirect, vertex_params);
   }
}
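
/* Same as emit_driver_params(), but for the TCS (HS) driver params. */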
template <chip CHIP>
static inline void
emit_hs_driver_params(const struct ir3_shader_variant *v,
                      struct fd_ringbuffer *dpconstobj,
                      struct fd_context *ctx)
{
   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      const struct ir3_const_state *const_state = ir3_const_state(v);
      struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);
      int base = const_state->driver_params_ubo.idx;

      fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
                                 dword_sizeof(hs_params),
                                 &hs_params);
   } else {
      ir3_emit_hs_driver_params(v, dpconstobj, ctx);
   }
}
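
/* Build a streaming stateobj with driver params for all stages that need
 * them, returning NULL (and clearing has_dp_state) if there are none.
 */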
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned num_dp = emit->prog->num_driver_params;
   unsigned num_ubo_dp;

   if (CHIP == A6XX) {
      assert(!emit->prog->num_ubo_driver_params);
      /* Make it easier for the compiler to see that this path isn't used
       * on a6xx:
       */
      num_ubo_dp = 0;
   } else {
      num_ubo_dp = emit->prog->num_ubo_driver_params;
   }

   if (!num_dp && !num_ubo_dp) {
      fd6_ctx->has_dp_state = false;
      return NULL;
   }

   bool needs_ucp = !!emit->vs->key.ucp_enables;

   if (PIPELINE == HAS_TESS_GS) {
      needs_ucp |= emit->gs && emit->gs->key.ucp_enables;
      needs_ucp |= emit->hs && emit->hs->key.ucp_enables;
      needs_ucp |= emit->ds && emit->ds->key.ucp_enables;
   }

   struct ir3_driver_params_vs p =
      ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id,
                                 needs_ucp);

   unsigned size_dwords =
      num_dp * (4 + dword_sizeof(p)) + /* 4dw packet header */
      num_ubo_dp * 6;                  /* 6dw per UBO descriptor */

   struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);

   /* VS still works the old way: */
   if (emit->vs->need_driver_params) {
      ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
                             emit->indirect, &p);
   }

   if (PIPELINE == HAS_TESS_GS) {
      if (emit->gs && emit->gs->need_driver_params) {
         emit_driver_params<CHIP>(emit->gs, dpconstobj, ctx, emit->info,
                                  emit->indirect, &p);
      }

      if (emit->hs && emit->hs->need_driver_params) {
         emit_hs_driver_params<CHIP>(emit->hs, dpconstobj, ctx);
      }

      if (emit->ds && emit->ds->need_driver_params) {
         emit_driver_params<CHIP>(emit->ds, dpconstobj, ctx, emit->info,
                                  emit->indirect, &p);
      }
   }

   /* Indirect draw params may have been written by the GPU, so sync
    * before they are consumed:
    */
   if (emit->indirect)
      wait_mem_writes(ctx);

   fd6_ctx->has_dp_state = true;

   return dpconstobj;
}

template struct fd_ringbuffer * fd6_build_driver_params<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
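
/* Emit compute-shader driver params.  On the a7xx UBO path the params are
 * uploaded to a buffer (with indirect dispatch params copied in on the
 * GPU); otherwise they are emitted as regular consts.
 */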
template <chip CHIP>
void
fd6_emit_cs_driver_params(struct fd_context *ctx,
                          struct fd_ringbuffer *ring,
                          struct fd6_compute_state *cs,
                          const struct pipe_grid_info *info)
{
   /* info->input is not handled in the UBO path.  I believe this was only
    * ever used by clover.
    */
   assert(!info->input);

   if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
      const struct ir3_const_state *const_state = ir3_const_state(cs->v);
      struct ir3_driver_params_cs compute_params =
         ir3_build_driver_params_cs(cs->v, info);
      int base = const_state->driver_params_ubo.idx;

      if (base < 0)
         return;

      struct pipe_resource *buffer = NULL;
      unsigned buffer_offset;

      u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
                    16, &compute_params, &buffer_offset, &buffer);

      if (info->indirect) {
         /* Copy indirect params into UBO: */
         ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
                                 info->indirect_offset, 3);

         wait_mem_writes(ctx);
      } else {
         fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
      }

      fd6_emit_driver_ubo(ring, cs->v, base, dword_sizeof(compute_params),
                          buffer_offset, fd_resource(buffer)->bo);

      pipe_resource_reference(&buffer, NULL);
   } else {
      ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
      if (info->indirect)
         wait_mem_writes(ctx);
   }
}
FD_GENX(fd6_emit_cs_driver_params);
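
/* Emit user consts and UBO descriptors for the compute stage. */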
template <chip CHIP>
void
fd6_emit_cs_user_consts(struct fd_context *ctx,
                        struct fd_ringbuffer *ring,
                        struct fd6_compute_state *cs)
{
   emit_user_consts<CHIP>(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}
FD_GENX(fd6_emit_cs_user_consts);
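
/* Emit a variant's immediates, plus (if used) the UBO descriptor pointing
 * at the shader's constant data, which lives in the shader BO itself.
 */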
template <chip CHIP>
void
fd6_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);

   if (const_state->consts_ubo.idx >= 0) {
      int sizedwords = DIV_ROUND_UP(v->constant_data_size, 4);

      fd6_emit_driver_ubo(ring, v, const_state->consts_ubo.idx, sizedwords,
                          v->info.constant_data_offset, v->bo);
   }

   if (CHIP == A7XX && v->compiler->load_inline_uniforms_via_preamble_ldgk)
      return;

   ir3_emit_immediates(v, ring);
}
FD_GENX(fd6_emit_immediates);
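
/* Emit the producer->consumer primitive map, either via a driver UBO
 * (a7xx preamble path) or as consts.
 */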
template <chip CHIP>
void
fd6_emit_link_map(struct fd_context *ctx,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
      const struct ir3_const_state *const_state = ir3_const_state(consumer);
      int base = const_state->primitive_map_ubo.idx;
      uint32_t size = ALIGN(consumer->input_size, 4);

      fd6_upload_emit_driver_ubo(ctx, ring, consumer, base, size,
                                 producer->output_loc);
   } else {
      ir3_emit_link_map(producer, consumer, ring);
   }
}
FD_GENX(fd6_emit_link_map);