/*
 * Copyright © 2014 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3/ir3_nir.h"

/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
 * ir3 generations, the only difference is small details in the actual
 * CP_LOAD_STATE packets (which are handled inside the generation
 * specific ctx->emit_const(_bo)() fxns)
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions (an illustrative sketch of a per-gen
 * definition follows these declarations):
 */

static bool is_stateobj(struct fd_ringbuffer *ring);

static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);

static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);

static void
emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t size,
                struct pipe_resource *buffer)
{
   struct fd_resource *rsc = fd_resource(buffer);
   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}

static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);

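/* For illustration only (a hypothetical sketch, not part of this header):
 * a generation that never emits const state via CP_SET_DRAW_STATE stateobjs
 * could satisfy the is_stateobj() requirement trivially, e.g.
 *
 *    static bool
 *    is_stateobj(struct fd_ringbuffer *ring)
 *    {
 *       return false;
 *    }
 *
 * (which simply makes ring_wfi() below always emit the WFI), alongside
 * gen-specific emit_const_user()/emit_const_bo()/emit_const_ptrs() that
 * encode the appropriate CP_LOAD_STATE packets for that generation.
 */
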
static void
emit_const_asserts(struct fd_ringbuffer *ring,
                   const struct ir3_shader_variant *v, uint32_t regid,
                   uint32_t sizedwords)
{
   assert((v->type == MESA_SHADER_VERTEX) ||
          !v->compiler->load_shader_consts_via_preamble);
   assert((regid % 4) == 0);
   assert((sizedwords % 4) == 0);
   assert(regid + sizedwords <= v->constlen * 4);
}

static void
ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   /* when we emit const state via ring (IB2) we need a WFI, but when
    * it is emitted via stateobj, we don't
    */
   if (is_stateobj(ring))
      return;

   fd_wfi(batch, ring);
}

/**
 * Indirectly calculates the size of cmdstream needed for
 * ir3_emit_user_consts().  Returns the number of packets and the total
 * size of their payload, in dwords.
 *
 * The value can be a worst case, i.e. some shader variants may not read
 * all consts, etc.
 */
static inline void
ir3_user_consts_size(const struct ir3_ubo_analysis_state *state,
                     unsigned *packets, unsigned *size)
{
   *packets = *size = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      if (state->range[i].start < state->range[i].end) {
         *size += state->range[i].end - state->range[i].start;
         (*packets)++;
      }
   }
}
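
/* For illustration only (a hypothetical usage sketch, not from this file):
 * a caller could use the returned counts to size a streaming ringbuffer for
 * the user-consts state, roughly:
 *
 *    unsigned packets, size;
 *    ir3_user_consts_size(&ir3_const_state(v)->ubo_state, &packets, &size);
 *    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
 *          ctx->batch->submit, (packets * 4 + size) * 4,
 *          FD_RINGBUFFER_STREAMING);
 *
 * where the per-packet overhead (4 dwords here) is an assumption that
 * depends on the generation's CP_LOAD_STATE encoding.
 */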

/**
 * Uploads the referenced subranges of the nir constant_data to the hardware's
 * constant buffer.
 */
static inline void
ir3_emit_constant_data(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      unsigned ubo = state->range[i].ubo.block;
      if (ubo != const_state->consts_ubo.idx)
         continue;

      uint32_t size = state->range[i].end - state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      emit_const_bo(ring, v, state->range[i].offset / 4,
                    v->info.constant_data_offset + state->range[i].start,
                    size / 4, v->bo);
   }
}

/**
 * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
 * outside of these ranges will be done using full UBO accesses in the
 * shader).
 */
static inline void
ir3_emit_user_consts(const struct ir3_shader_variant *v,
                     struct fd_ringbuffer *ring,
                     struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      assert(!state->range[i].ubo.bindless);
      unsigned ubo = state->range[i].ubo.block;
      if (!(constbuf->enabled_mask & (1 << ubo)) ||
          ubo == const_state->consts_ubo.idx) {
         continue;
      }
      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];

      uint32_t size = state->range[i].end - state->range[i].start;
      uint32_t offset = cb->buffer_offset + state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      /* things should be aligned to vec4: */
      assert((state->range[i].offset % 16) == 0);
      assert((size % 16) == 0);
      assert((offset % 16) == 0);

      if (cb->user_buffer) {
         uint8_t *p = (uint8_t *)cb->user_buffer;
         p += state->range[i].start;
         emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
                         (uint32_t *)p);
      } else {
         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
                         cb->buffer);
      }
   }
}

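/**
 * Emits pointers to the UBOs into const state (pre-a6xx only; a6xx+ uses
 * UBO state and ldc instead).  Constbuf 0 user pointers (GL uniforms) are
 * uploaded to a buffer first so they can be referenced by address.
 */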
static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_UBO_PTRS].offset_vec4;

   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
    * const state and ldg:
    */
   if (ctx->screen->gen >= 6)
      return;

   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_UBO_PTRS,
                            v->constlen)) {
      uint32_t params = const_state->num_ubos;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         if (i == const_state->consts_ubo.idx) {
            bos[i] = v->bo;
            offsets[i] = v->info.constant_data_offset;
            continue;
         }

         struct pipe_constant_buffer *cb = &constbuf->cb[i];

         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
          * them to a buffer now, and save it in the constbuf so that we
          * don't have to reupload until they get changed.
          */
         if (cb->user_buffer) {
            struct pipe_context *pctx = &ctx->base;
            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
            cb->user_buffer = NULL;
         }

         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
            offsets[i] = cb->buffer_offset;
            bos[i] = fd_resource(cb->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

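/**
 * Uploads the image dimension/stride metadata consts (texel size, row pitch
 * and layer/slice size, or log2(bpp) for buffer images) that the compiler's
 * lowered image operations expect.
 */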
static inline void
ir3_emit_image_dims(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring,
                    struct fd_shaderimg_stateobj *si)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_IMAGE_DIMS].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_IMAGE_DIMS,
                            v->constlen)) {
      uint32_t dims[align(const_state->image_dims.count, 4)];
      unsigned mask = const_state->image_dims.mask;

      while (mask) {
         struct pipe_image_view *img;
         struct fd_resource *rsc;
         unsigned index = u_bit_scan(&mask);
         unsigned off = const_state->image_dims.off[index];

         img = &si->si[index];
         rsc = fd_resource(img->resource);

         dims[off + 0] = util_format_get_blocksize(img->format);
         if (img->resource->target != PIPE_BUFFER) {
            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
            /* note for 2d/cube/etc images, even if re-interpreted
             * as a different color format, the pixel size should
             * be the same, so use original dimensions for y and z
             * stride:
             */
            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
            /* see corresponding logic in fd_resource_offset(): */
            if (rsc->layout.layer_first) {
               dims[off + 2] = rsc->layout.layer_size;
            } else {
               dims[off + 2] = slice->size0;
            }
         } else {
            /* For buffer-backed images, the log2 of the format's
             * bytes-per-pixel is placed on the 2nd slot.  This is useful
             * when emitting image_size instructions, for which we need
             * to divide by bpp for image buffers.  Since the bpp
             * can only be power-of-two, the division is implemented
             * as a SHR, and for that it is handy to have the log2 of
             * bpp as a constant. (log2 = first-set-bit - 1)
             */
            dims[off + 1] = ffs(dims[off + 0]) - 1;
         }
      }
      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);

      emit_const_user(ring, v, offset * 4, size, dims);
   }
}

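/**
 * Uploads the immediate constants, plus any referenced nir constant_data
 * (which has the same lifetime).
 */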
static inline void
ir3_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->allocs.max_const_offset_vec4;
   int size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, const_state->immediates);

   /* NIR constant data has the same lifetime as immediates, so upload it
    * now, too.
    */
   ir3_emit_constant_data(v, ring);
}

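/**
 * Uploads the producer stage's output location map (the "primitive map")
 * into the consumer stage's const space, so that e.g. a tess or geometry
 * variant knows where to find the previous stage's per-vertex outputs.
 */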
static inline void
ir3_emit_link_map(const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(consumer);
   if (!ir3_const_can_upload(&const_state->allocs,
                             IR3_CONST_ALLOC_PRIMITIVE_MAP,
                             consumer->constlen))
      return;

   uint32_t base =
      const_state->allocs.consts[IR3_CONST_ALLOC_PRIMITIVE_MAP].offset_vec4;
   int size = DIV_ROUND_UP(consumer->input_size, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, consumer->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, consumer, base, size, producer->output_loc);
}

/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
           struct fd_ringbuffer *ring)
{
   /* streamout addresses after driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_TFBO].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_TFBO,
                            v->constlen)) {
      struct fd_streamout_stateobj *so = &ctx->streamout;
      const struct ir3_stream_output_info *info = &v->stream_output;
      uint32_t params = 4;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         struct pipe_stream_output_target *target = so->targets[i];

         if (target) {
            offsets[i] =
               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
            bos[i] = fd_resource(target->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

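/**
 * Emits the const state shared by all stages: UBO ranges uploaded as user
 * consts, UBO pointers (pre-a6xx), immediates/constant-data (when the
 * program changed), and image dimension metadata.
 */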
static inline void
emit_common_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   enum pipe_shader_type t) assert_dt
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];

   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
    * if we emit any of it we need to emit all.  This is because
    * we are using the same state-group-id each time for uniform
    * state, and if previous update is never evaluated (due to no
    * visible primitives in the current tile) then the new stateobj
    * completely replaces the old one.
    *
    * Possibly if we split up different parts of the const state to
    * different state-objects we could avoid this.
    */
   if (dirty && is_stateobj(ring))
      dirty = (enum fd_dirty_shader_state)~0;

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
      struct fd_constbuf_stateobj *constbuf;
      bool shader_dirty;

      constbuf = &ctx->constbuf[t];
      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

      ring_wfi(ctx->batch, ring);

      ir3_emit_user_consts(v, ring, constbuf);
      ir3_emit_ubos(ctx, v, ring, constbuf);
      if (shader_dirty)
         ir3_emit_immediates(v, ring);
   }

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
      ring_wfi(ctx->batch, ring);
      ir3_emit_image_dims(ctx->screen, v, ring, si);
   }
}

/* emit kernel params */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_KERNEL_PARAMS].offset_vec4;
   if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_KERNEL_PARAMS,
                            v->constlen)) {
      ring_wfi(ctx->batch, ring);
      emit_const_user(ring, v, offset * 4,
                      align(v->cs.req_input_mem, 4),
                      (uint32_t *)info->input);
   }
}

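/**
 * Builds the VS driver-params: draw id, base vertex/instance, stream-out
 * vertex count limit, indexed-draw flag, and (optionally) user clip planes.
 */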
static inline struct ir3_driver_params_vs
ir3_build_driver_params_vs(struct fd_context *ctx,
                           const struct pipe_draw_info *info,
                           const struct pipe_draw_start_count_bias *draw,
                           uint32_t draw_id, bool needs_ucp)
   assert_dt
{
   struct ir3_driver_params_vs vertex_params = {
      .draw_id = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
      .vtxid_base = info->index_size ? draw->index_bias : draw->start,
      .instid_base = info->start_instance,
      .vtxcnt_max = ctx->streamout.max_tf_vtx,
      .is_indexed_draw = info->index_size != 0 ? ~0 : 0,
   };
   if (needs_ucp) {
      struct pipe_clip_state *ucp = &ctx->ucp;
      for (unsigned i = 0; i < ARRAY_SIZE(vertex_params.ucp); i++) {
         vertex_params.ucp[i].x = fui(ucp->ucp[i][0]);
         vertex_params.ucp[i].y = fui(ucp->ucp[i][1]);
         vertex_params.ucp[i].z = fui(ucp->ucp[i][2]);
         vertex_params.ucp[i].w = fui(ucp->ucp[i][3]);
      }
   }
   return vertex_params;
}

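/**
 * Emits the VS driver-params.  For indirect draws the base vertex can't be
 * known on the CPU, so the params are staged in a buffer and the base vertex
 * (index_bias or start) is copied from the indirect-draw parameters with
 * mem_to_mem before being loaded as consts.
 */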
static inline void
ir3_emit_driver_params(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring, struct fd_context *ctx,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct ir3_driver_params_vs *vertex_params)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;

   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
    * plane.  However a binning pass may drop even some of these, so limit to
    * program max.
    */
   const uint32_t vertex_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(vertex_params_size <= dword_sizeof(*vertex_params));

   /* for indirect draw, we need to copy VTXID_BASE from
    * indirect-draw parameters buffer.. which is annoying
    * and means we can't easily emit these consts in cmd
    * stream so need to copy them to bo.
    */
   if (indirect && v->vtxid_base != INVALID_REG) {
      uint32_t vertex_params_area = align(vertex_params_size, 16);
      struct pipe_resource *vertex_params_rsc =
         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
                            PIPE_USAGE_STREAM, vertex_params_area * 4);
      unsigned src_off = indirect->offset;
      void *ptr;

      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
      memcpy(ptr, vertex_params, vertex_params_size * 4);

      if (info->index_size) {
         /* indexed draw, index_bias is 4th field: */
         src_off += 3 * 4;
      } else {
         /* non-indexed draw, start is 3rd field: */
         src_off += 2 * 4;
      }

      /* copy index_bias or start from draw params: */
      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                              src_off, 1);

      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                      vertex_params_rsc);

      pipe_resource_reference(&vertex_params_rsc, NULL);
   } else {
      emit_const_user(ring, v, offset * 4, vertex_params_size,
                      (uint32_t *)vertex_params);
   }

   /* if needed, emit stream-out buffer addresses: */
   if (vertex_params->vtxcnt_max > 0) {
      emit_tfbos(ctx, v, ring);
   }
}

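/**
 * Builds the TCS driver-params from the context's default outer/inner
 * tessellation levels (pipe_context::set_tess_state).
 */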
static inline struct ir3_driver_params_tcs
ir3_build_driver_params_tcs(struct fd_context *ctx)
   assert_dt
{
   return (struct ir3_driver_params_tcs) {
      .default_outer_level_x = fui(ctx->default_outer_level[0]),
      .default_outer_level_y = fui(ctx->default_outer_level[1]),
      .default_outer_level_z = fui(ctx->default_outer_level[2]),
      .default_outer_level_w = fui(ctx->default_outer_level[3]),
      .default_inner_level_x = fui(ctx->default_inner_level[0]),
      .default_inner_level_y = fui(ctx->default_inner_level[1]),
   };
}

static inline void
ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring,
                          struct fd_context *ctx)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   if (!ir3_const_can_upload(&const_state->allocs,
                             IR3_CONST_ALLOC_DRIVER_PARAMS,
                             v->constlen))
      return;

   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;
   struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);

   const uint32_t hs_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(hs_params_size <= dword_sizeof(hs_params));

   emit_const_user(ring, v, offset * 4, hs_params_size, (uint32_t *)&hs_params);
}

static inline void
ir3_emit_vs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->type == MESA_SHADER_VERTEX);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);

   /* emit driver params every time: */
   if (info && v->need_driver_params) {
      ring_wfi(ctx->batch, ring);

      struct ir3_driver_params_vs p =
         ir3_build_driver_params_vs(ctx, info, draw, 0, v->key.ucp_enables);

      ir3_emit_driver_params(v, ring, ctx, info, indirect, &p);
   }
}

static inline void
ir3_emit_fs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
   assert(v->type == MESA_SHADER_FRAGMENT);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}

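/**
 * Builds the CS driver-params: grid/workgroup dimensions, work dim,
 * subgroup size and related derived values.  The workgroup_id_* fields are
 * left zero here (see the TODOs below).
 */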
static inline struct ir3_driver_params_cs
ir3_build_driver_params_cs(const struct ir3_shader_variant *v,
                           const struct pipe_grid_info *info)
{
   return (struct ir3_driver_params_cs) {
      .num_work_groups_x = info->grid[0],
      .num_work_groups_y = info->grid[1],
      .num_work_groups_z = info->grid[2],
      .work_dim = info->work_dim,
      .base_group_x = info->grid_base[0],
      .base_group_y = info->grid_base[1],
      .base_group_z = info->grid_base[2],
      .subgroup_size = v->info.subgroup_size,
      .local_group_size_x = info->block[0],
      .local_group_size_y = info->block[1],
      .local_group_size_z = info->block[2],
      .subgroup_id_shift = util_logbase2(v->info.subgroup_size),
      .workgroup_id_x = 0, // TODO
      .workgroup_id_y = 0, // TODO
      .workgroup_id_z = 0, // TODO
   };
}

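/**
 * Emits the CS driver-params (and kernel params, for compute kernels).
 * a3xx/a4xx can inject these values directly, so beyond the kernel params
 * this is skipped there.  For indirect dispatch, the grid size is copied
 * from the indirect buffer into the staged params with mem_to_mem.
 */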
static inline void
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring, struct fd_context *ctx,
                          const struct pipe_grid_info *info)
   assert_dt
{
   emit_kernel_params(ctx, v, ring, info);

   /* a3xx/a4xx can inject these directly */
   if (ctx->screen->gen <= 4)
      return;

   /* emit compute-shader driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset =
      const_state->allocs.consts[IR3_CONST_ALLOC_DRIVER_PARAMS].offset_vec4;
   uint32_t size =
      align(MIN2(const_state->num_driver_params, (v->constlen - offset) * 4), 16);

   if (size > 0 &&
       ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_DRIVER_PARAMS,
                            v->constlen)) {
      ring_wfi(ctx->batch, ring);

      struct ir3_driver_params_cs compute_params =
         ir3_build_driver_params_cs(v, info);

      if (info->indirect) {
         struct pipe_resource *buffer = NULL;
         unsigned buffer_offset;

         u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
                       16, &compute_params, &buffer_offset, &buffer);

         /* Copy the indirect params into the driver param buffer.  The layout
          * of the indirect buffer should match the first three fields of
          * compute_params:
          */
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_x) == 0);
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_y) == 4);
         STATIC_ASSERT(offsetof(struct ir3_driver_params_cs, num_work_groups_z) == 8);

         ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
                                 info->indirect_offset, 3);

         emit_const_prsc(ring, v, offset * 4, buffer_offset, size, buffer);

         pipe_resource_reference(&buffer, NULL);
      } else {
         emit_const_user(ring, v, offset * 4, size, (uint32_t *)&compute_params);
      }
   }
}

/* emit compute-shader consts: */
static inline void
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info) assert_dt
{
   assert(gl_shader_stage_is_compute(v->type));

   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);

   ir3_emit_cs_driver_params(v, ring, ctx, info);
}