1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #define FD_BO_NO_HARDPIN 1
26
27 #include "fd6_const.h"
28 #include "fd6_compute.h"
29 #include "fd6_pack.h"
30
31 #define emit_const_user fd6_emit_const_user
32 #define emit_const_bo fd6_emit_const_bo
33 #include "ir3_const.h"
34
/* Emit constants inline into the command stream (SS6_DIRECT).
 *
 * regid: base const register (dword units; converted to vec4 units below)
 * dwords: buffer containing constant values
 * sizedwords: size of const value buffer, in dwords
 */
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   /* NOTE we cheat a bit here, since we know mesa is aligning
    * the size of the user buffer to 16 bytes.  And we want to
    * cut cycles in a hot path.
    */
   uint32_t align_sz = align(sizedwords, 4);

   /* The two branches differ only in the packet opcode (geometry vs
    * fragment/compute pipe); the payload is identical: dst_off and
    * num_unit are in vec4 units, data follows inline in the packet.
    */
   if (fd6_geom_stage(v->type)) {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(),
                 CP_LOAD_STATE6_2());
   } else {
      OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
                 CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                                  .state_src = SS6_DIRECT,
                                  .state_block = fd6_stage2shadersb(v->type),
                                  .num_unit = DIV_ROUND_UP(sizedwords, 4)),
                 CP_LOAD_STATE6_1(),
                 CP_LOAD_STATE6_2());
   }
}
/* Emit constants sourced indirectly from a buffer object (SS6_INDIRECT):
 * the CP reads sizedwords of const data from bo+offset at execution time.
 *
 * regid: base const register (dword units)
 * offset: byte offset of the const data within bo
 */
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 4;   /* destination offset in vec4 units */
   assert(dst_off % 4 == 0);
   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);   /* size in vec4 units */
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   /* Same opcode split as fd6_emit_const_user: GEOM vs FRAG pipe, with
    * an otherwise identical payload (reloc'd source address instead of
    * inline data).
    */
   if (fd6_geom_stage(v->type)) {
      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   } else {
      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   }
}
98
/* Hook consumed by the ir3_const.h helpers included above (presumably —
 * it is static and not referenced elsewhere in this file).  On a6xx the
 * const-emit paths always target stateobj ringbuffers, so this is
 * unconditionally true.
 */
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   (void)ring;   /* unused: the answer does not depend on the ring */
   return true;
}
104
/* Stub required by the ir3_const.h template-include above; the
 * const-ptr path is never taken on a6xx, hence the unreachable().
 */
static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}
112
/* Emit the primitive_param driver consts for one tess/geom stage. */
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                       uint32_t *params, int num_params)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const unsigned regid = const_state->offsets.primitive_param;
   /* Skip the emit entirely when the primitive_param vec4 falls beyond
    * the variant's constlen (i.e. the consts are unused by this
    * variant).  Note `size` is only used as an emit/skip gate: when it
    * passes, all num_params dwords are written, not `size`.
    */
   int size = MIN2(1 + regid, v->constlen) - regid;
   if (size > 0)
      fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
123
124 struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit * emit)125 fd6_build_tess_consts(struct fd6_emit *emit)
126 {
127 struct fd_context *ctx = emit->ctx;
128
129 struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
130 ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
131
132 /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
133 * size is dwords, since that's what LDG/STG use.
134 */
135 unsigned num_vertices = emit->hs
136 ? ctx->patch_vertices
137 : emit->gs->gs.vertices_in;
138
139 uint32_t vs_params[4] = {
140 emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
141 emit->vs->output_size * 4, /* vs vertex stride */
142 0, 0};
143
144 emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
145
146 if (emit->hs) {
147 uint32_t hs_params[4] = {
148 emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
149 emit->vs->output_size * 4, /* vs vertex stride */
150 emit->hs->output_size, ctx->patch_vertices};
151
152 emit_stage_tess_consts(constobj, emit->hs, hs_params,
153 ARRAY_SIZE(hs_params));
154
155 if (emit->gs)
156 num_vertices = emit->gs->gs.vertices_in;
157
158 uint32_t ds_params[4] = {
159 emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
160 emit->ds->output_size * 4, /* ds vertex stride */
161 emit->hs->output_size, /* hs vertex stride (dwords) */
162 emit->hs->tess.tcs_vertices_out};
163
164 emit_stage_tess_consts(constobj, emit->ds, ds_params,
165 ARRAY_SIZE(ds_params));
166 }
167
168 if (emit->gs) {
169 const struct ir3_shader_variant *prev;
170 if (emit->ds)
171 prev = emit->ds;
172 else
173 prev = emit->vs;
174
175 uint32_t gs_params[4] = {
176 prev->output_size * num_vertices * 4, /* ds primitive stride */
177 prev->output_size * 4, /* ds vertex stride */
178 0,
179 0,
180 };
181
182 num_vertices = emit->gs->gs.vertices_in;
183 emit_stage_tess_consts(constobj, emit->gs, gs_params,
184 ARRAY_SIZE(gs_params));
185 }
186
187 return constobj;
188 }
189
/* Emit the ST6_UBO descriptor table for all UBOs referenced by variant v.
 * Each descriptor is two dwords: a 64b base address with the buffer size
 * (in vec4 units) packed into the high dword via A6XX_UBO_1_SIZE.
 */
static void
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
              struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   int num_ubos = const_state->num_ubos;

   if (!num_ubos)
      return;

   /* 3 dwords of CP_LOAD_STATE6 header + 2 dwords per UBO descriptor: */
   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   for (int i = 0; i < num_ubos; i++) {
      /* NIR constant data is packed into the end of the shader. */
      if (i == const_state->consts_ubo.idx) {
         int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
         OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
         continue;
      }

      struct pipe_constant_buffer *cb = &constbuf->cb[i];

      if (cb->buffer) {
         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
      } else {
         /* Unbound slot: emit a recognizably-bogus address tagged with
          * the slot index, and a zero size.
          */
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
      }
   }
}
230
231 unsigned
fd6_user_consts_cmdstream_size(const struct ir3_shader_variant * v)232 fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
233 {
234 if (!v)
235 return 0;
236
237 struct ir3_const_state *const_state = ir3_const_state(v);
238 struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
239 unsigned packets, size;
240
241 /* pre-calculate size required for userconst stateobj: */
242 ir3_user_consts_size(ubo_state, &packets, &size);
243
244 /* also account for UBO addresses: */
245 packets += 1;
246 size += 2 * const_state->num_ubos;
247
248 unsigned sizedwords = (4 * packets) + size;
249 return sizedwords * 4;
250 }
251
/* Emit one stage's user constants (shared ir3 path) followed by its
 * UBO descriptor table.
 */
static void
emit_user_consts(const struct ir3_shader_variant *v,
                 struct fd_ringbuffer *ring,
                 struct fd_constbuf_stateobj *constbuf)
{
   ir3_emit_user_consts(v, ring, constbuf);
   fd6_emit_ubos(v, ring, constbuf);
}
260
261 template <fd6_pipeline_type PIPELINE>
262 struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit * emit)263 fd6_build_user_consts(struct fd6_emit *emit)
264 {
265 struct fd_context *ctx = emit->ctx;
266 unsigned sz = emit->prog->user_consts_cmdstream_size;
267
268 struct fd_ringbuffer *constobj =
269 fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
270
271 emit_user_consts(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
272 if (PIPELINE == HAS_TESS_GS) {
273 if (emit->hs) {
274 emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
275 emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
276 }
277 if (emit->gs) {
278 emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
279 }
280 }
281 emit_user_consts(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
282
283 return constobj;
284 }
285
286 template struct fd_ringbuffer * fd6_build_user_consts<HAS_TESS_GS>(struct fd6_emit *emit);
287 template struct fd_ringbuffer * fd6_build_user_consts<NO_TESS_GS>(struct fd6_emit *emit);
288
/* Build a streaming stateobj with the per-draw driver params (draw/
 * indirect-draw parameters, draw id, HS params) for each stage that
 * needs them.  Returns NULL — and clears fd6_ctx->has_dp_state — when
 * no stage consumes driver params.
 */
template <fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned num_dp = emit->prog->num_driver_params;

   if (!num_dp) {
      fd6_ctx->has_dp_state = false;
      return NULL;
   }

   /* NOTE(review): sizing assumes IR3_DP_VS_COUNT bounds the per-stage
    * driver-param count for every stage emitted below — confirm the
    * GS/HS/DS counts fit within it.
    */
   unsigned size_dwords = num_dp * (4 + IR3_DP_VS_COUNT); /* 4dw PKT7 header */
   struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);

   if (emit->vs->need_driver_params) {
      ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
                             emit->indirect, emit->draw, emit->draw_id);
   }

   if (PIPELINE == HAS_TESS_GS) {
      /* Only the VS receives the draw id; other stages pass 0. */
      if (emit->gs && emit->gs->need_driver_params) {
         ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
                                emit->indirect, emit->draw, 0);
      }

      if (emit->hs && emit->hs->need_driver_params) {
         ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx);
      }

      if (emit->ds && emit->ds->need_driver_params) {
         ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
                                emit->indirect, emit->draw, 0);
      }
   }

   fd6_ctx->has_dp_state = true;

   return dpconstobj;
}

template struct fd_ringbuffer * fd6_build_driver_params<HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<NO_TESS_GS>(struct fd6_emit *emit);
334
/* Emit the compute-shader driver params derived from the grid info;
 * thin wrapper over the shared ir3 helper.
 */
void
fd6_emit_cs_driver_params(struct fd_context *ctx,
                          struct fd_ringbuffer *ring,
                          struct fd6_compute_state *cs,
                          const struct pipe_grid_info *info)
{
   ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
}
343
/* Emit user constants + UBO table for the bound compute shader variant. */
void
fd6_emit_cs_user_consts(struct fd_context *ctx,
                        struct fd_ringbuffer *ring,
                        struct fd6_compute_state *cs)
{
   emit_user_consts(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}
351
/* Emit the variant's immediate constants; delegates to the shared ir3
 * helper (which uses the fd6_emit_const_* hooks bound above).
 */
void
fd6_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   ir3_emit_immediates(v, ring);
}
358
/* Emit the producer→consumer link-map constants for a stage pair;
 * delegates to the shared ir3 helper.
 */
void
fd6_emit_link_map(const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   ir3_emit_link_map(producer, consumer, ring);
}
366