1 /*
2 * Copyright © 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 * SPDX-License-Identifier: MIT
5 *
6 * Authors:
7 * Rob Clark <robclark@freedesktop.org>
8 */
9
10 #define FD_BO_NO_HARDPIN 1
11
12 #include "pipe/p_state.h"
13
14 #include "freedreno_resource.h"
15 #include "freedreno_state.h"
16
17 #include "fd6_image.h"
18 #include "fd6_pack.h"
19 #include "fd6_resource.h"
20 #include "fd6_screen.h"
21 #include "fd6_texture.h"
22
23 static const uint8_t swiz_identity[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
24 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W};
25
26 static uint64_t
rsc_iova(struct pipe_resource * prsc,unsigned offset)27 rsc_iova(struct pipe_resource *prsc, unsigned offset)
28 {
29 if (!prsc)
30 return 0;
31 return fd_bo_get_iova(fd_resource(prsc)->bo) + offset;
32 }
33
/* Build the bindless IBO descriptor for an SSBO binding.  SSBOs are
 * encoded as R32_UINT texel buffers (raw dword access) with an
 * identity swizzle.
 */
static void
fd6_ssbo_descriptor(struct fd_context *ctx,
                    const struct pipe_shader_buffer *buf, uint32_t *descriptor)
{
   fdl6_buffer_view_init(
      descriptor,
      PIPE_FORMAT_R32_UINT,
      swiz_identity, rsc_iova(buf->buffer, buf->buffer_offset),
      buf->buffer_size);
}
44
/* Build the bindless IBO descriptor for an image binding.  Buffer images
 * become texel-buffer descriptors; all other targets get a full storage
 * image view built via fdl6_view_init().
 */
static void
fd6_image_descriptor(struct fd_context *ctx, const struct pipe_image_view *buf,
                     uint32_t *descriptor)
{
   if (buf->resource->target == PIPE_BUFFER) {
      /* Clamp to the max texel-buffer element count for the view format: */
      uint32_t size = fd_clamp_buffer_size(buf->format, buf->u.buf.size,
                                           A4XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT);

      fdl6_buffer_view_init(descriptor, buf->format, swiz_identity,
                            rsc_iova(buf->resource, buf->u.buf.offset),
                            size);
   } else {
      struct fdl_view_args args = {
         .chip = ctx->screen->gen,

         .iova = rsc_iova(buf->resource, 0),

         /* Images view exactly one mip level: */
         .base_miplevel = buf->u.tex.level,
         .level_count = 1,

         .base_array_layer = buf->u.tex.first_layer,
         .layer_count = buf->u.tex.last_layer - buf->u.tex.first_layer + 1,

         .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
         .format = buf->format,

         .type = fdl_type_from_pipe_target(buf->resource->target),
         .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                            FDL_CHROMA_LOCATION_COSITED_EVEN},
      };

      /* fdl6_view makes the storage descriptor treat cubes like a 2D array (so
       * you can reference a specific layer), but we need to do that for the
       * texture descriptor as well to get our layer.
       */
      if (args.type == FDL_VIEW_TYPE_CUBE)
         args.type = FDL_VIEW_TYPE_2D;

      struct fdl6_view view;
      struct fd_resource *rsc = fd_resource(buf->resource);
      /* Only plane 0 is used; multi-planar handling happens elsewhere. */
      const struct fdl_layout *layouts[3] = { &rsc->layout, NULL, NULL };
      fdl6_view_init(&view, layouts, &args,
                     ctx->screen->info->a6xx.has_z24uint_s8uint);

      memcpy(descriptor, view.storage_descriptor, sizeof(view.storage_descriptor));
   }
}
93
94 static struct fd6_descriptor_set *
descriptor_set(struct fd_context * ctx,enum pipe_shader_type shader)95 descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader)
96 assert_dt
97 {
98 struct fd6_context *fd6_ctx = fd6_context(ctx);
99
100 if (shader == PIPE_SHADER_COMPUTE)
101 return &fd6_ctx->cs_descriptor_set;
102
103 unsigned idx = ir3_shader_descriptor_set(shader);
104 assert(idx < ARRAY_SIZE(fd6_ctx->descriptor_sets));
105 return &fd6_ctx->descriptor_sets[idx];
106 }
107
108 static void
clear_descriptor(struct fd6_descriptor_set * set,unsigned slot)109 clear_descriptor(struct fd6_descriptor_set *set, unsigned slot)
110 {
111 /* The 2nd dword of the descriptor contains the width and height.
112 * so a non-zero value means the slot was previously valid and
113 * must be cleared. We can't leave dangling descriptors as the
114 * shader could use variable indexing into the set of IBOs to
115 * get at them. See piglit arb_shader_image_load_store-invalid.
116 */
117 if (!set->descriptor[slot][1])
118 return;
119
120 fd6_descriptor_set_invalidate(set);
121
122 memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot]));
123 }
124
125 static void
validate_image_descriptor(struct fd_context * ctx,struct fd6_descriptor_set * set,unsigned slot,struct pipe_image_view * img)126 validate_image_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
127 unsigned slot, struct pipe_image_view *img)
128 {
129 struct fd_resource *rsc = fd_resource(img->resource);
130
131 if (!rsc || (rsc->seqno == set->seqno[slot]))
132 return;
133
134 fd6_descriptor_set_invalidate(set);
135
136 fd6_image_descriptor(ctx, img, set->descriptor[slot]);
137 set->seqno[slot] = rsc->seqno;
138 }
139
140 static void
validate_buffer_descriptor(struct fd_context * ctx,struct fd6_descriptor_set * set,unsigned slot,struct pipe_shader_buffer * buf)141 validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
142 unsigned slot, struct pipe_shader_buffer *buf)
143 {
144 struct fd_resource *rsc = fd_resource(buf->buffer);
145
146 if (!rsc || (rsc->seqno == set->seqno[slot]))
147 return;
148
149 fd6_descriptor_set_invalidate(set);
150
151 fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]);
152 set->seqno[slot] = rsc->seqno;
153 }
154
/* Build bindless descriptor state, returns ownership of state reference */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
                         bool append_fb_read)
{
   struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
   struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING);

   /* Don't re-use a previous descriptor set if appending the
    * fb-read descriptor, as that can change across batches.
    * The normal descriptor slots are safe to re-use even if
    * the state is dirtied due to batch flush, but the fb-read
    * slot is not.
    */
   if (unlikely(append_fb_read))
      fd6_descriptor_set_invalidate(set);

   /*
    * Re-validate the descriptor slots, ie. in the case that
    * the resource gets rebound due to use with non-UBWC
    * compatible view format, etc.
    *
    * While we are at it, attach the BOs to the ring.
    */

   u_foreach_bit (b, bufso->enabled_mask) {
      struct pipe_shader_buffer *buf = &bufso->sb[b];
      unsigned idx = b + IR3_BINDLESS_SSBO_OFFSET;
      validate_buffer_descriptor(ctx, set, idx, buf);
   }

   u_foreach_bit (b, imgso->enabled_mask) {
      struct pipe_image_view *img = &imgso->si[b];
      unsigned idx = b + IR3_BINDLESS_IMAGE_OFFSET;
      validate_image_descriptor(ctx, set, idx, img);
   }

   /* A NULL bo means the CPU-side descriptors were invalidated; (re)allocate
    * the GPU copy and upload the current descriptor contents:
    */
   if (!set->bo) {
      set->bo = fd_bo_new(
         ctx->dev, sizeof(set->descriptor),
         /* Use same flags as ringbuffer so hits the same heap,
          * because those will already have the FD_RELOC_DUMP
          * flag set:
          */
         FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT,
         "%s bindless", _mesa_shader_stage_to_abbrev(shader));
      fd_bo_mark_for_dump(set->bo);

      uint32_t *desc_buf = (uint32_t *)fd_bo_map(set->bo);

      memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));

      if (unlikely(append_fb_read)) {
         /* Reserve A6XX_MAX_RENDER_TARGETS image slots for fb-read */
         unsigned idx = IR3_BINDLESS_DESC_COUNT - 1 - A6XX_MAX_RENDER_TARGETS;

         for (int i = 0; i < ctx->batch->framebuffer.nr_cbufs; i++) {
            /* This is patched with the appropriate descriptor for GMEM or
             * sysmem rendering path in fd6_gmem
             */
            struct fd_cs_patch patch = {
               .cs = &desc_buf[(idx + i) * FDL6_TEX_CONST_DWORDS],
               .val = i,
            };
            util_dynarray_append(&ctx->batch->fb_read_patches,
                                 __typeof__(patch), patch);
         }
      }
   }

   /*
    * Build stateobj emitting reg writes to configure the descriptor
    * set and CP_LOAD_STATE packets to preload the state.
    *
    * Note that unless the app is using the max # of SSBOs there will
    * be a gap between the IBO descriptors used for SSBOs and for images,
    * so emit this as two CP_LOAD_STATE packets:
    */

   unsigned idx = ir3_shader_descriptor_set(shader);

   fd_ringbuffer_attach_bo(ring, set->bo);

   if (shader == PIPE_SHADER_COMPUTE) {
      /* Invalidate any previously-bound compute bindless base, then point
       * the hw's bindless base register(s) at our descriptor BO.  A6xx has
       * a separate HLSQ copy of the base descriptor; later gens do not.
       */
      OUT_REG(ring,
         HLSQ_INVALIDATE_CMD(
               CHIP,
               .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
         )
      );
      OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));

      if (CHIP == A6XX) {
         OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
               idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
         ));
      }

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
                 CP_LOAD_STATE6_0(
                       .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                       .state_type  = ST6_IBO,
                       .state_src   = SS6_BINDLESS,
                       .state_block = SB6_CS_SHADER,
                       .num_unit    = util_last_bit(bufso->enabled_mask),
                 ),
                 CP_LOAD_STATE6_EXT_SRC_ADDR(
                       /* This isn't actually an address: */
                       .qword = (idx << 28) |
                          IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
                 ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
                 CP_LOAD_STATE6_0(
                       .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                       .state_type  = ST6_IBO,
                       .state_src   = SS6_BINDLESS,
                       .state_block = SB6_CS_SHADER,
                       .num_unit    = util_last_bit(imgso->enabled_mask),
                 ),
                 CP_LOAD_STATE6_EXT_SRC_ADDR(
                       /* This isn't actually an address: */
                       .qword = (idx << 28) |
                          IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
                 ),
         );
      }
   } else {
      /* Same dance for the graphics pipeline, using the gfx bindless
       * base registers and shader state blocks instead:
       */
      OUT_REG(ring,
         HLSQ_INVALIDATE_CMD(
               CHIP,
               .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
         )
      );
      OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));
      if (CHIP == A6XX) {
         OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
               idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
         ));
      }

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
                 CP_LOAD_STATE6_0(
                       .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                       .state_type  = ST6_SHADER,
                       .state_src   = SS6_BINDLESS,
                       .state_block = SB6_IBO,
                       .num_unit    = util_last_bit(bufso->enabled_mask),
                 ),
                 CP_LOAD_STATE6_EXT_SRC_ADDR(
                       /* This isn't actually an address: */
                       .qword = (idx << 28) |
                          IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
                 ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
                 CP_LOAD_STATE6_0(
                       .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                       .state_type  = ST6_SHADER,
                       .state_src   = SS6_BINDLESS,
                       .state_block = SB6_IBO,
                       .num_unit    = util_last_bit(imgso->enabled_mask),
                 ),
                 CP_LOAD_STATE6_EXT_SRC_ADDR(
                       /* This isn't actually an address: */
                       .qword = (idx << 28) |
                          IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
                 ),
         );
      }
   }

   return ring;
}
FD_GENX(fd6_build_bindless_state);
347
348 static void
fd6_set_shader_buffers(struct pipe_context * pctx,enum pipe_shader_type shader,unsigned start,unsigned count,const struct pipe_shader_buffer * buffers,unsigned writable_bitmask)349 fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
350 unsigned start, unsigned count,
351 const struct pipe_shader_buffer *buffers,
352 unsigned writable_bitmask)
353 in_dt
354 {
355 struct fd_context *ctx = fd_context(pctx);
356 struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader];
357 struct fd6_descriptor_set *set = descriptor_set(ctx, shader);
358
359 fd_set_shader_buffers(pctx, shader, start, count, buffers, writable_bitmask);
360
361 for (unsigned i = 0; i < count; i++) {
362 unsigned n = i + start;
363 unsigned slot = n + IR3_BINDLESS_SSBO_OFFSET;
364 struct pipe_shader_buffer *buf = &so->sb[n];
365
366 /* invalidate descriptor: */
367 set->seqno[slot] = 0;
368
369 if (!buf->buffer) {
370 clear_descriptor(set, slot);
371 continue;
372 }
373
374 /* update descriptor: */
375 validate_buffer_descriptor(ctx, set, slot, buf);
376 }
377 }
378
/* pipe_context::set_shader_images hook: update core state tracking,
 * resolve UBWC-compat issues for the newly bound images, and refresh
 * the affected bindless image descriptor slots.
 */
static void
fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      const struct pipe_image_view *images)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   /* Let the core state tracker record the new bindings first: */
   fd_set_shader_images(pctx, shader, start, count, unbind_num_trailing_slots,
                        images);

   for (unsigned i = 0; i < count; i++) {
      unsigned n = i + start;
      unsigned slot = n + IR3_BINDLESS_IMAGE_OFFSET;
      struct pipe_image_view *buf = &so->si[n];

      /* invalidate descriptor: */
      set->seqno[slot] = 0;

      if (!buf->resource) {
         clear_descriptor(set, slot);
         continue;
      }

      struct fd_resource *rsc = fd_resource(buf->resource);

      if (buf->shader_access & (PIPE_IMAGE_ACCESS_COHERENT |
                                PIPE_IMAGE_ACCESS_VOLATILE)) {
         /* UBWC compression cannot be used with coherent/volatile access
          * due to the extra caching (CCU) involved:
          */
         if (rsc->layout.ubwc) {
            bool linear =
               fd6_check_valid_format(rsc, buf->format) == DEMOTE_TO_LINEAR;

            perf_debug_ctx(ctx,
                           "%" PRSC_FMT ": demoted to %suncompressed due to coherent/volatile use as %s",
                           PRSC_ARGS(&rsc->b.b), linear ? "linear+" : "",
                           util_format_short_name(buf->format));

            fd_resource_uncompress(ctx, rsc, linear);
         }
      } else {
         /* May rebind the resource if the view format is not UBWC
          * compatible with the current layout:
          */
         fd6_validate_format(ctx, rsc, buf->format);
      }

      /* update descriptor: */
      validate_image_descriptor(ctx, set, slot, buf);
   }

   /* Zero out any trailing slots the state tracker asked us to unbind: */
   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
      unsigned slot = i + start + count + IR3_BINDLESS_IMAGE_OFFSET;

      set->seqno[slot] = 0;
      clear_descriptor(set, slot);
   }
}
439
440 void
fd6_image_init(struct pipe_context * pctx)441 fd6_image_init(struct pipe_context *pctx)
442 {
443 pctx->set_shader_buffers = fd6_set_shader_buffers;
444 pctx->set_shader_images = fd6_set_shader_images;
445 }
446