/*
 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#define FD_BO_NO_HARDPIN 1

#include "pipe/p_state.h"

#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd6_image.h"
#include "fd6_pack.h"
#include "fd6_resource.h"
#include "fd6_screen.h"
#include "fd6_texture.h"

static const uint8_t swiz_identity[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                                         PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W};

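/* Return the GPU address (iova) of a resource's backing BO at the given
 * byte offset, or 0 for a NULL resource:
 */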
static uint64_t
rsc_iova(struct pipe_resource *prsc, unsigned offset)
{
   if (!prsc)
      return 0;
   return fd_bo_get_iova(fd_resource(prsc)->bo) + offset;
}

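/* Build the descriptor for an SSBO, encoded as an R32_UINT texel-buffer
 * view covering the bound range of the buffer:
 */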
static void
fd6_ssbo_descriptor(struct fd_context *ctx,
                    const struct pipe_shader_buffer *buf, uint32_t *descriptor)
{
   fdl6_buffer_view_init(
      descriptor,
      PIPE_FORMAT_R32_UINT,
      swiz_identity, rsc_iova(buf->buffer, buf->buffer_offset),
      buf->buffer_size);
}

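/* Build the descriptor for an image view: a buffer view for PIPE_BUFFER
 * targets, otherwise a storage-image view of a single miplevel of the
 * texture:
 */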
static void
fd6_image_descriptor(struct fd_context *ctx, const struct pipe_image_view *buf,
                     uint32_t *descriptor)
{
   if (buf->resource->target == PIPE_BUFFER) {
      uint32_t size = fd_clamp_buffer_size(buf->format, buf->u.buf.size,
                                           A4XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT);

      fdl6_buffer_view_init(descriptor, buf->format, swiz_identity,
                            rsc_iova(buf->resource, buf->u.buf.offset),
                            size);
   } else {
      struct fdl_view_args args = {
         .chip = A6XX,

         .iova = rsc_iova(buf->resource, 0),

         .base_miplevel = buf->u.tex.level,
         .level_count = 1,

         .base_array_layer = buf->u.tex.first_layer,
         .layer_count = buf->u.tex.last_layer - buf->u.tex.first_layer + 1,

         .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
         .format = buf->format,

         .type = fdl_type_from_pipe_target(buf->resource->target),
         .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                            FDL_CHROMA_LOCATION_COSITED_EVEN},
      };

      /* fdl6_view makes the storage descriptor treat cubes like a 2D array (so
       * you can reference a specific layer), but we need to do that for the
       * texture descriptor as well to get our layer.
       */
      if (args.type == FDL_VIEW_TYPE_CUBE)
         args.type = FDL_VIEW_TYPE_2D;

      struct fdl6_view view;
      struct fd_resource *rsc = fd_resource(buf->resource);
      const struct fdl_layout *layouts[3] = { &rsc->layout, NULL, NULL };
      fdl6_view_init(&view, layouts, &args,
                     ctx->screen->info->a6xx.has_z24uint_s8uint);

      memcpy(descriptor, view.storage_descriptor, sizeof(view.storage_descriptor));
   }
}

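/* Return the descriptor set for a shader stage: compute has its own
 * dedicated set, while each graphics stage maps to a fixed set index
 * via ir3_shader_descriptor_set():
 */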
static struct fd6_descriptor_set *
descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader)
   assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);

   if (shader == PIPE_SHADER_COMPUTE)
      return &fd6_ctx->cs_descriptor_set;

   unsigned idx = ir3_shader_descriptor_set(shader);
   assert(idx < ARRAY_SIZE(fd6_ctx->descriptor_sets));
   return &fd6_ctx->descriptor_sets[idx];
}

static void
clear_descriptor(struct fd6_descriptor_set *set, unsigned slot)
{
   /* The 2nd dword of the descriptor contains the width and height,
    * so a non-zero value means the slot was previously valid and
    * must be cleared.  We can't leave dangling descriptors as the
    * shader could use variable indexing into the set of IBOs to
    * get at them.  See piglit arb_shader_image_load_store-invalid.
    */
   if (!set->descriptor[slot][1])
      return;

   fd6_descriptor_set_invalidate(set);

   memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot]));
}

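/* Re-build the image descriptor for a slot if the underlying resource
 * has changed, tracked via the resource seqno:
 */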
static void
validate_image_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
                          unsigned slot, struct pipe_image_view *img)
{
   struct fd_resource *rsc = fd_resource(img->resource);

   if (!rsc || (rsc->seqno == set->seqno[slot]))
      return;

   fd6_descriptor_set_invalidate(set);

   fd6_image_descriptor(ctx, img, set->descriptor[slot]);
   set->seqno[slot] = rsc->seqno;
}

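/* Same as validate_image_descriptor(), but for SSBO slots: */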
static void
validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
                           unsigned slot, struct pipe_shader_buffer *buf)
{
   struct fd_resource *rsc = fd_resource(buf->buffer);

   if (!rsc || (rsc->seqno == set->seqno[slot]))
      return;

   fd6_descriptor_set_invalidate(set);

   fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]);
   set->seqno[slot] = rsc->seqno;
}

/* Build bindless descriptor state, returns ownership of state reference */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
                         bool append_fb_read)
{
   struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
   struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING);

   /* Don't re-use a previous descriptor set if appending the
    * fb-read descriptor, as that can change across batches.
    * The normal descriptor slots are safe to re-use even if
    * the state is dirtied due to batch flush, but the fb-read
    * slot is not.
    */
   if (unlikely(append_fb_read))
      fd6_descriptor_set_invalidate(set);

   /*
    * Re-validate the descriptor slots, i.e. in the case that
    * the resource gets rebound due to use with a non-UBWC
    * compatible view format, etc.
    *
    * While we are at it, attach the BOs to the ring.
    */

   u_foreach_bit (b, bufso->enabled_mask) {
      struct pipe_shader_buffer *buf = &bufso->sb[b];
      unsigned idx = b + IR3_BINDLESS_SSBO_OFFSET;
      validate_buffer_descriptor(ctx, set, idx, buf);
   }

   u_foreach_bit (b, imgso->enabled_mask) {
      struct pipe_image_view *img = &imgso->si[b];
      unsigned idx = b + IR3_BINDLESS_IMAGE_OFFSET;
      validate_image_descriptor(ctx, set, idx, img);
   }

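   /* (Re-)allocate and upload the descriptor set when it has no backing
    * BO, i.e. on first use or after the set has been invalidated:
    */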
   if (!set->bo) {
      set->bo = fd_bo_new(
            ctx->dev, sizeof(set->descriptor),
            /* Use same flags as ringbuffer so hits the same heap,
             * because those will already have the FD_RELOC_DUMP
             * flag set:
             */
            FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT,
            "%s bindless", _mesa_shader_stage_to_abbrev(shader));
      fd_bo_mark_for_dump(set->bo);

      uint32_t *desc_buf = (uint32_t *)fd_bo_map(set->bo);

      memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));

      if (unlikely(append_fb_read)) {
         /* Reserve A6XX_MAX_RENDER_TARGETS image slots for fb-read */
         unsigned idx = IR3_BINDLESS_DESC_COUNT - 1 - A6XX_MAX_RENDER_TARGETS;

         for (int i = 0; i < ctx->batch->framebuffer.nr_cbufs; i++) {
            /* This is patched with the appropriate descriptor for GMEM or
             * sysmem rendering path in fd6_gmem
             */
            struct fd_cs_patch patch = {
               .cs = &desc_buf[(idx + i) * FDL6_TEX_CONST_DWORDS],
               .val = i,
            };
            util_dynarray_append(&ctx->batch->fb_read_patches,
                                 __typeof__(patch), patch);
         }
      }
   }

   /*
    * Build stateobj emitting reg writes to configure the descriptor
    * set and CP_LOAD_STATE packets to preload the state.
    *
    * Note that unless the app is using the max # of SSBOs there will
    * be a gap between the IBO descriptors used for SSBOs and for images,
    * so emit this as two CP_LOAD_STATE packets:
    */
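   /* In SS6_BINDLESS mode, the CP_LOAD_STATE6 "ext src addr" is not a
    * pointer (see the .qword values below): bits 31:28 select the bindless
    * descriptor set, and the low bits give the dword offset of the first
    * descriptor within that set.
    */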

   unsigned idx = ir3_shader_descriptor_set(shader);

   fd_ringbuffer_attach_bo(ring, set->bo);

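   /* Compute has its own dedicated bindless-base registers; all graphics
    * stages share the SP/HLSQ_BINDLESS_BASE_DESCRIPTOR state:
    */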
   if (shader == PIPE_SHADER_COMPUTE) {
      OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .cs_bindless = 0x1f));
      OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));
      OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                  .state_type  = ST6_IBO,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_CS_SHADER,
                  .num_unit    = util_last_bit(bufso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                  .state_type  = ST6_IBO,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_CS_SHADER,
                  .num_unit    = util_last_bit(imgso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }
   } else {
      OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .gfx_bindless = 0x1f));
      OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));
      OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                  .state_type  = ST6_SHADER,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_IBO,
                  .num_unit    = util_last_bit(bufso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                  .state_type  = ST6_SHADER,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_IBO,
                  .num_unit    = util_last_bit(imgso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }
   }

   return ring;
}

template struct fd_ringbuffer *
fd6_build_bindless_state<A6XX>(struct fd_context *ctx,
                               enum pipe_shader_type shader, bool append_fb_read);
template struct fd_ringbuffer *
fd6_build_bindless_state<A7XX>(struct fd_context *ctx,
                               enum pipe_shader_type shader, bool append_fb_read);

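/* pipe_context::set_shader_buffers() hook: updates the core SSBO state
 * and eagerly refreshes the descriptors for the rebound slots:
 */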
static void
fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
                       unsigned start, unsigned count,
                       const struct pipe_shader_buffer *buffers,
                       unsigned writable_bitmask)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   fd_set_shader_buffers(pctx, shader, start, count, buffers, writable_bitmask);

   for (unsigned i = 0; i < count; i++) {
      unsigned n = i + start;
      unsigned slot = n + IR3_BINDLESS_SSBO_OFFSET;
      struct pipe_shader_buffer *buf = &so->sb[n];

      /* invalidate descriptor: */
      set->seqno[slot] = 0;

      if (!buf->buffer) {
         clear_descriptor(set, slot);
         continue;
      }

      /* update descriptor: */
      validate_buffer_descriptor(ctx, set, slot, buf);
   }
}

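/* pipe_context::set_shader_images() hook: in addition to refreshing the
 * descriptors, this demotes UBWC-compressed resources when an image is
 * bound for coherent/volatile access:
 */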
static void
fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      const struct pipe_image_view *images)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   fd_set_shader_images(pctx, shader, start, count, unbind_num_trailing_slots,
                        images);

   for (unsigned i = 0; i < count; i++) {
      unsigned n = i + start;
      unsigned slot = n + IR3_BINDLESS_IMAGE_OFFSET;
      struct pipe_image_view *buf = &so->si[n];

      /* invalidate descriptor: */
      set->seqno[slot] = 0;

      if (!buf->resource) {
         clear_descriptor(set, slot);
         continue;
      }

      struct fd_resource *rsc = fd_resource(buf->resource);

      if (buf->shader_access & (PIPE_IMAGE_ACCESS_COHERENT |
                                PIPE_IMAGE_ACCESS_VOLATILE)) {
         /* UBWC compression cannot be used with coherent/volatile access
          * due to the extra caching (CCU) involved:
          */
         if (rsc->layout.ubwc) {
            bool linear =
                  fd6_check_valid_format(rsc, buf->format) == DEMOTE_TO_LINEAR;

            perf_debug_ctx(ctx,
                           "%" PRSC_FMT ": demoted to %suncompressed due to coherent/volatile use as %s",
                           PRSC_ARGS(&rsc->b.b), linear ? "linear+" : "",
                           util_format_short_name(buf->format));

            fd_resource_uncompress(ctx, rsc, linear);
         }
      } else {
         fd6_validate_format(ctx, rsc, buf->format);
      }

      /* update descriptor: */
      validate_image_descriptor(ctx, set, slot, buf);
   }

   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
      unsigned slot = i + start + count + IR3_BINDLESS_IMAGE_OFFSET;

      set->seqno[slot] = 0;
      clear_descriptor(set, slot);
   }
}

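/* Hook up the image/SSBO-related pipe_context entry points: */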
void
fd6_image_init(struct pipe_context *pctx)
{
   pctx->set_shader_buffers = fd6_set_shader_buffers;
   pctx->set_shader_images = fd6_set_shader_images;
}