/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/perf/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip emitting cmdstream to record a
    * timestamp:
    */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* submit_lock serializes cmdstream emission and flush.  Acquire before
    * screen->lock.
    */
   simple_mtx_t submit_lock;

   /* Do we need to mem2gmem before rendering?  We don't, if for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents.  Keep track of which buffer(s) are cleared, or need
    * restore.  Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared, and 'partial_cleared' bits will be set if you must
    * check cleared_scissor.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
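   /* For example, a full-surface glClear() of the color buffer sets
    * FD_BUFFER_COLOR in both 'cleared' and 'invalidated', so no color
    * restore (mem2gmem) is needed for that batch.
    */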
   enum {
      /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
      FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
      FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
      FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
      FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
   } invalidated, cleared, fast_cleared, restore, resolve;

   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */
   /* Keep track of whether WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when
    * bypassing GMEM).
    */
   enum fd_gmem_reason gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT.  If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled).  If depth-test is enabled, the hw will additionally do
    * a depth buffer read.  If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
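   /* E.g. a single draw to two MRTs with blending enabled on both, plus
    * depth test and depth write enabled, adds 2 + 2 + 1 + 1 = 6 to the
    * batch's cost.
    */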
   unsigned cost;

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether we are using binning or not:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of pointers to the start of MEM exports for a20x binning
    * shaders.
    *
    * This is so the end of the shader can be cut off at the right point,
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_setup;
   struct fd_ringbuffer *tile_fini;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;

   /* Buffer for tessellation engine input
    */
   struct fd_bo *tessfactor_bo;
   uint32_t tessfactor_size;

   /* Buffer for passing parameters between TCS and TES
    */
   struct fd_bo *tessparam_bo;
   uint32_t tessparam_size;

   struct fd_ringbuffer *tess_addrs_constobj;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;
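
/* Rough lifecycle sketch (illustrative only, not actual driver code):
 *
 *    struct fd_batch *batch = fd_batch_create(ctx, false);
 *    ...
 *    fd_batch_needs_flush(batch);       // batch now has work worth submitting
 *    ...
 *    fd_batch_flush(batch);
 *    fd_batch_reference(&batch, NULL);  // drop our reference
 *
 * In the driver, batches are typically obtained and tracked via the batch
 * cache (see 'key'/'idx' above) rather than managed ad hoc like this.
 */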

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch..  so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version if the batch being ref'd can disappear under you.
 */

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;
   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

   if (ctx)
      fd_screen_lock(ctx->screen);

   fd_batch_reference_locked(ptr, batch);

   if (ctx)
      fd_screen_unlock(ctx->screen);
}
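
/* Example (sketch) of taking and dropping a reference from code that does
 * not already hold screen->lock:
 *
 *    struct fd_batch *tmp = NULL;
 *    fd_batch_reference(&tmp, some_batch);   // take a reference
 *    ...
 *    fd_batch_reference(&tmp, NULL);         // drop it (lock taken internally)
 *
 * Code that already holds screen->lock should use
 * fd_batch_reference_locked() instead, per the NOTE above.
 */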

static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
   simple_mtx_unlock(&batch->submit_lock);
}

/**
 * Returns true if the submit-lock was acquired, false if it failed to
 * acquire the lock, ie. the batch was already flushed.
 */
static inline bool MUST_CHECK
fd_batch_lock_submit(struct fd_batch *batch)
{
   simple_mtx_lock(&batch->submit_lock);
   bool ret = !batch->flushed;
   if (!ret)
      fd_batch_unlock_submit(batch);
   return ret;
}
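
/* Typical pattern (sketch): only touch the batch's cmdstream if the
 * submit-lock could be acquired, ie. the batch has not already been flushed
 * (note that fd_batch_lock_submit() drops the lock itself when it returns
 * false):
 *
 *    if (fd_batch_lock_submit(batch)) {
 *       ... emit / flush cmdstream ...
 *       fd_batch_unlock_submit(batch);
 *    }
 */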

/**
 * Mark the batch as having something worth flushing (rendering, blit, query,
 * etc)
 */
static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
   batch->needs_flush = true;
   fd_fence_ref(&batch->ctx->last_fence, NULL);
}

/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
 * the batch before each draw.
 */
static inline void
fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, false);
}

static inline void
fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, true);
}

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
   batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
               enum vgt_event_type evt)
{
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, evt);
   fd_reset_wfi(batch);
}
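
/* For example (sketch), assuming CACHE_FLUSH is a valid enum vgt_event_type
 * value for the target gen, a cache flush event could be emitted with:
 *
 *    fd_event_write(batch, ring, CACHE_FLUSH);
 */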

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
   if (batch->epilogue == NULL) {
      batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                 (enum fd_ringbuffer_flags)0);
   }

   return batch->epilogue;
}

struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_BATCH_H_ */