/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/perf/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/**
 * A subpass is a fragment of a batch potentially starting with a clear.
 * If the app does a mid-batch clear, that clear and subsequent draws
 * can be split out into another sub-pass.  At gmem time, the appropriate
 * sysmem or gmem clears can be interleaved with the CP_INDIRECT_BUFFER
 * to the subpass's draw cmdstream.
 *
 * For depth clears, a replacement LRZ buffer can be allocated (the clear
 * is still inserted into the prologue cmdstream since it needs to be
 * executed even in sysmem or if we aren't binning, since later batches
 * could depend on the LRZ state).  The alternative would be to invalidate
 * LRZ for draws after the start of the new subpass.
 */
struct fd_batch_subpass {
   struct list_head node;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;

   /** for the gmem code to stash per tile per subpass clears */
   struct fd_ringbuffer *subpass_clears;

   BITMASK_ENUM(fd_buffer_mask) fast_cleared;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * The number of draws emitted to this subpass.  If it is greater than
    * zero, a clear triggers creating a new subpass (because clears must
    * always come at the start of a subpass).
    */
   unsigned num_draws;

   /**
    * If a subpass starts with an LRZ clear, it gets a new LRZ buffer.
    * The fd_resource::lrz always tracks the current lrz buffer, but at
    * binning/gmem time we need to know what was the current lrz buffer
    * at the time draws were emitted to the subpass, which is tracked
    * here.
    */
   struct fd_bo *lrz;
};
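
/*
 * A rough sketch of how the subpass split described above plays out at
 * draw/clear time (illustrative control flow only, not the actual driver
 * code; 'buffers' stands for the mask of buffers being cleared):
 *
 *    // draws accumulate in the current subpass:
 *    batch->subpass->num_draws++;
 *
 *    // a clear arriving after draws have been recorded starts a new
 *    // subpass, so the clear stays at the front of its subpass:
 *    if (batch->subpass->num_draws > 0)
 *       batch->subpass = fd_batch_create_subpass(batch);
 *    batch->subpass->fast_cleared |= buffers;
 */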

/**
 * A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip cmdstream to record timestamp: */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* Do we need to mem2gmem before rendering?  We don't if, for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents.  Keep track of which buffer(s) are cleared, or need
    * restore.  Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
   BITMASK_ENUM(fd_buffer_mask) invalidated, cleared, restore, resolve;
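
   /* For example (illustrative only), a full-surface clear of the color
    * buffer would roughly do:
    *
    *    batch->invalidated |= FD_BUFFER_COLOR;
    *    batch->cleared |= FD_BUFFER_COLOR;    // nothing to mem2gmem
    *
    * while a later draw that reads (or blends against) that buffer would
    * do:
    *
    *    batch->restore |= FD_BUFFER_COLOR;    // must restore to gmem
    */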

   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */

   /* Keep track of whether WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when bypassing
    * GMEM).
    */
   BITMASK_ENUM(fd_gmem_reason) gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* Driver specific barrier/flush flags: */
   unsigned barrier;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT.  If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled).  If depth-test is enabled, the hw will additionally do
    * a depth buffer read.  If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
   unsigned cost;

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    *
    * Note that unlike gallium state, maxx/maxy are inclusive (for a
    * fully covered 512x512 surface the scissor would be 0,0+511,511).
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether we are using binning or not:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of pointer to start of MEM exports for a20x binning shaders.
    *
    * This is so the end of the shader can be cut off at the right point
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /**
    * List of fd_batch_subpass.
    */
   struct list_head subpasses;

#define foreach_subpass(subpass, batch) \
   list_for_each_entry (struct fd_batch_subpass, subpass, &batch->subpasses, node)
#define foreach_subpass_safe(subpass, batch) \
   list_for_each_entry_safe (struct fd_batch_subpass, subpass, &batch->subpasses, node)
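
   /* Example (sketch): at gmem time, the per-tile emit loop walks the
    * subpasses in order, emitting any stashed per-tile clears before the
    * IB to that subpass's draw cmdstream (illustrative, not the exact
    * driver loop):
    *
    *    foreach_subpass (subpass, batch) {
    *       // emit subpass->subpass_clears for this tile, if any
    *       // then emit a CP_INDIRECT_BUFFER to subpass->draw
    *    }
    */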

   /**
    * The current subpass.
    */
   struct fd_batch_subpass *subpass;

   /**
    * Just a reference to the current subpass's draw cmds for backwards compat.
    */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *tile_epilogue;

   /** epilogue cmdstream (executed after all tiles): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_loads;
   struct fd_ringbuffer *tile_store;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* The # of pipeline-stats queries running.  In case of nested
    * queries using {START/STOP}_{PRIMITIVE,FRAGMENT,COMPUTE}_CNTRS,
    * we need to start only on the first one and stop only on the
    * last one.
    */
   uint8_t pipeline_stats_queries_active[3];

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

struct fd_batch_subpass *fd_batch_create_subpass(struct fd_batch *batch) assert_dt;

void fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb) assert_dt;

void fd_batch_flush(struct fd_batch *batch) assert_dt;
bool fd_batch_has_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy_locked(struct fd_batch *batch);
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch..  so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources).
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in case the batch being ref'd can disappear under you.
 */
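
/*
 * A sketch of typical usage (illustrative, not taken verbatim from the
 * driver):
 *
 *    struct fd_batch *batch = NULL;
 *
 *    // caller does not hold screen->lock:
 *    fd_batch_reference(&batch, other_batch);
 *    ...
 *    fd_batch_reference(&batch, NULL);   // drop the reference
 *
 *    // caller already holds screen->lock (e.g. batch-cache code):
 *    fd_batch_reference_locked(&batch, other_batch);
 */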

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy_locked(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

/**
 * Mark the batch as having something worth flushing (rendering, blit, query,
 * etc)
 */
static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
   batch->needs_flush = true;
   fd_pipe_fence_ref(&batch->ctx->last_fence, NULL);
}

/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
 * the batch before each draw.
 */
static inline void
fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (!(ctx->dirty & FD_DIRTY_QUERY))
      return;

   ctx->query_update_batch(batch, false);
}

static inline void
fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   ctx->query_update_batch(batch, true);
}

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
   batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
               enum vgt_event_type evt)
{
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, evt);
   fd_reset_wfi(batch);
}

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_tile_epilogue(struct fd_batch *batch)
{
   if (batch->tile_epilogue == NULL) {
      batch->tile_epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                      FD_RINGBUFFER_GROWABLE);
   }

   return batch->tile_epilogue;
}

/* Get epilogue run after all tiles */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
   if (batch->epilogue == NULL) {
      batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                 FD_RINGBUFFER_GROWABLE);
   }

   return batch->epilogue;
}

struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_BATCH_H_ */