/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned strides[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter
    * is just incremented by # of vertices on each draw until
    * reset or new streamout buffer bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  a6xx and newer, hardware
    * queries are used.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};
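
/* Illustrative sketch (not part of the driver source): conceptually, the
 * draw path bumps the per-target offsets by the vertex count of each draw,
 * roughly:
 *
 *    struct fd_streamout_stateobj *so = &ctx->streamout;
 *    for (unsigned i = 0; i < so->num_targets; i++)
 *       so->offsets[i] += num_vertices_in_draw;
 *
 * where num_vertices_in_draw is a placeholder for whatever count the draw
 * code computes; the real bookkeeping lives in the draw path, not here.
 */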

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),
   FD_DIRTY_QUERY = BIT(20),
   FD_DIRTY_SAMPLE_LOCATIONS = BIT(21),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(22),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
   FD_DIRTY_BLEND_COHERENT = BIT(27),
#define NUM_DIRTY_BITS 28
};
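
/* Illustrative sketch (not from this file): generation specific emit code
 * typically tests these bits before re-emitting a block of registers, e.g.
 *
 *    if (ctx->dirty & FD_DIRTY_VIEWPORT)
 *       emit_viewport_regs(ring, ctx);   // emit_viewport_regs() is a placeholder
 *
 * and then relies on fd_context_all_clean() (further below) to clear the
 * bits once the draw has been emitted.
 */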

static inline void
fd_print_dirty_state(BITMASK_ENUM(fd_dirty_3d_state) dirty)
{
#ifdef DEBUG
   if (!FD_DBG(MSGS))
      return;

   struct {
      enum fd_dirty_3d_state state;
      const char *name;
   } tbl[] = {
#define STATE(n) { FD_DIRTY_ ## n, #n }
         STATE(BLEND),
         STATE(RASTERIZER),
         STATE(ZSA),
         STATE(BLEND_COLOR),
         STATE(STENCIL_REF),
         STATE(SAMPLE_MASK),
         STATE(FRAMEBUFFER),
         STATE(STIPPLE),
         STATE(VIEWPORT),
         STATE(VTXSTATE),
         STATE(VTXBUF),
         STATE(MIN_SAMPLES),
         STATE(SCISSOR),
         STATE(STREAMOUT),
         STATE(UCP),
         STATE(PROG),
         STATE(CONST),
         STATE(TEX),
         STATE(IMAGE),
         STATE(SSBO),
         STATE(QUERY),
         STATE(TEXSTATE),
         STATE(RASTERIZER_DISCARD),
         STATE(RASTERIZER_CLIP_PLANE_ENABLE),
         STATE(BLEND_DUAL),
         STATE(BLEND_COHERENT),
#undef STATE
   };

   struct log_stream *s = mesa_log_streami();

   mesa_log_stream_printf(s, "dirty:");

   if ((uint32_t)dirty == ~0) {
      mesa_log_stream_printf(s, " ALL");
      dirty = 0;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(tbl); i++) {
      if (dirty & tbl[i].state) {
         mesa_log_stream_printf(s, " %s", tbl[i].name);
         dirty &= ~tbl[i].state;
      }
   }

   assert(!dirty);

   mesa_log_stream_destroy(s);
#endif
}

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

enum fd_buffer_mask {
   /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
   FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
   FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
   FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
   FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,

   /* A special internal buffer bit to signify that the LRZ buffer needs
    * clearing
    */
   FD_BUFFER_LRZ = BIT(15),
};
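
/* Illustrative note (not from this file): because the FD_BUFFER_* values
 * alias PIPE_CLEAR_*, a gallium clear mask can be forwarded to the per-gen
 * clear hook without translation, roughly:
 *
 *    ctx->clear(ctx, (enum fd_buffer_mask)buffers, color, depth, stencil);
 *
 * FD_BUFFER_LRZ is the exception: it is internal-only and never comes in
 * from gallium.
 */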

#define MAX_HW_SAMPLE_PROVIDERS 10
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

struct fd_context {
   struct pipe_context base;

   unsigned flags;      /* PIPE_CONTEXT_x */

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case, with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   float default_outer_level[4] dt;
   float default_inner_level[2] dt;
   uint8_t patch_vertices dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* Current nondraw batch.  Rules are the same as for draw batch.
    */
   struct fd_batch *batch_nondraw dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it pre-
    * maturely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;

   /**
    * If we *ever* see an in-fence-fd, assume that userspace is
    * not relying on implicit fences.
    */
   bool no_implicit_sync;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If global reset
    * count increases, it means some other context crashed.  If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   /* Note that all the scissor state that is traced is inclusive, i.e. the
    * maximum maxx is one less than the width.
    */
   struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS] dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor[PIPE_MAX_VIEWPORTS] dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Table of bo's attached to all batches up-front (because they
    * are commonly used, and that is easier than attaching on-use).
    * In particular, these are driver internal buffers which do not
    * participate in batch resource tracking.
    */
   struct fd_bo *private_bos[3];
   unsigned num_private_bos;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty_resource dt;

   /* per shader-stage dirty status: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader[PIPE_SHADER_TYPES] dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader_resource[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;

   /* 1x1 grid, max 4x MSAA: */
   uint8_t sample_locations[4] dt;
   bool sample_locations_enabled dt;

   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   uint32_t all_mrt_channel_mask dt;

   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS] dt;
   struct pipe_scissor_state viewport_scissor[PIPE_MAX_VIEWPORTS] dt;
   struct {
      unsigned x, y;
   } guardband dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
      uint32_t per_sp_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct util_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   void (*draw_vbos)(struct fd_context *ctx, const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws,
                     unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, enum fd_buffer_mask buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* called to update draw_vbo func after bound shader stages change, etc: */
   void (*update_draw)(struct fd_context *ctx);

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

void fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo);

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, BITMASK_ENUM(fd_dirty_3d_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
   ctx->dirty |= dirty;

   /* These are still not handled at bind time: */
   if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_QUERY | FD_DIRTY_ZSA))
      ctx->dirty_resource |= dirty;
}
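
/* Illustrative sketch (not from this file): a CSO bind callback would
 * typically flag the corresponding bit, e.g.
 *
 *    static void
 *    example_bind_blend_state(struct pipe_context *pctx, void *hwcso)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend = hwcso;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND);
 *    }
 *
 * example_bind_blend_state() is a placeholder name; the real bind hooks
 * live in the freedreno state code, not in this header.
 */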

static inline enum fd_dirty_3d_state
dirty_shader_to_dirty_state(BITMASK_ENUM(fd_dirty_shader_state) dirty)
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(ffs(dirty) <= ARRAY_SIZE(map));

   return map[ffs(dirty) - 1];
}

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        BITMASK_ENUM(fd_dirty_shader_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
   ctx->dirty_shader[shader] |= dirty;
   fd_context_dirty(ctx, dirty_shader_to_dirty_state(dirty));
}
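
/* Example (grounded in the functions above): per-stage state setters flag
 * the stage-specific bit, which also propagates to the global dirty mask
 * via dirty_shader_to_dirty_state(), e.g.
 *
 *    fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX);
 *
 * after which both ctx->dirty_shader[PIPE_SHADER_FRAGMENT] (FD_DIRTY_SHADER_TEX)
 * and ctx->dirty (FD_DIRTY_TEX) are set.
 */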

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;
   ctx->dirty_resource = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state) ~0;
   }
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->dirty_resource = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, uint32_t dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
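
/* Illustrative sketch (not from this file): backend context setup would
 * register which of its state groups depend on a given gallium-level dirty
 * bit, roughly:
 *
 *    #define EXAMPLE_GROUP_VTX BIT(3)   // placeholder gen-specific group
 *    fd_context_add_map(ctx, FD_DIRTY_VTXSTATE | FD_DIRTY_VTXBUF,
 *                       EXAMPLE_GROUP_VTX);
 *
 * so that a later fd_context_dirty(ctx, FD_DIRTY_VTXBUF) sets that group in
 * ctx->gen_dirty.
 */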

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          BITMASK_ENUM(fd_dirty_shader_state) dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_nondraw(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_msg(struct u_trace_context *utctx, void *cs, const char *fmt, ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_start(struct u_trace_context *utctx, void *cs, const char *fmt,
                  ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */