/*
 * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned strides[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data. This counter is
    * simply incremented by the number of vertices on each draw, until
    * it is reset or a new streamout buffer is bound (see the sketch
    * following this struct).
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so we'll have to do something
    * more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to
    * this set of targets with the current vertex shader. On a6xx and
    * newer, hardware queries are used instead.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin. Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};
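
/* A minimal sketch (not the driver's actual code) of how the SW streamout
 * accounting described above could be advanced at draw time on pre-a6xx
 * parts; the helper name and the simplified per-vertex math are
 * illustrative only:
 *
 *    static void
 *    update_streamout_offsets(struct fd_streamout_stateobj *so,
 *                             unsigned vertex_count)
 *    {
 *       for (unsigned i = 0; i < so->num_targets; i++) {
 *          if (!so->targets[i])
 *             continue;
 *          // each bound target advances by the vertices emitted this draw
 *          so->offsets[i] += vertex_count;
 *       }
 *       // accumulated for SW overflow queries, compared against max_tf_vtx
 *       so->verts_written += vertex_count;
 *    }
 */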

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),
   FD_DIRTY_QUERY = BIT(20),
   FD_DIRTY_SAMPLE_LOCATIONS = BIT(21),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(22),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
   FD_DIRTY_BLEND_COHERENT = BIT(27),
#define NUM_DIRTY_BITS 28
};

static inline void
fd_print_dirty_state(BITMASK_ENUM(fd_dirty_3d_state) dirty)
{
#if MESA_DEBUG
   if (!FD_DBG(MSGS))
      return;

   struct {
      enum fd_dirty_3d_state state;
      const char *name;
   } tbl[] = {
#define STATE(n) { FD_DIRTY_ ## n, #n }
      STATE(BLEND),
      STATE(RASTERIZER),
      STATE(ZSA),
      STATE(BLEND_COLOR),
      STATE(STENCIL_REF),
      STATE(SAMPLE_MASK),
      STATE(FRAMEBUFFER),
      STATE(STIPPLE),
      STATE(VIEWPORT),
      STATE(VTXSTATE),
      STATE(VTXBUF),
      STATE(MIN_SAMPLES),
      STATE(SCISSOR),
      STATE(STREAMOUT),
      STATE(UCP),
      STATE(PROG),
      STATE(CONST),
      STATE(TEX),
      STATE(IMAGE),
      STATE(SSBO),
      STATE(QUERY),
      STATE(TEXSTATE),
      STATE(RASTERIZER_DISCARD),
      STATE(RASTERIZER_CLIP_PLANE_ENABLE),
      STATE(BLEND_DUAL),
      STATE(BLEND_COHERENT),
#undef STATE
   };

   struct log_stream *s = mesa_log_streami();

   mesa_log_stream_printf(s, "dirty:");

   if ((uint32_t)dirty == ~0) {
      mesa_log_stream_printf(s, " ALL");
      dirty = 0;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(tbl); i++) {
      if (dirty & tbl[i].state) {
         mesa_log_stream_printf(s, " %s", tbl[i].name);
         dirty &= ~tbl[i].state;
      }
   }

   assert(!dirty);

   mesa_log_stream_destroy(s);
#endif
}

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

enum fd_buffer_mask {
   /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
   FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
   FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
   FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
   FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,

   /* A special internal buffer bit to signify that the LRZ buffer needs
    * clearing
    */
   FD_BUFFER_LRZ = BIT(15),
};

#define MAX_HW_SAMPLE_PROVIDERS 10
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

struct fd_context {
   struct pipe_context base;

   unsigned flags; /* PIPE_CONTEXT_x */

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded. But it is needed to protect the
    * case with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   float default_outer_level[4] dt;
   float default_inner_level[2] dt;
   uint8_t patch_vertices dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   uint32_t occlusion_queries_active dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints. But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* Current nondraw batch. Rules are the same as for draw batch.
    */
   struct fd_batch *batch_nondraw dt;

   /* NULL if there has been rendering since last flush. Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush a no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /*
    * Counter to keep track of batch's most recent update. Ie. the batch
    * with the higher update count is the one that has been drawn/etc to
    * most recently (and therefore shouldn't have any other batch that
    * should be flushed after it). This is used to figure out which fence
    * to use at context flush time.
    */
   uint32_t update_count;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none). The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw. But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch. If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;

   /**
    * If we *ever* see an in-fence-fd, assume that userspace is
    * not relying on implicit fences.
    */
   bool no_implicit_sync;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then. If global reset
    * count increases, it means some other context crashed. If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   /* Note that all the scissor state that is traced is inclusive, ie the
    * maximum maxx is one less than the width.
    */
   struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS] dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor[PIPE_MAX_VIEWPORTS] dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Table of bo's attached to all batches up-front (because they
    * are commonly used, and that is easier than attaching on-use).
    * In particular, these are driver internal buffers which do not
    * participate in batch resource tracking.
    */
   struct fd_bo *private_bos[3];
   unsigned num_private_bos;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty_resource dt;

   /* per shader-stage dirty status: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader[PIPE_SHADER_TYPES] dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader_resource[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;

   /* 1x1 grid, max 4x MSAA: */
   uint8_t sample_locations[4] dt;
   bool sample_locations_enabled dt;

   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   uint32_t all_mrt_channel_mask dt;

   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS] dt;
   struct pipe_scissor_state viewport_scissor[PIPE_MAX_VIEWPORTS] dt;
   struct {
      unsigned x, y;
   } guardband dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece
    * of memory, in addition to registers. It is backed by a buffer which
    * needs to be large enough to hold the contents of every possible
    * wavefront in every core of the GPU. Because it allocates space via
    * the internal wavefront ID, which is shared between all currently
    * executing shaders, the same buffer can be reused by all shaders, as
    * long as all shaders sharing the same buffer use the exact same
    * configuration. There are two inputs to the configuration: the amount
    * of per-fiber space, and whether to use the newer per-wave or older
    * per-fiber layout. We only ever increase the size, and shaders with a
    * smaller size requirement simply use the larger existing buffer, so
    * that we only need to keep track of one buffer and its size. But we
    * still need to keep track of per-fiber and per-wave buffers separately
    * so that we never use the same buffer for different layouts.
    * pvtmem[0] is for per-fiber, and pvtmem[1] is for per-wave. (See the
    * grow-only sketch following this member.)
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
      uint32_t per_sp_size;
   } pvtmem[2] dt;
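
   /* A minimal, illustrative sketch (not the driver's actual code) of the
    * grow-only reuse policy described above; fd_bo_new()/fd_bo_del() are
    * the real libdrm_freedreno API, but the helper name and sizing math
    * are hypothetical:
    *
    *    // num_sp_cores: total shader core count, assumed to come from the
    *    // device info
    *    static void
    *    ensure_pvtmem(struct fd_context *ctx, bool per_wave,
    *                  uint32_t per_fiber_size, uint32_t per_sp_size,
    *                  uint32_t num_sp_cores)
    *    {
    *       // pvtmem[0] is the per-fiber layout, pvtmem[1] is per-wave
    *       unsigned idx = per_wave ? 1 : 0;
    *
    *       // grow-only: shaders needing less space reuse the larger buffer
    *       if (per_fiber_size <= ctx->pvtmem[idx].per_fiber_size)
    *          return;
    *
    *       if (ctx->pvtmem[idx].bo)
    *          fd_bo_del(ctx->pvtmem[idx].bo);
    *
    *       ctx->pvtmem[idx].per_fiber_size = per_fiber_size;
    *       ctx->pvtmem[idx].per_sp_size = per_sp_size;
    *       ctx->pvtmem[idx].bo = fd_bo_new(ctx->dev,
    *                                       per_sp_size * num_sp_cores,
    *                                       FD_BO_NOMAP, "pvtmem");
    *    }
    */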

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct util_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns, filled in by the generation specific backend
    * (see the sketch following this struct):
    */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   void (*draw_vbos)(struct fd_context *ctx, const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws,
                     unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, enum fd_buffer_mask buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* called to update draw_vbo func after bound shader stages change, etc: */
   void (*update_draw)(struct fd_context *ctx);

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO). This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant. Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change. See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};
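
/* A minimal sketch (illustrative only, not copied from any particular
 * backend) of how a generation specific backend is expected to fill in the
 * GMEM/tile vtable above at context-init time; the fdN_* names are
 * hypothetical:
 *
 *    void
 *    fdN_gmem_init(struct pipe_context *pctx)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *
 *       ctx->emit_tile_init = fdN_emit_tile_init;
 *       ctx->emit_tile_prep = fdN_emit_tile_prep;
 *       ctx->emit_tile_mem2gmem = fdN_emit_tile_mem2gmem;
 *       ctx->emit_tile_renderprep = fdN_emit_tile_renderprep;
 *       ctx->emit_tile = fdN_emit_tile;
 *       ctx->emit_tile_gmem2mem = fdN_emit_tile_gmem2mem;
 *       ctx->emit_tile_fini = fdN_emit_tile_fini;   // optional
 *
 *       // optional, for GMEM bypass (sysmem rendering):
 *       ctx->emit_sysmem_prep = fdN_emit_sysmem_prep;
 *       ctx->emit_sysmem_fini = fdN_emit_sysmem_fini;
 *    }
 */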

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

void fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo);

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, BITMASK_ENUM(fd_dirty_3d_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
   ctx->dirty |= dirty;

   /* These are still not handled at bind time: */
   if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_QUERY | FD_DIRTY_ZSA))
      ctx->dirty_resource |= dirty;
}
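
/* A minimal usage sketch, assuming a typical CSO bind callback (the callback
 * below is illustrative, not necessarily the driver's exact code). The in_dt
 * annotation marks that we run on the driver thread, which fd_context_dirty()
 * requires (assert_dt):
 *
 *    static void
 *    fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
 *       in_dt
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *
 *       ctx->blend = hwcso;
 *       // exactly one bit per call, since fd_context_dirty() asserts that
 *       // 'dirty' is a single power-of-two bit:
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND);
 *    }
 */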

static inline enum fd_dirty_3d_state
dirty_shader_to_dirty_state(BITMASK_ENUM(fd_dirty_shader_state) dirty)
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(ffs(dirty) <= ARRAY_SIZE(map));

   return map[ffs(dirty) - 1];
}

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        BITMASK_ENUM(fd_dirty_shader_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
   ctx->dirty_shader[shader] |= dirty;
   fd_context_dirty(ctx, dirty_shader_to_dirty_state(dirty));
}
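
/* A minimal usage sketch, assuming a typical per-stage state setter (the
 * callback below is illustrative). Marking a stage's constant buffers dirty
 * also propagates FD_DIRTY_CONST to the global dirty mask via
 * dirty_shader_to_dirty_state():
 *
 *    static void
 *    fd_set_constant_buffer(struct pipe_context *pctx,
 *                           enum pipe_shader_type shader, uint index,
 *                           bool take_ownership,
 *                           const struct pipe_constant_buffer *cb)
 *       in_dt
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *
 *       util_copy_constant_buffer(&ctx->constbuf[shader].cb[index], cb,
 *                                 take_ownership);
 *       fd_context_dirty_shader(ctx, shader, FD_DIRTY_SHADER_CONST);
 *    }
 */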

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;
   ctx->dirty_resource = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state) ~0;
   }
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->dirty_resource = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, uint32_t dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          BITMASK_ENUM(fd_dirty_shader_state) dirty,
                          uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
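
/* A minimal sketch of how a generation specific backend could register its
 * dirty-bit mappings at context init; the FDN_GROUP_* bit names are
 * hypothetical placeholders for that generation's state-group enum:
 *
 *    void
 *    fdN_emit_init(struct pipe_context *pctx)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *
 *       // whenever blend or ZSA state is dirtied, re-emit the (hypothetical)
 *       // combined blend/zsa state group:
 *       fd_context_add_map(ctx, FD_DIRTY_BLEND | FD_DIRTY_ZSA,
 *                          BIT(FDN_GROUP_BLEND_ZSA));
 *
 *       // per-stage constants map to per-stage const groups:
 *       fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX,
 *                                 FD_DIRTY_SHADER_CONST,
 *                                 BIT(FDN_GROUP_VS_CONST));
 *       fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
 *                                 FD_DIRTY_SHADER_CONST,
 *                                 BIT(FDN_GROUP_FS_CONST));
 *    }
 */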

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_nondraw(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_msg(struct u_trace_context *utctx, void *cs, const char *fmt, ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_start(struct u_trace_context *utctx, void *cs, const char *fmt,
                  ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */