/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data. This counter
    * is just incremented by # of vertices on each draw until
    * reset or a new streamout buffer is bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to
    * this set of targets with the current vertex shader. On a6xx and
    * newer, hardware queries are used instead.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin. Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};
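
/* Illustrative sketch of how the offsets[] bookkeeping above is expected to
 * advance at draw time (a hedged snippet, not a declaration from this
 * header; num_vertices stands for the current draw's vertex count):
 *
 *    struct fd_streamout_stateobj *so = &ctx->streamout;
 *    for (unsigned i = 0; i < so->num_targets; i++) {
 *       if (so->targets[i])
 *          so->offsets[i] += num_vertices;
 *    }
 *
 * i.e. the CPU-side accounting only works while the vertex count per draw
 * is known up front, which is why the comment above calls out GS as a
 * problem.
 */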

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(20),

   /* fine grained state changes, for cases where state is not orthogonal
    * from the hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_BLEND_DUAL = BIT(25),
#define NUM_DIRTY_BITS 26

   /* additional flag for state that requires updated resource tracking: */
   FD_DIRTY_RESOURCE = BIT(31),
};

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

struct fd_context {
   struct pipe_context base;

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded. But it is needed to protect the
    * case, with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;
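
   /* Illustrative sketch (not part of this header): the GMEM submit path
    * is expected to bracket its use of VSC state with this lock, roughly:
    *
    *    simple_mtx_lock(&batch->ctx->gmem_lock);
    *    ... emit per-tile cmdstream that touches VSC state ...
    *    simple_mtx_unlock(&batch->ctx->gmem_lock);
    *
    * The exact call site lives in the gmem code; the snippet above only
    * assumes the simple_mtx_lock()/unlock() helpers from util.
    */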

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   uint8_t patch_vertices;

   /* Whether we need to recheck the active_queries list the next
    * fd_batch_update_queries().
    */
   bool update_active_queries dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints. But not in codepaths
    * from fd_batch_flush() (basically, the stuff that gets called from
    * GMEM code), since in those codepaths the batch you care about is
    * not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* NULL if there has been rendering since last flush. Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush a no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none). The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw. But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch. If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we
    * could be building up cmdstream.
    */
   int in_fence_fd dt;
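
   /* Illustrative sketch of the intended flow (assumed, not defined in this
    * header): ->fence_server_sync() merges the incoming fd into in_fence_fd,
    * e.g. using sync_accumulate() from util/libsync.h:
    *
    *    sync_accumulate("freedreno", &ctx->in_fence_fd, incoming_fd);
    *
    * and the next draw/blit/grid hands in_fence_fd off to the batch and
    * resets it to -1.
    */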

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then. If global reset
    * count increases, it means some other context crashed. If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in the process of shadowing a resource? Used to detect
    * recursion in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   struct pipe_scissor_state scissor dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Maps generic gallium-oriented fd_dirty_3d_state bits to a generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   enum fd_dirty_3d_state dirty dt;

   /* per shader-stage dirty status: */
   enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;
   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport dt;
   struct pipe_scissor_state viewport_scissor dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
   } pvtmem[2] dt;
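
   /* Illustrative sketch of the grow-only sizing policy described above
    * (hypothetical snippet; required_per_fiber_size and the reallocation
    * step are placeholders, not declared in this header):
    *
    *    uint32_t layout = per_wave ? 1 : 0;
    *    if (required_per_fiber_size > ctx->pvtmem[layout].per_fiber_size) {
    *       // drop the old bo and allocate a larger replacement
    *       ctx->pvtmem[layout].per_fiber_size = required_per_fiber_size;
    *    }
    *    // shaders needing less space just reuse the existing, larger bo
    */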

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct pipe_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
                    unsigned drawid_offset,
                    const struct pipe_draw_indirect_info *indirect,
                    const struct pipe_draw_start_count_bias *draw,
                    unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, unsigned buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* handling for barriers: */
   void (*framebuffer_barrier)(struct fd_context *ctx) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about the state of the previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO). This allows us to skip some
    * register emit when the state doesn't change from draw-to-draw.
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant. Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change. See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

/**
 * Does the dirty state require resource tracking, ie. in general
 * does it reference some resource? There are some special cases:
 *
 * - FD_DIRTY_CONST can reference a resource, but cb0 is handled
 *   specially: if it is not a user-buffer, we expect it to be
 *   coming from const_uploader, so we can make some assumptions
 *   that future transfer_map will be UNSYNCHRONIZED
 * - FD_DIRTY_ZSA controls how the framebuffer is accessed
 * - FD_DIRTY_BLEND needs to update GMEM reason
 *
 * TODO if we can make assumptions that framebuffer state is bound
 * first, before blend/zsa/etc state, we can move some of the ZSA/
 * BLEND state handling from draw time to bind time. I think this
 * is true of mesa/st, perhaps we can just document it to be a
 * frontend requirement?
 */
static inline bool
fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
{
   return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
                   FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
                   FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
}

#ifdef __cplusplus
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      decltype(mask) _d = (d);                                                 \
      d = (decltype(mask))(_d | (mask));                                       \
   } while (0)
#else
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      d |= (mask);                                                             \
   } while (0)
#endif
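
/* or_dirty() exists because, when this header is included from C++ code,
 * bitwise-OR on the enum types above yields an int that cannot be assigned
 * back to the enum without a cast. Illustrative use (a sketch, mirroring
 * the helpers below, not an additional API):
 *
 *    or_dirty(ctx->dirty, FD_DIRTY_SCISSOR);
 *    or_dirty(ctx->dirty_shader[PIPE_SHADER_FRAGMENT], FD_DIRTY_SHADER_TEX);
 */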

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];

   if (fd_context_dirty_resource(dirty))
      or_dirty(dirty, FD_DIRTY_RESOURCE);

   or_dirty(ctx->dirty, dirty);
}
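
/* Typical callers are the pipe_context state setters. A minimal sketch of
 * such a bind-time hook (hypothetical function shown for illustration, not
 * declared in this header):
 *
 *    static void
 *    fd_set_blend_color(struct pipe_context *pctx,
 *                       const struct pipe_blend_color *blend_color)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend_color = *blend_color;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
 *    }
 */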

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        enum fd_dirty_shader_state dirty) assert_dt
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(map));

   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];

   or_dirty(ctx->dirty_shader[shader], dirty);
   fd_context_dirty(ctx, map[ffs(dirty) - 1]);
}

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      /* Don't mark compute state as clean, since it is not emitted
       * during a normal draw call. In the places that call _all_dirty(),
       * it is safe to mark compute state dirty as well, but the
       * inverse is not true.
       */
      if (i == PIPE_SHADER_COMPUTE)
         continue;
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
                   uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
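
/* Generation specific context setup is expected to register its state
 * "group" bits via the two helpers above. A sketch (the FD6_GROUP_* names
 * are illustrative placeholders, not declared in this header):
 *
 *    fd_context_add_map(ctx, FD_DIRTY_BLEND, BIT(FD6_GROUP_BLEND));
 *    fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
 *                              FD_DIRTY_SHADER_TEX, BIT(FD6_GROUP_FS_TEX));
 *
 * Afterwards, fd_context_dirty()/fd_context_dirty_shader() translate each
 * generic dirty bit into the registered gen_dirty group mask via
 * gen_dirty_map[] / gen_dirty_shader_map[].
 */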

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */