1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
/**
 * State atoms for the render pipeline on hardware older than gen6.
 *
 * brw_init_state() copies this list into brw->render_atoms when brw->gen
 * is below 6.  brw_upload_pipeline_state() visits the entries front to
 * back, so the position of each entry is significant; the ordering
 * constraints are noted next to the entries they apply to.
 */
static const struct brw_tracked_state *gen4_atoms[] =
{
   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit,		/* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   /* Consumes the unit state set up by the atoms listed above. */
   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};
103
/**
 * State atoms for the render pipeline on gen6.
 *
 * brw_init_state() copies this list into brw->render_atoms when
 * brw->gen == 6; no compute atom list is installed for that generation.
 * brw_upload_pipeline_state() visits the entries front to back, so the
 * position of each entry is significant.
 */
static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_sf_and_clip_viewports,

   /* Command packets: */

   &brw_cc_vp,
   &gen6_viewport_state,	/* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state,		/* must do before cc unit */
   &gen6_color_calc_state,	/* must do before cc unit */
   &gen6_depth_stencil_state,	/* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   /* Per-stage unit state; listed after the push constants and surface
    * setup above, as required by the "Before ..." notes on those entries.
    */
   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};
168
/**
 * State atoms for the render pipeline on gen7.
 *
 * brw_init_state() copies this list into brw->render_atoms when
 * brw->gen == 7.  brw_upload_pipeline_state() visits the entries front to
 * back, so the position of each entry is significant.
 */
static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state,		/* must do before cc unit */
   &gen6_color_calc_state,	/* must do before cc unit */
   &gen6_depth_stencil_state,	/* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   /* Per-stage state; listed after the push constants and surface setup
    * above, as required by the "Before ..." notes on those entries.
    */
   &gen7_vs_state,
   &gen7_hs_state,
   &gen7_te_state,
   &gen7_ds_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};
260
/**
 * State atoms for the compute pipeline on gen7.
 *
 * brw_init_state() copies this list into brw->compute_atoms when
 * brw->gen == 7.  brw_upload_pipeline_state() visits the entries front to
 * back.
 */
static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};
274
/**
 * State atoms for the render pipeline on gen8 and later.
 *
 * brw_init_state() copies this list into brw->render_atoms when
 * brw->gen >= 8.  brw_upload_pipeline_state() visits the entries front to
 * back, so the position of each entry is significant.
 */
static const struct brw_tracked_state *gen8_render_atoms[] =
{
   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   /* Per-stage state; listed after the push constants and surface setup
    * above, as required by the "Before ..." notes on those entries.
    */
   &gen8_vs_state,
   &gen8_hs_state,
   &gen7_te_state,
   &gen8_ds_state,
   &gen8_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};
370
/**
 * State atoms for the compute pipeline on gen8 and later.
 *
 * brw_init_state() copies this list into brw->compute_atoms when
 * brw->gen >= 8.  The entries are currently the same as those of
 * gen7_compute_atoms.
 */
static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};
384
/**
 * Emit the GPU state that is programmed once, when the context is set up.
 *
 * Called from brw_init_state().  Returns without emitting anything when
 * brw->hw_ctx is not set.
 */
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   /* NOTE(review): presumably the same Sandybridge workaround flush that
    * brw_upload_pipeline_state() issues before emitting state -- confirm.
    */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      /* One MI_LOAD_REGISTER_IMM writing GEN7_CACHE_MODE_1; the payload
       * carries both the mask and the value for
       * GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC.
       */
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      /* 3DSTATE_WM_HZ_OP with every payload dword set to zero. */
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      /* 3DSTATE_WM_CHROMAKEY with its single payload dword set to zero. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
427
428 static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline)429 brw_get_pipeline_atoms(struct brw_context *brw,
430 enum brw_pipeline pipeline)
431 {
432 switch (pipeline) {
433 case BRW_RENDER_PIPELINE:
434 return brw->render_atoms;
435 case BRW_COMPUTE_PIPELINE:
436 return brw->compute_atoms;
437 default:
438 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
439 unreachable("Unsupported pipeline");
440 return NULL;
441 }
442 }
443
444 static void
brw_copy_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline,const struct brw_tracked_state ** atoms,int num_atoms)445 brw_copy_pipeline_atoms(struct brw_context *brw,
446 enum brw_pipeline pipeline,
447 const struct brw_tracked_state **atoms,
448 int num_atoms)
449 {
450 /* This is to work around brw_context::atoms being declared const. We want
451 * it to be const, but it needs to be initialized somehow!
452 */
453 struct brw_tracked_state *context_atoms =
454 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
455
456 for (int i = 0; i < num_atoms; i++) {
457 context_atoms[i] = *atoms[i];
458 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
459 assert(context_atoms[i].emit);
460 }
461
462 brw->num_atoms[pipeline] = num_atoms;
463 }
464
brw_init_state(struct brw_context * brw)465 void brw_init_state( struct brw_context *brw )
466 {
467 struct gl_context *ctx = &brw->ctx;
468
469 /* Force the first brw_select_pipeline to emit pipeline select */
470 brw->last_pipeline = BRW_NUM_PIPELINES;
471
472 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
473 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
474 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
475 ARRAY_SIZE(brw->render_atoms));
476 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
477 ARRAY_SIZE(brw->render_atoms));
478 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
479 ARRAY_SIZE(brw->compute_atoms));
480 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
481 ARRAY_SIZE(brw->compute_atoms));
482
483 brw_init_caches(brw);
484
485 if (brw->gen >= 8) {
486 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
487 gen8_render_atoms,
488 ARRAY_SIZE(gen8_render_atoms));
489 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
490 gen8_compute_atoms,
491 ARRAY_SIZE(gen8_compute_atoms));
492 } else if (brw->gen == 7) {
493 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
494 gen7_render_atoms,
495 ARRAY_SIZE(gen7_render_atoms));
496 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
497 gen7_compute_atoms,
498 ARRAY_SIZE(gen7_compute_atoms));
499 } else if (brw->gen == 6) {
500 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
501 gen6_atoms, ARRAY_SIZE(gen6_atoms));
502 } else {
503 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
504 gen4_atoms, ARRAY_SIZE(gen4_atoms));
505 }
506
507 brw_upload_initial_gpu_state(brw);
508
509 brw->NewGLState = ~0;
510 brw->ctx.NewDriverState = ~0ull;
511
512 /* ~0 is a nonsensical value which won't match anything we program, so
513 * the programming will take effect on the first time around.
514 */
515 brw->pma_stall_bits = ~0;
516
517 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
518 * dirty flags.
519 */
520 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
521
522 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
523 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
524 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
525 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
526 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
527 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
528 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
529 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
530 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
531 ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
532 }
533
534
/**
 * Tear down the state set up by brw_init_state(); this currently amounts
 * to destroying the caches via brw_destroy_caches().
 */
void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}
539
540 /***********************************************************************
541 */
542
543 static bool
check_state(const struct brw_state_flags * a,const struct brw_state_flags * b)544 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
545 {
546 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
547 }
548
accumulate_state(struct brw_state_flags * a,const struct brw_state_flags * b)549 static void accumulate_state( struct brw_state_flags *a,
550 const struct brw_state_flags *b )
551 {
552 a->mesa |= b->mesa;
553 a->brw |= b->brw;
554 }
555
556
xor_states(struct brw_state_flags * result,const struct brw_state_flags * a,const struct brw_state_flags * b)557 static void xor_states( struct brw_state_flags *result,
558 const struct brw_state_flags *a,
559 const struct brw_state_flags *b )
560 {
561 result->mesa = a->mesa ^ b->mesa;
562 result->brw = a->brw ^ b->brw;
563 }
564
/**
 * One row of a dirty-flag statistics table.
 *
 * Tables of these rows end with an entry whose bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;        /* flag value this row stands for */
   const char *name;    /* flag name; rows are initialised from string
                         * literals, so the text must not be written to */
   uint32_t count;      /* number of times the flag was seen dirty */
};
570
/* Build a dirty_bit_map initializer from a flag macro: the flag's value,
 * its spelling as a string, and a counter starting at zero.
 */
#define DEFINE_BIT(name) {name, #name, 0}
572
/**
 * Statistics table for the core Mesa dirty flags (_NEW_*).
 *
 * Updated and printed by brw_upload_pipeline_state() when DEBUG_STATE is
 * set in INTEL_DEBUG.  The all-zero last row terminates the table.
 */
static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems: the value is cast to unsigned before
    * it is widened to the 64-bit bit field.
    */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};
607
/**
 * Statistics table for the driver dirty flags (BRW_NEW_*).
 *
 * Updated and printed by brw_upload_pipeline_state() when DEBUG_STATE is
 * set in INTEL_DEBUG.  A STATIC_ASSERT there requires this table to have
 * exactly BRW_NUM_STATE_BITS rows plus the all-zero terminator, so a row
 * must be added here whenever a new BRW_NEW_* bit is introduced.
 */
static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   {0, 0, 0}
};
669
670 static void
brw_update_dirty_count(struct dirty_bit_map * bit_map,uint64_t bits)671 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
672 {
673 for (int i = 0; bit_map[i].bit != 0; i++) {
674 if (bit_map[i].bit & bits)
675 bit_map[i].count++;
676 }
677 }
678
679 static void
brw_print_dirty_count(struct dirty_bit_map * bit_map)680 brw_print_dirty_count(struct dirty_bit_map *bit_map)
681 {
682 for (int i = 0; bit_map[i].bit != 0; i++) {
683 if (bit_map[i].count > 1) {
684 fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
685 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
686 }
687 }
688 }
689
690 static inline void
brw_upload_tess_programs(struct brw_context * brw)691 brw_upload_tess_programs(struct brw_context *brw)
692 {
693 if (brw->tess_eval_program) {
694 brw_upload_tcs_prog(brw);
695 brw_upload_tes_prog(brw);
696 } else {
697 brw->tcs.base.prog_data = NULL;
698 brw->tes.base.prog_data = NULL;
699 }
700 }
701
702 static inline void
brw_upload_programs(struct brw_context * brw,enum brw_pipeline pipeline)703 brw_upload_programs(struct brw_context *brw,
704 enum brw_pipeline pipeline)
705 {
706 struct gl_context *ctx = &brw->ctx;
707
708 if (pipeline == BRW_RENDER_PIPELINE) {
709 brw_upload_vs_prog(brw);
710 brw_upload_tess_programs(brw);
711
712 if (brw->gen < 6)
713 brw_upload_ff_gs_prog(brw);
714 else
715 brw_upload_gs_prog(brw);
716
717 /* Update the VUE map for data exiting the GS stage of the pipeline.
718 * This comes from the last enabled shader stage.
719 */
720 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
721 bool old_separate = brw->vue_map_geom_out.separate;
722 struct brw_vue_prog_data *vue_prog_data;
723 if (brw->geometry_program)
724 vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
725 else if (brw->tess_eval_program)
726 vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
727 else
728 vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
729
730 brw->vue_map_geom_out = vue_prog_data->vue_map;
731
732 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
733 if (old_slots != brw->vue_map_geom_out.slots_valid ||
734 old_separate != brw->vue_map_geom_out.separate)
735 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
736
737 if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
738 VARYING_BIT_VIEWPORT) {
739 ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
740 brw->clip.viewport_count =
741 (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
742 ctx->Const.MaxViewports : 1;
743 }
744
745 brw_upload_wm_prog(brw);
746
747 if (brw->gen < 6) {
748 brw_upload_clip_prog(brw);
749 brw_upload_sf_prog(brw);
750 }
751 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
752 brw_upload_cs_prog(brw);
753 }
754 }
755
756 static inline void
merge_ctx_state(struct brw_context * brw,struct brw_state_flags * state)757 merge_ctx_state(struct brw_context *brw,
758 struct brw_state_flags *state)
759 {
760 state->mesa |= brw->NewGLState;
761 state->brw |= brw->ctx.NewDriverState;
762 }
763
764 static inline void
check_and_emit_atom(struct brw_context * brw,struct brw_state_flags * state,const struct brw_tracked_state * atom)765 check_and_emit_atom(struct brw_context *brw,
766 struct brw_state_flags *state,
767 const struct brw_tracked_state *atom)
768 {
769 if (check_state(state, &atom->dirty)) {
770 atom->emit(brw);
771 merge_ctx_state(brw, state);
772 }
773 }
774
/**
 * Emit every state atom of \p pipeline whose dirty flags are set.
 *
 * The set of dirty flags starts from the flags saved for this pipeline in
 * brw->state.pipelines[] and is extended with the context's pending flags
 * (brw->NewGLState / brw->ctx.NewDriverState).  The function does not
 * clear any flags itself; that is done by brw_pipeline_state_finished().
 *
 * \param brw       context to upload state for
 * \param pipeline  pipeline whose atom list is walked
 */
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   /* Number of DEBUG_STATE statistics updates so far, across all calls. */
   static int dirty_count = 0;
   /* Local working copy; the saved per-pipeline flags are not modified. */
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   /* Debugging aid, compiled out: change to if (1) to force a full
    * re-emit on every call.
    */
   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   /* Track which programs are current and raise the matching driver flag
    * when one of them changed since the last upload.
    */
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   /* The checks below run for both pipelines. */
   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   /* Uploading programs can raise further driver flags, so merge the
    * context's pending flags into the working set once more afterwards.
    */
   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];
	 struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

	 /* examined now covers this atom and all atoms before it. */
	 accumulate_state(&examined, &atom->dirty);

	 /* generated = (prev ^ state)
	  * if (examined & generated)
	  *     fail;
	  *
	  * i.e. an atom must not raise a flag that it, or an atom earlier
	  * in the list, depends on.
	  */
	 xor_states(&generated, &prev, &state);
	 assert(!check_state(&examined, &generated));
	 prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
	 const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      /* brw_bits must list every BRW state bit plus its terminator row. */
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      /* Print the statistics on the first and then every 1000th update. */
      if (dirty_count++ % 1000 == 0) {
	 brw_print_dirty_count(mesa_bits);
	 brw_print_dirty_count(brw_bits);
	 fprintf(stderr, "\n");
      }
   }
}
899
/***********************************************************************
 * Emit all state:
 */

/**
 * Emit all dirty state of the render pipeline by running
 * brw_upload_pipeline_state() with BRW_RENDER_PIPELINE.
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}
907
908 static inline void
brw_pipeline_state_finished(struct brw_context * brw,enum brw_pipeline pipeline)909 brw_pipeline_state_finished(struct brw_context *brw,
910 enum brw_pipeline pipeline)
911 {
912 /* Save all dirty state into the other pipelines */
913 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
914 if (i != pipeline) {
915 brw->state.pipelines[i].mesa |= brw->NewGLState;
916 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
917 } else {
918 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
919 }
920 }
921
922 brw->NewGLState = 0;
923 brw->ctx.NewDriverState = 0ull;
924 }
925
/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 *
 * Implemented by brw_pipeline_state_finished() with BRW_RENDER_PIPELINE.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
939
/**
 * Emit all dirty state of the compute pipeline by running
 * brw_upload_pipeline_state() with BRW_COMPUTE_PIPELINE.
 */
void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}
945
/**
 * Compute-pipeline counterpart of brw_render_state_finished(): runs
 * brw_pipeline_state_finished() with BRW_COMPUTE_PIPELINE.
 */
void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}
951