• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 
33 
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45 
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48    /* Once all the programs are done, we know how large urb entry
49     * sizes need to be and can decide if we need to change the urb
50     * layout.
51     */
52    &brw_curbe_offsets,
53    &brw_recalculate_urb_fence,
54 
55    &brw_cc_vp,
56    &brw_cc_unit,
57 
58    /* Surface state setup.  Must come before the VS/WM unit.  The binding
59     * table upload must be last.
60     */
61    &brw_vs_pull_constants,
62    &brw_wm_pull_constants,
63    &brw_renderbuffer_surfaces,
64    &brw_renderbuffer_read_surfaces,
65    &brw_texture_surfaces,
66    &brw_vs_binding_table,
67    &brw_wm_binding_table,
68 
69    &brw_fs_samplers,
70    &brw_vs_samplers,
71 
72    /* These set up state for brw_psp_urb_cbs */
73    &brw_wm_unit,
74    &brw_sf_vp,
75    &brw_sf_unit,
76    &brw_vs_unit,		/* always required, enabled or not */
77    &brw_clip_unit,
78    &brw_gs_unit,
79 
80    /* Command packets:
81     */
82    &brw_invariant_state,
83 
84    &brw_binding_table_pointers,
85    &brw_blend_constant_color,
86 
87    &brw_depthbuffer,
88 
89    &brw_polygon_stipple,
90    &brw_polygon_stipple_offset,
91 
92    &brw_line_stipple,
93 
94    &brw_psp_urb_cbs,
95 
96    &brw_drawing_rect,
97    &brw_indices, /* must come before brw_vertices */
98    &brw_index_buffer,
99    &brw_vertices,
100 
101    &brw_constant_buffer
102 };
103 
104 static const struct brw_tracked_state *gen6_atoms[] =
105 {
106    &gen6_sf_and_clip_viewports,
107 
108    /* Command packets: */
109 
110    &brw_cc_vp,
111    &gen6_viewport_state,	/* must do after *_vp stages */
112 
113    &gen6_urb,
114    &gen6_blend_state,		/* must do before cc unit */
115    &gen6_color_calc_state,	/* must do before cc unit */
116    &gen6_depth_stencil_state,	/* must do before cc unit */
117 
118    &gen6_vs_push_constants, /* Before vs_state */
119    &gen6_gs_push_constants, /* Before gs_state */
120    &gen6_wm_push_constants, /* Before wm_state */
121 
122    /* Surface state setup.  Must come before the VS/WM unit.  The binding
123     * table upload must be last.
124     */
125    &brw_vs_pull_constants,
126    &brw_vs_ubo_surfaces,
127    &brw_gs_pull_constants,
128    &brw_gs_ubo_surfaces,
129    &brw_wm_pull_constants,
130    &brw_wm_ubo_surfaces,
131    &gen6_renderbuffer_surfaces,
132    &brw_renderbuffer_read_surfaces,
133    &brw_texture_surfaces,
134    &gen6_sol_surface,
135    &brw_vs_binding_table,
136    &gen6_gs_binding_table,
137    &brw_wm_binding_table,
138 
139    &brw_fs_samplers,
140    &brw_vs_samplers,
141    &brw_gs_samplers,
142    &gen6_sampler_state,
143    &gen6_multisample_state,
144 
145    &gen6_vs_state,
146    &gen6_gs_state,
147    &gen6_clip_state,
148    &gen6_sf_state,
149    &gen6_wm_state,
150 
151    &gen6_scissor_state,
152 
153    &gen6_binding_table_pointers,
154 
155    &brw_depthbuffer,
156 
157    &brw_polygon_stipple,
158    &brw_polygon_stipple_offset,
159 
160    &brw_line_stipple,
161 
162    &brw_drawing_rect,
163 
164    &brw_indices, /* must come before brw_vertices */
165    &brw_index_buffer,
166    &brw_vertices,
167 };
168 
169 static const struct brw_tracked_state *gen7_render_atoms[] =
170 {
171    /* Command packets: */
172 
173    &brw_cc_vp,
174    &gen7_sf_clip_viewport,
175 
176    &gen7_l3_state,
177    &gen7_push_constant_space,
178    &gen7_urb,
179    &gen6_blend_state,		/* must do before cc unit */
180    &gen6_color_calc_state,	/* must do before cc unit */
181    &gen6_depth_stencil_state,	/* must do before cc unit */
182 
183    &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
184 
185    &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
186    &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
187    &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
188    &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
189    &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
190 
191    &gen6_vs_push_constants, /* Before vs_state */
192    &gen7_tcs_push_constants,
193    &gen7_tes_push_constants,
194    &gen6_gs_push_constants, /* Before gs_state */
195    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
196 
197    /* Surface state setup.  Must come before the VS/WM unit.  The binding
198     * table upload must be last.
199     */
200    &brw_vs_pull_constants,
201    &brw_vs_ubo_surfaces,
202    &brw_vs_abo_surfaces,
203    &brw_tcs_pull_constants,
204    &brw_tcs_ubo_surfaces,
205    &brw_tcs_abo_surfaces,
206    &brw_tes_pull_constants,
207    &brw_tes_ubo_surfaces,
208    &brw_tes_abo_surfaces,
209    &brw_gs_pull_constants,
210    &brw_gs_ubo_surfaces,
211    &brw_gs_abo_surfaces,
212    &brw_wm_pull_constants,
213    &brw_wm_ubo_surfaces,
214    &brw_wm_abo_surfaces,
215    &gen6_renderbuffer_surfaces,
216    &brw_renderbuffer_read_surfaces,
217    &brw_texture_surfaces,
218    &brw_vs_binding_table,
219    &brw_tcs_binding_table,
220    &brw_tes_binding_table,
221    &brw_gs_binding_table,
222    &brw_wm_binding_table,
223 
224    &brw_fs_samplers,
225    &brw_vs_samplers,
226    &brw_tcs_samplers,
227    &brw_tes_samplers,
228    &brw_gs_samplers,
229    &gen6_multisample_state,
230 
231    &gen7_vs_state,
232    &gen7_hs_state,
233    &gen7_te_state,
234    &gen7_ds_state,
235    &gen7_gs_state,
236    &gen7_sol_state,
237    &gen6_clip_state,
238    &gen7_sbe_state,
239    &gen7_sf_state,
240    &gen7_wm_state,
241    &gen7_ps_state,
242 
243    &gen6_scissor_state,
244 
245    &gen7_depthbuffer,
246 
247    &brw_polygon_stipple,
248    &brw_polygon_stipple_offset,
249 
250    &brw_line_stipple,
251 
252    &brw_drawing_rect,
253 
254    &brw_indices, /* must come before brw_vertices */
255    &brw_index_buffer,
256    &brw_vertices,
257 
258    &haswell_cut_index,
259 };
260 
261 static const struct brw_tracked_state *gen7_compute_atoms[] =
262 {
263    &gen7_l3_state,
264    &brw_cs_image_surfaces,
265    &gen7_cs_push_constants,
266    &brw_cs_pull_constants,
267    &brw_cs_ubo_surfaces,
268    &brw_cs_abo_surfaces,
269    &brw_cs_texture_surfaces,
270    &brw_cs_work_groups_surface,
271    &brw_cs_samplers,
272    &brw_cs_state,
273 };
274 
275 static const struct brw_tracked_state *gen8_render_atoms[] =
276 {
277    &brw_cc_vp,
278    &gen8_sf_clip_viewport,
279 
280    &gen7_l3_state,
281    &gen7_push_constant_space,
282    &gen7_urb,
283    &gen8_blend_state,
284    &gen6_color_calc_state,
285 
286    &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
287 
288    &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
289    &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
290    &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
291    &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
292    &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
293 
294    &gen6_vs_push_constants, /* Before vs_state */
295    &gen7_tcs_push_constants,
296    &gen7_tes_push_constants,
297    &gen6_gs_push_constants, /* Before gs_state */
298    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
299 
300    /* Surface state setup.  Must come before the VS/WM unit.  The binding
301     * table upload must be last.
302     */
303    &brw_vs_pull_constants,
304    &brw_vs_ubo_surfaces,
305    &brw_vs_abo_surfaces,
306    &brw_tcs_pull_constants,
307    &brw_tcs_ubo_surfaces,
308    &brw_tcs_abo_surfaces,
309    &brw_tes_pull_constants,
310    &brw_tes_ubo_surfaces,
311    &brw_tes_abo_surfaces,
312    &brw_gs_pull_constants,
313    &brw_gs_ubo_surfaces,
314    &brw_gs_abo_surfaces,
315    &brw_wm_pull_constants,
316    &brw_wm_ubo_surfaces,
317    &brw_wm_abo_surfaces,
318    &gen6_renderbuffer_surfaces,
319    &brw_renderbuffer_read_surfaces,
320    &brw_texture_surfaces,
321    &brw_vs_binding_table,
322    &brw_tcs_binding_table,
323    &brw_tes_binding_table,
324    &brw_gs_binding_table,
325    &brw_wm_binding_table,
326 
327    &brw_fs_samplers,
328    &brw_vs_samplers,
329    &brw_tcs_samplers,
330    &brw_tes_samplers,
331    &brw_gs_samplers,
332    &gen8_multisample_state,
333 
334    &gen8_vs_state,
335    &gen8_hs_state,
336    &gen7_te_state,
337    &gen8_ds_state,
338    &gen8_gs_state,
339    &gen7_sol_state,
340    &gen6_clip_state,
341    &gen8_raster_state,
342    &gen8_sbe_state,
343    &gen8_sf_state,
344    &gen8_ps_blend,
345    &gen8_ps_extra,
346    &gen8_ps_state,
347    &gen8_wm_depth_stencil,
348    &gen8_wm_state,
349 
350    &gen6_scissor_state,
351 
352    &gen7_depthbuffer,
353 
354    &brw_polygon_stipple,
355    &brw_polygon_stipple_offset,
356 
357    &brw_line_stipple,
358 
359    &brw_drawing_rect,
360 
361    &gen8_vf_topology,
362 
363    &brw_indices,
364    &gen8_index_buffer,
365    &gen8_vertices,
366 
367    &haswell_cut_index,
368    &gen8_pma_fix,
369 };
370 
371 static const struct brw_tracked_state *gen8_compute_atoms[] =
372 {
373    &gen7_l3_state,
374    &brw_cs_image_surfaces,
375    &gen7_cs_push_constants,
376    &brw_cs_pull_constants,
377    &brw_cs_ubo_surfaces,
378    &brw_cs_abo_surfaces,
379    &brw_cs_texture_surfaces,
380    &brw_cs_work_groups_surface,
381    &brw_cs_samplers,
382    &brw_cs_state,
383 };
384 
385 static void
brw_upload_initial_gpu_state(struct brw_context * brw)386 brw_upload_initial_gpu_state(struct brw_context *brw)
387 {
388    /* On platforms with hardware contexts, we can set our initial GPU state
389     * right away rather than doing it via state atoms.  This saves a small
390     * amount of overhead on every draw call.
391     */
392    if (!brw->hw_ctx)
393       return;
394 
395    if (brw->gen == 6)
396       brw_emit_post_sync_nonzero_flush(brw);
397 
398    brw_upload_invariant_state(brw);
399 
400    /* Recommended optimization for Victim Cache eviction in pixel backend. */
401    if (brw->gen >= 9) {
402       BEGIN_BATCH(3);
403       OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
404       OUT_BATCH(GEN7_CACHE_MODE_1);
405       OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
406                 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
407       ADVANCE_BATCH();
408    }
409 
410    if (brw->gen >= 8) {
411       gen8_emit_3dstate_sample_pattern(brw);
412 
413       BEGIN_BATCH(5);
414       OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
415       OUT_BATCH(0);
416       OUT_BATCH(0);
417       OUT_BATCH(0);
418       OUT_BATCH(0);
419       ADVANCE_BATCH();
420 
421       BEGIN_BATCH(2);
422       OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
423       OUT_BATCH(0);
424       ADVANCE_BATCH();
425    }
426 }
427 
428 static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline)429 brw_get_pipeline_atoms(struct brw_context *brw,
430                        enum brw_pipeline pipeline)
431 {
432    switch (pipeline) {
433    case BRW_RENDER_PIPELINE:
434       return brw->render_atoms;
435    case BRW_COMPUTE_PIPELINE:
436       return brw->compute_atoms;
437    default:
438       STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
439       unreachable("Unsupported pipeline");
440       return NULL;
441    }
442 }
443 
444 static void
brw_copy_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline,const struct brw_tracked_state ** atoms,int num_atoms)445 brw_copy_pipeline_atoms(struct brw_context *brw,
446                         enum brw_pipeline pipeline,
447                         const struct brw_tracked_state **atoms,
448                         int num_atoms)
449 {
450    /* This is to work around brw_context::atoms being declared const.  We want
451     * it to be const, but it needs to be initialized somehow!
452     */
453    struct brw_tracked_state *context_atoms =
454       (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
455 
456    for (int i = 0; i < num_atoms; i++) {
457       context_atoms[i] = *atoms[i];
458       assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
459       assert(context_atoms[i].emit);
460    }
461 
462    brw->num_atoms[pipeline] = num_atoms;
463 }
464 
brw_init_state(struct brw_context * brw)465 void brw_init_state( struct brw_context *brw )
466 {
467    struct gl_context *ctx = &brw->ctx;
468 
469    /* Force the first brw_select_pipeline to emit pipeline select */
470    brw->last_pipeline = BRW_NUM_PIPELINES;
471 
472    STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
473    STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
474    STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
475                  ARRAY_SIZE(brw->render_atoms));
476    STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
477                  ARRAY_SIZE(brw->render_atoms));
478    STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
479                  ARRAY_SIZE(brw->compute_atoms));
480    STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
481                  ARRAY_SIZE(brw->compute_atoms));
482 
483    brw_init_caches(brw);
484 
485    if (brw->gen >= 8) {
486       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
487                               gen8_render_atoms,
488                               ARRAY_SIZE(gen8_render_atoms));
489       brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
490                               gen8_compute_atoms,
491                               ARRAY_SIZE(gen8_compute_atoms));
492    } else if (brw->gen == 7) {
493       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
494                               gen7_render_atoms,
495                               ARRAY_SIZE(gen7_render_atoms));
496       brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
497                               gen7_compute_atoms,
498                               ARRAY_SIZE(gen7_compute_atoms));
499    } else if (brw->gen == 6) {
500       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
501                               gen6_atoms, ARRAY_SIZE(gen6_atoms));
502    } else {
503       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
504                               gen4_atoms, ARRAY_SIZE(gen4_atoms));
505    }
506 
507    brw_upload_initial_gpu_state(brw);
508 
509    brw->NewGLState = ~0;
510    brw->ctx.NewDriverState = ~0ull;
511 
512    /* ~0 is a nonsensical value which won't match anything we program, so
513     * the programming will take effect on the first time around.
514     */
515    brw->pma_stall_bits = ~0;
516 
517    /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
518     * dirty flags.
519     */
520    STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
521 
522    ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
523    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
524    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
525    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
526    ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
527    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
528    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
529    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
530    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
531    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
532 }
533 
534 
/**
 * Tear down the state set up by brw_init_state(); this amounts to
 * destroying the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
539 
540 /***********************************************************************
541  */
542 
543 static bool
check_state(const struct brw_state_flags * a,const struct brw_state_flags * b)544 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
545 {
546    return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
547 }
548 
accumulate_state(struct brw_state_flags * a,const struct brw_state_flags * b)549 static void accumulate_state( struct brw_state_flags *a,
550 			      const struct brw_state_flags *b )
551 {
552    a->mesa |= b->mesa;
553    a->brw |= b->brw;
554 }
555 
556 
xor_states(struct brw_state_flags * result,const struct brw_state_flags * a,const struct brw_state_flags * b)557 static void xor_states( struct brw_state_flags *result,
558 			     const struct brw_state_flags *a,
559 			      const struct brw_state_flags *b )
560 {
561    result->mesa = a->mesa ^ b->mesa;
562    result->brw = a->brw ^ b->brw;
563 }
564 
/* One row of a dirty-bit statistics table.  A row whose bit is 0
 * terminates the table.
 */
struct dirty_bit_map {
   uint64_t bit;        /* flag value this row tracks */
   const char *name;    /* printable name; points at a string literal */
   uint32_t count;      /* how many times the flag was seen set */
};
570 
/* Build a dirty_bit_map row for a flag: its value, its spelling as a
 * string, and a zeroed counter.  The parameter is deliberately not called
 * "name" so that it cannot be substituted into the .name designator.
 */
#define DEFINE_BIT(flag) { .bit = (flag), .name = #flag, .count = 0 }
572 
573 static struct dirty_bit_map mesa_bits[] = {
574    DEFINE_BIT(_NEW_MODELVIEW),
575    DEFINE_BIT(_NEW_PROJECTION),
576    DEFINE_BIT(_NEW_TEXTURE_MATRIX),
577    DEFINE_BIT(_NEW_COLOR),
578    DEFINE_BIT(_NEW_DEPTH),
579    DEFINE_BIT(_NEW_EVAL),
580    DEFINE_BIT(_NEW_FOG),
581    DEFINE_BIT(_NEW_HINT),
582    DEFINE_BIT(_NEW_LIGHT),
583    DEFINE_BIT(_NEW_LINE),
584    DEFINE_BIT(_NEW_PIXEL),
585    DEFINE_BIT(_NEW_POINT),
586    DEFINE_BIT(_NEW_POLYGON),
587    DEFINE_BIT(_NEW_POLYGONSTIPPLE),
588    DEFINE_BIT(_NEW_SCISSOR),
589    DEFINE_BIT(_NEW_STENCIL),
590    DEFINE_BIT(_NEW_TEXTURE),
591    DEFINE_BIT(_NEW_TRANSFORM),
592    DEFINE_BIT(_NEW_VIEWPORT),
593    DEFINE_BIT(_NEW_ARRAY),
594    DEFINE_BIT(_NEW_RENDERMODE),
595    DEFINE_BIT(_NEW_BUFFERS),
596    DEFINE_BIT(_NEW_CURRENT_ATTRIB),
597    DEFINE_BIT(_NEW_MULTISAMPLE),
598    DEFINE_BIT(_NEW_TRACK_MATRIX),
599    DEFINE_BIT(_NEW_PROGRAM),
600    DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
601    DEFINE_BIT(_NEW_BUFFER_OBJECT),
602    DEFINE_BIT(_NEW_FRAG_CLAMP),
603    /* Avoid sign extension problems. */
604    {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
605    {0, 0, 0}
606 };
607 
608 static struct dirty_bit_map brw_bits[] = {
609    DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
610    DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
611    DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
612    DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
613    DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
614    DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
615    DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
616    DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
617    DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
618    DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
619    DEFINE_BIT(BRW_NEW_URB_FENCE),
620    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
621    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
622    DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
623    DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
624    DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
625    DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
626    DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
627    DEFINE_BIT(BRW_NEW_PRIMITIVE),
628    DEFINE_BIT(BRW_NEW_CONTEXT),
629    DEFINE_BIT(BRW_NEW_PSP),
630    DEFINE_BIT(BRW_NEW_SURFACES),
631    DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
632    DEFINE_BIT(BRW_NEW_INDICES),
633    DEFINE_BIT(BRW_NEW_VERTICES),
634    DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
635    DEFINE_BIT(BRW_NEW_BATCH),
636    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
637    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
638    DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
639    DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
640    DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
641    DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
642    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
643    DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
644    DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
645    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
646    DEFINE_BIT(BRW_NEW_STATS_WM),
647    DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
648    DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
649    DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
650    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
651    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
652    DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
653    DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
654    DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
655    DEFINE_BIT(BRW_NEW_CC_VP),
656    DEFINE_BIT(BRW_NEW_SF_VP),
657    DEFINE_BIT(BRW_NEW_CLIP_VP),
658    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
659    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
660    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
661    DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
662    DEFINE_BIT(BRW_NEW_URB_SIZE),
663    DEFINE_BIT(BRW_NEW_CC_STATE),
664    DEFINE_BIT(BRW_NEW_BLORP),
665    DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
666    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
667    {0, 0, 0}
668 };
669 
670 static void
brw_update_dirty_count(struct dirty_bit_map * bit_map,uint64_t bits)671 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
672 {
673    for (int i = 0; bit_map[i].bit != 0; i++) {
674       if (bit_map[i].bit & bits)
675 	 bit_map[i].count++;
676    }
677 }
678 
679 static void
brw_print_dirty_count(struct dirty_bit_map * bit_map)680 brw_print_dirty_count(struct dirty_bit_map *bit_map)
681 {
682    for (int i = 0; bit_map[i].bit != 0; i++) {
683       if (bit_map[i].count > 1) {
684          fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
685                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
686       }
687    }
688 }
689 
690 static inline void
brw_upload_tess_programs(struct brw_context * brw)691 brw_upload_tess_programs(struct brw_context *brw)
692 {
693    if (brw->tess_eval_program) {
694       brw_upload_tcs_prog(brw);
695       brw_upload_tes_prog(brw);
696    } else {
697       brw->tcs.base.prog_data = NULL;
698       brw->tes.base.prog_data = NULL;
699    }
700 }
701 
702 static inline void
brw_upload_programs(struct brw_context * brw,enum brw_pipeline pipeline)703 brw_upload_programs(struct brw_context *brw,
704                     enum brw_pipeline pipeline)
705 {
706    struct gl_context *ctx = &brw->ctx;
707 
708    if (pipeline == BRW_RENDER_PIPELINE) {
709       brw_upload_vs_prog(brw);
710       brw_upload_tess_programs(brw);
711 
712       if (brw->gen < 6)
713          brw_upload_ff_gs_prog(brw);
714       else
715          brw_upload_gs_prog(brw);
716 
717       /* Update the VUE map for data exiting the GS stage of the pipeline.
718        * This comes from the last enabled shader stage.
719        */
720       GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
721       bool old_separate = brw->vue_map_geom_out.separate;
722       struct brw_vue_prog_data *vue_prog_data;
723       if (brw->geometry_program)
724          vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
725       else if (brw->tess_eval_program)
726          vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
727       else
728          vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
729 
730       brw->vue_map_geom_out = vue_prog_data->vue_map;
731 
732       /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
733       if (old_slots != brw->vue_map_geom_out.slots_valid ||
734           old_separate != brw->vue_map_geom_out.separate)
735          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
736 
737       if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
738           VARYING_BIT_VIEWPORT) {
739          ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
740          brw->clip.viewport_count =
741             (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
742             ctx->Const.MaxViewports : 1;
743       }
744 
745       brw_upload_wm_prog(brw);
746 
747       if (brw->gen < 6) {
748          brw_upload_clip_prog(brw);
749          brw_upload_sf_prog(brw);
750       }
751    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
752       brw_upload_cs_prog(brw);
753    }
754 }
755 
756 static inline void
merge_ctx_state(struct brw_context * brw,struct brw_state_flags * state)757 merge_ctx_state(struct brw_context *brw,
758                 struct brw_state_flags *state)
759 {
760    state->mesa |= brw->NewGLState;
761    state->brw |= brw->ctx.NewDriverState;
762 }
763 
764 static inline void
check_and_emit_atom(struct brw_context * brw,struct brw_state_flags * state,const struct brw_tracked_state * atom)765 check_and_emit_atom(struct brw_context *brw,
766                     struct brw_state_flags *state,
767                     const struct brw_tracked_state *atom)
768 {
769    if (check_state(state, &atom->dirty)) {
770       atom->emit(brw);
771       merge_ctx_state(brw, state);
772    }
773 }
774 
775 static inline void
brw_upload_pipeline_state(struct brw_context * brw,enum brw_pipeline pipeline)776 brw_upload_pipeline_state(struct brw_context *brw,
777                           enum brw_pipeline pipeline)
778 {
779    struct gl_context *ctx = &brw->ctx;
780    int i;
781    static int dirty_count = 0;
782    struct brw_state_flags state = brw->state.pipelines[pipeline];
783    unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
784 
785    brw_select_pipeline(brw, pipeline);
786 
787    if (0) {
788       /* Always re-emit all state. */
789       brw->NewGLState = ~0;
790       ctx->NewDriverState = ~0ull;
791    }
792 
793    if (pipeline == BRW_RENDER_PIPELINE) {
794       if (brw->fragment_program != ctx->FragmentProgram._Current) {
795          brw->fragment_program = ctx->FragmentProgram._Current;
796          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
797       }
798 
799       if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
800          brw->tess_eval_program = ctx->TessEvalProgram._Current;
801          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
802       }
803 
804       if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
805          brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
806          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
807       }
808 
809       if (brw->geometry_program != ctx->GeometryProgram._Current) {
810          brw->geometry_program = ctx->GeometryProgram._Current;
811          brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
812       }
813 
814       if (brw->vertex_program != ctx->VertexProgram._Current) {
815          brw->vertex_program = ctx->VertexProgram._Current;
816          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
817       }
818    }
819 
820    if (brw->compute_program != ctx->ComputeProgram._Current) {
821       brw->compute_program = ctx->ComputeProgram._Current;
822       brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
823    }
824 
825    if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
826       brw->meta_in_progress = _mesa_meta_in_progress(ctx);
827       brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
828    }
829 
830    if (brw->num_samples != fb_samples) {
831       brw->num_samples = fb_samples;
832       brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
833    }
834 
835    /* Exit early if no state is flagged as dirty */
836    merge_ctx_state(brw, &state);
837    if ((state.mesa | state.brw) == 0)
838       return;
839 
840    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
841    if (brw->gen == 6)
842       brw_emit_post_sync_nonzero_flush(brw);
843 
844    brw_upload_programs(brw, pipeline);
845    merge_ctx_state(brw, &state);
846 
847    brw_upload_state_base_address(brw);
848 
849    const struct brw_tracked_state *atoms =
850       brw_get_pipeline_atoms(brw, pipeline);
851    const int num_atoms = brw->num_atoms[pipeline];
852 
853    if (unlikely(INTEL_DEBUG)) {
854       /* Debug version which enforces various sanity checks on the
855        * state flags which are generated and checked to help ensure
856        * state atoms are ordered correctly in the list.
857        */
858       struct brw_state_flags examined, prev;
859       memset(&examined, 0, sizeof(examined));
860       prev = state;
861 
862       for (i = 0; i < num_atoms; i++) {
863 	 const struct brw_tracked_state *atom = &atoms[i];
864 	 struct brw_state_flags generated;
865 
866          check_and_emit_atom(brw, &state, atom);
867 
868 	 accumulate_state(&examined, &atom->dirty);
869 
870 	 /* generated = (prev ^ state)
871 	  * if (examined & generated)
872 	  *     fail;
873 	  */
874 	 xor_states(&generated, &prev, &state);
875 	 assert(!check_state(&examined, &generated));
876 	 prev = state;
877       }
878    }
879    else {
880       for (i = 0; i < num_atoms; i++) {
881 	 const struct brw_tracked_state *atom = &atoms[i];
882 
883          check_and_emit_atom(brw, &state, atom);
884       }
885    }
886 
887    if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
888       STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
889 
890       brw_update_dirty_count(mesa_bits, state.mesa);
891       brw_update_dirty_count(brw_bits, state.brw);
892       if (dirty_count++ % 1000 == 0) {
893 	 brw_print_dirty_count(mesa_bits);
894 	 brw_print_dirty_count(brw_bits);
895 	 fprintf(stderr, "\n");
896       }
897    }
898 }
899 
900 /***********************************************************************
901  * Emit all state:
902  */
brw_upload_render_state(struct brw_context * brw)903 void brw_upload_render_state(struct brw_context *brw)
904 {
905    brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
906 }
907 
908 static inline void
brw_pipeline_state_finished(struct brw_context * brw,enum brw_pipeline pipeline)909 brw_pipeline_state_finished(struct brw_context *brw,
910                             enum brw_pipeline pipeline)
911 {
912    /* Save all dirty state into the other pipelines */
913    for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
914       if (i != pipeline) {
915          brw->state.pipelines[i].mesa |= brw->NewGLState;
916          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
917       } else {
918          memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
919       }
920    }
921 
922    brw->NewGLState = 0;
923    brw->ctx.NewDriverState = 0ull;
924 }
925 
926 /**
927  * Clear dirty bits to account for the fact that the state emitted by
928  * brw_upload_render_state() has been committed to the hardware. This is a
929  * separate call from brw_upload_render_state() because it's possible that
930  * after the call to brw_upload_render_state(), we will discover that we've
931  * run out of aperture space, and need to rewind the batch buffer to the state
932  * it had before the brw_upload_render_state() call.
933  */
934 void
brw_render_state_finished(struct brw_context * brw)935 brw_render_state_finished(struct brw_context *brw)
936 {
937    brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
938 }
939 
940 void
brw_upload_compute_state(struct brw_context * brw)941 brw_upload_compute_state(struct brw_context *brw)
942 {
943    brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
944 }
945 
946 void
brw_compute_state_finished(struct brw_context * brw)947 brw_compute_state_finished(struct brw_context *brw)
948 {
949    brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
950 }
951