/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */
31 
32 
33 
34 #include "brw_context.h"
35 #include "brw_defines.h"
36 #include "brw_state.h"
37 #include "brw_program.h"
38 #include "drivers/common/meta.h"
39 #include "intel_batchbuffer.h"
40 #include "intel_buffers.h"
41 #include "brw_vs.h"
42 #include "brw_ff_gs.h"
43 #include "brw_gs.h"
44 #include "brw_wm.h"
45 #include "brw_cs.h"
46 #include "main/framebuffer.h"
47 
48 static void
brw_upload_initial_gpu_state(struct brw_context * brw)49 brw_upload_initial_gpu_state(struct brw_context *brw)
50 {
51    const struct gen_device_info *devinfo = &brw->screen->devinfo;
52 
53    /* On platforms with hardware contexts, we can set our initial GPU state
54     * right away rather than doing it via state atoms.  This saves a small
55     * amount of overhead on every draw call.
56     */
57    if (!brw->hw_ctx)
58       return;
59 
60    if (devinfo->gen == 6)
61       brw_emit_post_sync_nonzero_flush(brw);
62 
63    brw_upload_invariant_state(brw);
64 
65    if (devinfo->gen == 10) {
66       brw_load_register_imm32(brw, GEN10_CACHE_MODE_SS,
67                               REG_MASK(GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
68                               GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE);
69 
70       /* From gen10 workaround table in h/w specs:
71        *
72        *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
73        *     a value of 0xFFFF"
74        *
75        * This means that we end up setting the entire 3D_MODE state. Bits
76        * in this register control things such as slice hashing and we want
77        * the default values of zero at the moment.
78        */
79       BEGIN_BATCH(2);
80       OUT_BATCH(_3DSTATE_3D_MODE  << 16 | (2 - 2));
81       OUT_BATCH(0xFFFF << 16);
82       ADVANCE_BATCH();
83    }
84 
85    if (devinfo->gen == 9) {
86       /* Recommended optimizations for Victim Cache eviction and floating
87        * point blending.
88        */
89       brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
90                               REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
91                               REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
92                               GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
93                               GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
94 
95       if (gen_device_info_is_9lp(devinfo)) {
96          brw_load_register_imm32(brw, GEN7_GT_MODE,
97                                  GEN9_SUBSLICE_HASHING_MASK_BITS |
98                                  GEN9_SUBSLICE_HASHING_16x16);
99       }
100    }
101 
102    if (devinfo->gen >= 8) {
103       gen8_emit_3dstate_sample_pattern(brw);
104 
105       BEGIN_BATCH(5);
106       OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
107       OUT_BATCH(0);
108       OUT_BATCH(0);
109       OUT_BATCH(0);
110       OUT_BATCH(0);
111       ADVANCE_BATCH();
112 
113       BEGIN_BATCH(2);
114       OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
115       OUT_BATCH(0);
116       ADVANCE_BATCH();
117    }
118 }
119 
120 static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline)121 brw_get_pipeline_atoms(struct brw_context *brw,
122                        enum brw_pipeline pipeline)
123 {
124    switch (pipeline) {
125    case BRW_RENDER_PIPELINE:
126       return brw->render_atoms;
127    case BRW_COMPUTE_PIPELINE:
128       return brw->compute_atoms;
129    default:
130       STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
131       unreachable("Unsupported pipeline");
132       return NULL;
133    }
134 }
135 
136 void
brw_copy_pipeline_atoms(struct brw_context * brw,enum brw_pipeline pipeline,const struct brw_tracked_state ** atoms,int num_atoms)137 brw_copy_pipeline_atoms(struct brw_context *brw,
138                         enum brw_pipeline pipeline,
139                         const struct brw_tracked_state **atoms,
140                         int num_atoms)
141 {
142    /* This is to work around brw_context::atoms being declared const.  We want
143     * it to be const, but it needs to be initialized somehow!
144     */
145    struct brw_tracked_state *context_atoms =
146       (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
147 
148    for (int i = 0; i < num_atoms; i++) {
149       context_atoms[i] = *atoms[i];
150       assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
151       assert(context_atoms[i].emit);
152    }
153 
154    brw->num_atoms[pipeline] = num_atoms;
155 }
156 
brw_init_state(struct brw_context * brw)157 void brw_init_state( struct brw_context *brw )
158 {
159    struct gl_context *ctx = &brw->ctx;
160    const struct gen_device_info *devinfo = &brw->screen->devinfo;
161 
162    /* Force the first brw_select_pipeline to emit pipeline select */
163    brw->last_pipeline = BRW_NUM_PIPELINES;
164 
165    brw_init_caches(brw);
166 
167    if (devinfo->gen >= 10)
168       gen10_init_atoms(brw);
169    else if (devinfo->gen >= 9)
170       gen9_init_atoms(brw);
171    else if (devinfo->gen >= 8)
172       gen8_init_atoms(brw);
173    else if (devinfo->is_haswell)
174       gen75_init_atoms(brw);
175    else if (devinfo->gen >= 7)
176       gen7_init_atoms(brw);
177    else if (devinfo->gen >= 6)
178       gen6_init_atoms(brw);
179    else if (devinfo->gen >= 5)
180       gen5_init_atoms(brw);
181    else if (devinfo->is_g4x)
182       gen45_init_atoms(brw);
183    else
184       gen4_init_atoms(brw);
185 
186    brw_upload_initial_gpu_state(brw);
187 
188    brw->NewGLState = ~0;
189    brw->ctx.NewDriverState = ~0ull;
190 
191    /* ~0 is a nonsensical value which won't match anything we program, so
192     * the programming will take effect on the first time around.
193     */
194    brw->pma_stall_bits = ~0;
195 
196    /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
197     * dirty flags.
198     */
199    STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
200 
201    ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
202    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
203    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
204    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
205    ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
206    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
207    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
208    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
209    ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
210    ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
211 }
212 
213 
/**
 * Tear down the state-tracking resources of \p brw by destroying its caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
218 
219 /***********************************************************************
220  */
221 
222 static bool
check_state(const struct brw_state_flags * a,const struct brw_state_flags * b)223 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
224 {
225    return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
226 }
227 
accumulate_state(struct brw_state_flags * a,const struct brw_state_flags * b)228 static void accumulate_state( struct brw_state_flags *a,
229 			      const struct brw_state_flags *b )
230 {
231    a->mesa |= b->mesa;
232    a->brw |= b->brw;
233 }
234 
235 
xor_states(struct brw_state_flags * result,const struct brw_state_flags * a,const struct brw_state_flags * b)236 static void xor_states( struct brw_state_flags *result,
237 			     const struct brw_state_flags *a,
238 			      const struct brw_state_flags *b )
239 {
240    result->mesa = a->mesa ^ b->mesa;
241    result->brw = a->brw ^ b->brw;
242 }
243 
/**
 * One entry of a dirty-bit statistics table: a flag value, its printable
 * name, and a counter of how many times it has been seen dirty.
 */
struct dirty_bit_map {
   uint64_t bit;
   /* Always points at a string literal, hence const. */
   const char *name;
   uint32_t count;
};

/* Build a table entry from a flag identifier; the name is the stringified
 * identifier and the counter starts at zero.
 */
#define DEFINE_BIT(name) {name, #name, 0}
251 
252 static struct dirty_bit_map mesa_bits[] = {
253    DEFINE_BIT(_NEW_MODELVIEW),
254    DEFINE_BIT(_NEW_PROJECTION),
255    DEFINE_BIT(_NEW_TEXTURE_MATRIX),
256    DEFINE_BIT(_NEW_COLOR),
257    DEFINE_BIT(_NEW_DEPTH),
258    DEFINE_BIT(_NEW_EVAL),
259    DEFINE_BIT(_NEW_FOG),
260    DEFINE_BIT(_NEW_HINT),
261    DEFINE_BIT(_NEW_LIGHT),
262    DEFINE_BIT(_NEW_LINE),
263    DEFINE_BIT(_NEW_PIXEL),
264    DEFINE_BIT(_NEW_POINT),
265    DEFINE_BIT(_NEW_POLYGON),
266    DEFINE_BIT(_NEW_POLYGONSTIPPLE),
267    DEFINE_BIT(_NEW_SCISSOR),
268    DEFINE_BIT(_NEW_STENCIL),
269    DEFINE_BIT(_NEW_TEXTURE_OBJECT),
270    DEFINE_BIT(_NEW_TRANSFORM),
271    DEFINE_BIT(_NEW_VIEWPORT),
272    DEFINE_BIT(_NEW_TEXTURE_STATE),
273    DEFINE_BIT(_NEW_ARRAY),
274    DEFINE_BIT(_NEW_RENDERMODE),
275    DEFINE_BIT(_NEW_BUFFERS),
276    DEFINE_BIT(_NEW_CURRENT_ATTRIB),
277    DEFINE_BIT(_NEW_MULTISAMPLE),
278    DEFINE_BIT(_NEW_TRACK_MATRIX),
279    DEFINE_BIT(_NEW_PROGRAM),
280    DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
281    DEFINE_BIT(_NEW_FRAG_CLAMP),
282    /* Avoid sign extension problems. */
283    {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
284    {0, 0, 0}
285 };
286 
287 static struct dirty_bit_map brw_bits[] = {
288    DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
289    DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
290    DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
291    DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
292    DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
293    DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
294    DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
295    DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
296    DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
297    DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
298    DEFINE_BIT(BRW_NEW_URB_FENCE),
299    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
300    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
301    DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
302    DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
303    DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
304    DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
305    DEFINE_BIT(BRW_NEW_PRIMITIVE),
306    DEFINE_BIT(BRW_NEW_CONTEXT),
307    DEFINE_BIT(BRW_NEW_PSP),
308    DEFINE_BIT(BRW_NEW_SURFACES),
309    DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
310    DEFINE_BIT(BRW_NEW_INDICES),
311    DEFINE_BIT(BRW_NEW_VERTICES),
312    DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
313    DEFINE_BIT(BRW_NEW_BATCH),
314    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
315    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
316    DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
317    DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
318    DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
319    DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
320    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
321    DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
322    DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
323    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
324    DEFINE_BIT(BRW_NEW_STATS_WM),
325    DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
326    DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
327    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
328    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
329    DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
330    DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
331    DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
332    DEFINE_BIT(BRW_NEW_CC_VP),
333    DEFINE_BIT(BRW_NEW_SF_VP),
334    DEFINE_BIT(BRW_NEW_CLIP_VP),
335    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
336    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
337    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
338    DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
339    DEFINE_BIT(BRW_NEW_URB_SIZE),
340    DEFINE_BIT(BRW_NEW_CC_STATE),
341    DEFINE_BIT(BRW_NEW_BLORP),
342    DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
343    DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
344    DEFINE_BIT(BRW_NEW_DRAW_CALL),
345    DEFINE_BIT(BRW_NEW_AUX_STATE),
346    {0, 0, 0}
347 };
348 
349 static void
brw_update_dirty_count(struct dirty_bit_map * bit_map,uint64_t bits)350 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
351 {
352    for (int i = 0; bit_map[i].bit != 0; i++) {
353       if (bit_map[i].bit & bits)
354 	 bit_map[i].count++;
355    }
356 }
357 
358 static void
brw_print_dirty_count(struct dirty_bit_map * bit_map)359 brw_print_dirty_count(struct dirty_bit_map *bit_map)
360 {
361    for (int i = 0; bit_map[i].bit != 0; i++) {
362       if (bit_map[i].count > 1) {
363          fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
364                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
365       }
366    }
367 }
368 
369 static inline void
brw_upload_tess_programs(struct brw_context * brw)370 brw_upload_tess_programs(struct brw_context *brw)
371 {
372    if (brw->programs[MESA_SHADER_TESS_EVAL]) {
373       brw_upload_tcs_prog(brw);
374       brw_upload_tes_prog(brw);
375    } else {
376       brw->tcs.base.prog_data = NULL;
377       brw->tes.base.prog_data = NULL;
378    }
379 }
380 
381 static inline void
brw_upload_programs(struct brw_context * brw,enum brw_pipeline pipeline)382 brw_upload_programs(struct brw_context *brw,
383                     enum brw_pipeline pipeline)
384 {
385    struct gl_context *ctx = &brw->ctx;
386    const struct gen_device_info *devinfo = &brw->screen->devinfo;
387 
388    if (pipeline == BRW_RENDER_PIPELINE) {
389       brw_upload_vs_prog(brw);
390       brw_upload_tess_programs(brw);
391 
392       if (brw->programs[MESA_SHADER_GEOMETRY]) {
393          brw_upload_gs_prog(brw);
394       } else {
395          brw->gs.base.prog_data = NULL;
396          if (devinfo->gen < 7)
397             brw_upload_ff_gs_prog(brw);
398       }
399 
400       /* Update the VUE map for data exiting the GS stage of the pipeline.
401        * This comes from the last enabled shader stage.
402        */
403       GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
404       bool old_separate = brw->vue_map_geom_out.separate;
405       struct brw_vue_prog_data *vue_prog_data;
406       if (brw->programs[MESA_SHADER_GEOMETRY])
407          vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
408       else if (brw->programs[MESA_SHADER_TESS_EVAL])
409          vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
410       else
411          vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
412 
413       brw->vue_map_geom_out = vue_prog_data->vue_map;
414 
415       /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
416       if (old_slots != brw->vue_map_geom_out.slots_valid ||
417           old_separate != brw->vue_map_geom_out.separate)
418          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
419 
420       if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
421           VARYING_BIT_VIEWPORT) {
422          ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
423          brw->clip.viewport_count =
424             (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
425             ctx->Const.MaxViewports : 1;
426       }
427 
428       brw_upload_wm_prog(brw);
429 
430       if (devinfo->gen < 6) {
431          brw_upload_clip_prog(brw);
432          brw_upload_sf_prog(brw);
433       }
434 
435       brw_disk_cache_write_render_programs(brw);
436    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
437       brw_upload_cs_prog(brw);
438       brw_disk_cache_write_compute_program(brw);
439    }
440 }
441 
442 static inline void
merge_ctx_state(struct brw_context * brw,struct brw_state_flags * state)443 merge_ctx_state(struct brw_context *brw,
444                 struct brw_state_flags *state)
445 {
446    state->mesa |= brw->NewGLState;
447    state->brw |= brw->ctx.NewDriverState;
448 }
449 
450 static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context * brw,struct brw_state_flags * state,const struct brw_tracked_state * atom)451 check_and_emit_atom(struct brw_context *brw,
452                     struct brw_state_flags *state,
453                     const struct brw_tracked_state *atom)
454 {
455    if (check_state(state, &atom->dirty)) {
456       atom->emit(brw);
457       merge_ctx_state(brw, state);
458    }
459 }
460 
461 static inline void
brw_upload_pipeline_state(struct brw_context * brw,enum brw_pipeline pipeline)462 brw_upload_pipeline_state(struct brw_context *brw,
463                           enum brw_pipeline pipeline)
464 {
465    const struct gen_device_info *devinfo = &brw->screen->devinfo;
466    struct gl_context *ctx = &brw->ctx;
467    int i;
468    static int dirty_count = 0;
469    struct brw_state_flags state = brw->state.pipelines[pipeline];
470    const unsigned fb_samples =
471       MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
472 
473    brw_select_pipeline(brw, pipeline);
474 
475    if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
476       /* Always re-emit all state. */
477       brw->NewGLState = ~0;
478       ctx->NewDriverState = ~0ull;
479    }
480 
481    if (pipeline == BRW_RENDER_PIPELINE) {
482       if (brw->programs[MESA_SHADER_FRAGMENT] !=
483           ctx->FragmentProgram._Current) {
484          brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
485          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
486       }
487 
488       if (brw->programs[MESA_SHADER_TESS_EVAL] !=
489           ctx->TessEvalProgram._Current) {
490          brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
491          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
492       }
493 
494       if (brw->programs[MESA_SHADER_TESS_CTRL] !=
495           ctx->TessCtrlProgram._Current) {
496          brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
497          brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
498       }
499 
500       if (brw->programs[MESA_SHADER_GEOMETRY] !=
501           ctx->GeometryProgram._Current) {
502          brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
503          brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
504       }
505 
506       if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
507          brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
508          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
509       }
510    }
511 
512    if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
513       brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
514       brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
515    }
516 
517    if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
518       brw->meta_in_progress = _mesa_meta_in_progress(ctx);
519       brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
520    }
521 
522    if (brw->num_samples != fb_samples) {
523       brw->num_samples = fb_samples;
524       brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
525    }
526 
527    /* Exit early if no state is flagged as dirty */
528    merge_ctx_state(brw, &state);
529    if ((state.mesa | state.brw) == 0)
530       return;
531 
532    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
533    if (devinfo->gen == 6)
534       brw_emit_post_sync_nonzero_flush(brw);
535 
536    brw_upload_programs(brw, pipeline);
537    merge_ctx_state(brw, &state);
538 
539    brw_upload_state_base_address(brw);
540 
541    const struct brw_tracked_state *atoms =
542       brw_get_pipeline_atoms(brw, pipeline);
543    const int num_atoms = brw->num_atoms[pipeline];
544 
545    if (unlikely(INTEL_DEBUG)) {
546       /* Debug version which enforces various sanity checks on the
547        * state flags which are generated and checked to help ensure
548        * state atoms are ordered correctly in the list.
549        */
550       struct brw_state_flags examined, prev;
551       memset(&examined, 0, sizeof(examined));
552       prev = state;
553 
554       for (i = 0; i < num_atoms; i++) {
555 	 const struct brw_tracked_state *atom = &atoms[i];
556 	 struct brw_state_flags generated;
557 
558          check_and_emit_atom(brw, &state, atom);
559 
560 	 accumulate_state(&examined, &atom->dirty);
561 
562 	 /* generated = (prev ^ state)
563 	  * if (examined & generated)
564 	  *     fail;
565 	  */
566 	 xor_states(&generated, &prev, &state);
567 	 assert(!check_state(&examined, &generated));
568 	 prev = state;
569       }
570    }
571    else {
572       for (i = 0; i < num_atoms; i++) {
573 	 const struct brw_tracked_state *atom = &atoms[i];
574 
575          check_and_emit_atom(brw, &state, atom);
576       }
577    }
578 
579    if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
580       STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
581 
582       brw_update_dirty_count(mesa_bits, state.mesa);
583       brw_update_dirty_count(brw_bits, state.brw);
584       if (dirty_count++ % 1000 == 0) {
585 	 brw_print_dirty_count(mesa_bits);
586 	 brw_print_dirty_count(brw_bits);
587 	 fprintf(stderr, "\n");
588       }
589    }
590 }
591 
592 /***********************************************************************
593  * Emit all state:
594  */
brw_upload_render_state(struct brw_context * brw)595 void brw_upload_render_state(struct brw_context *brw)
596 {
597    brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
598 }
599 
600 static inline void
brw_pipeline_state_finished(struct brw_context * brw,enum brw_pipeline pipeline)601 brw_pipeline_state_finished(struct brw_context *brw,
602                             enum brw_pipeline pipeline)
603 {
604    /* Save all dirty state into the other pipelines */
605    for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
606       if (i != pipeline) {
607          brw->state.pipelines[i].mesa |= brw->NewGLState;
608          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
609       } else {
610          memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
611       }
612    }
613 
614    brw->NewGLState = 0;
615    brw->ctx.NewDriverState = 0ull;
616 }
617 
618 /**
619  * Clear dirty bits to account for the fact that the state emitted by
620  * brw_upload_render_state() has been committed to the hardware. This is a
621  * separate call from brw_upload_render_state() because it's possible that
622  * after the call to brw_upload_render_state(), we will discover that we've
623  * run out of aperture space, and need to rewind the batch buffer to the state
624  * it had before the brw_upload_render_state() call.
625  */
626 void
brw_render_state_finished(struct brw_context * brw)627 brw_render_state_finished(struct brw_context *brw)
628 {
629    brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
630 }
631 
632 void
brw_upload_compute_state(struct brw_context * brw)633 brw_upload_compute_state(struct brw_context *brw)
634 {
635    brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
636 }
637 
638 void
brw_compute_state_finished(struct brw_context * brw)639 brw_compute_state_finished(struct brw_context *brw)
640 {
641    brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
642 }
643