/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

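/**
 * Enable or disable object-level (mid-object) preemption on Gfx9+.
 *
 * The replay-mode field in CS_CHICKEN1 may only be changed after the
 * fixed-function pipeline has been flushed, so an end-of-pipe sync is
 * emitted first.
 */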
void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->ver >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER;

   /* enable object level preemption */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GFX9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

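/**
 * Gfx11 slice hashing setup: when the two pixel pipes have an unequal
 * number of enabled subslices, upload a 16x16 hashing table that sends
 * two thirds of the work to the larger pipe, and turn on table-based
 * hashing through 3DSTATE_3D_MODE.
 */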
static void
brw_upload_gfx11_slice_hashing_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GFX11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when pixel pipe 0 has fewer subslices than
    * pixel pipe 1.  When pixel pipe 0 has more subslices, a similar table
    * with the 0's and 1's inverted is used instead.
    */
   for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From the gfx10/gfx11 workaround table in the h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we
    * need to update all the others.
    *
    * Since this is the first time we emit this instruction, we are only
    * setting the SLICE_HASHING_TABLE_ENABLE flag and leaving everything
    * else at its default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}

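/**
 * Upload the GPU state that only needs to be programmed once per context
 * (workaround registers, sample patterns, etc.).  This requires a hardware
 * context; without one, the same state must be re-emitted via state atoms.
 */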
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->ver == 11) {
      /* The default behavior of bit 5 "Headerless Message for Pre-emptable
       * Contexts" in the SAMPLER MODE register is 0, which means headerless
       * sampler messages are not allowed for pre-emptable contexts.  Set
       * bit 5 to 1 to allow them.
       */
      brw_load_register_imm32(brw, GFX11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in the L3CNTLREG register.  The default setting of the bit is not
       * the desirable behavior.
       */
      brw_load_register_imm32(brw, GFX8_L3CNTLREG,
                              GFX8_L3CNTLREG_EDBC_NO_HANG);
   }

   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_0,
                              GFX11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->ver == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
                              REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
                              REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
                              GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->ver >= 8) {
      gfx8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->ver >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->ver == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->ver >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->ver == 11)
      brw_upload_gfx11_slice_hashing_state(brw);
}

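/* Return the state-atom list used for the given pipeline. */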
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

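/**
 * Copy the given atom list into the context's per-pipeline atom array and
 * record how many atoms that pipeline uses.
 */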
void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We
    * want it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

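/**
 * One-time state-upload initialization: set up the per-generation atom
 * lists, program the initial GPU state, and register the driver's dirty
 * flags with core Mesa.
 */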
void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->ver >= 11)
      gfx11_init_atoms(brw);
   else if (devinfo->ver >= 10)
      unreachable("Gfx10 support dropped.");
   else if (devinfo->ver >= 9)
      gfx9_init_atoms(brw);
   else if (devinfo->ver >= 8)
      gfx8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gfx75_init_atoms(brw);
   else if (devinfo->ver >= 7)
      gfx7_init_atoms(brw);
   else if (devinfo->ver >= 6)
      gfx6_init_atoms(brw);
   else if (devinfo->ver >= 5)
      gfx5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gfx45_init_atoms(brw);
   else
      gfx4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

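/* Return true if *a and *b have any dirty bits in common. */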
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

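/* OR the dirty bits of *b into *a. */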
static void
accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


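/* Store the dirty bits that differ between *a and *b into *result. */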
static void
xor_states(struct brw_state_flags *result,
           const struct brw_state_flags *a,
           const struct brw_state_flags *b)
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

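/* Bookkeeping used with INTEL_DEBUG=state to count how often each dirty
 * bit triggers state re-emission.
 */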
struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

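/* Upload the TCS/TES programs if tessellation evaluation is in use;
 * otherwise clear their prog_data pointers.
 */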
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

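/**
 * Upload the shader programs for the given pipeline and update derived
 * state: the geometry-out VUE map, the viewport count, and the disk-cache
 * entries for the compiled programs.
 */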
static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->ver < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->ver < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

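/* Fold freshly raised GL and driver dirty flags into the accumulated set. */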
static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

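/* Emit an atom if any of its dirty bits are set, then pick up any flags its
 * emit function may have raised.
 */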
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

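/**
 * Emit all dirty state for the given pipeline: pick up program changes,
 * upload programs and the state base address, then walk the pipeline's
 * atom list and emit every atom whose dirty bits are set.
 */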
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (INTEL_DEBUG(DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (INTEL_DEBUG(DEBUG_ANY)) {
      /* Debug version which enforces various sanity checks on the state
       * flags which are generated and checked to help ensure state atoms
       * are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (INTEL_DEBUG(DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

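/**
 * Mark the current pipeline's accumulated dirty state as emitted, while
 * propagating the still-pending flags to the other pipeline.
 */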
static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

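/* Emit all dirty state for the compute pipeline. */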
void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

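/* Compute-pipeline counterpart of brw_render_state_finished(). */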
void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}