1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #ifndef CROCUS_CONTEXT_H
24 #define CROCUS_CONTEXT_H
25
26 #include "pipe/p_context.h"
27 #include "pipe/p_state.h"
28 #include "util/u_debug.h"
29 #include "util/u_threaded_context.h"
30 #include "intel/blorp/blorp.h"
31 #include "intel/dev/intel_debug.h"
32 #include "intel/compiler/brw_compiler.h"
33 #include "crocus_batch.h"
34 #include "crocus_fence.h"
35 #include "crocus_resource.h"
36 #include "crocus_screen.h"
37 #include "util/u_blitter.h"
38
/*
 * Forward declarations, so the declarations below can refer to these structs
 * by pointer before (or without) seeing their full definitions.
 */
struct crocus_bo;
struct crocus_context;
struct blorp_batch;
struct blorp_params;
43
/*
 * Driver limits.
 *
 * Within this header, CROCUS_MAX_TEXTURE_SAMPLERS sizes the per-stage
 * samplers[] and textures[] arrays of struct crocus_shader_state, and
 * CROCUS_MAX_VIEWPORTS sizes the viewports[] and scissors[] arrays of
 * struct crocus_context.
 */
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
/* CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same. */
#define CROCUS_MAX_ABOS 16
#define CROCUS_MAX_SSBOS 16
#define CROCUS_MAX_VIEWPORTS 16
#define CROCUS_MAX_CLIP_PLANES 8
51
/**
 * Domain tags for BRW_PARAM()-encoded values.
 *
 * BRW_PARAM() pastes its first argument onto BRW_PARAM_DOMAIN_ and stores
 * the resulting enumerator in the top byte of the encoded word.
 */
enum crocus_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_IMAGE,
};
56
/*
 * Buffer-object reuse modes.
 * NOTE(review): presumably the values of a "bo_reuse" driconf option —
 * confirm against the screen/driconf code.
 */
enum {
   DRI_CONF_BO_REUSE_DISABLED,
   DRI_CONF_BO_REUSE_ALL
};
61
/*
 * Helpers for packing a parameter domain and a value into one 32-bit word:
 * the BRW_PARAM_DOMAIN_* tag occupies bits 31:24 and the value bits 23:0.
 *
 * Image parameters further split the value into an image index (shifted
 * left by 8) and an offset in the low bits.
 *
 * NOTE(review): BRW_PARAM_IMAGE_OFFSET() masks with 0xf, so only offsets
 * 0..15 round-trip through BRW_PARAM_IMAGE() even though the index does not
 * start until bit 8 — confirm that offsets never exceed 15.
 */
#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf)
68
/**
 * Dirty flags.  When state changes, we flag some combination of these
 * to indicate that particular GPU commands need to be re-emitted.
 *
 * Each bit typically corresponds to a single 3DSTATE_* command packet, but
 * in rare cases they map to a group of related packets that need to be
 * emitted together.
 *
 * All flags are 64-bit (1ull) constants; bits 32 and above are in use, so
 * they must be stored in a 64-bit field (see crocus_context::state.dirty).
 *
 * See crocus_upload_render_state().
 */
#define CROCUS_DIRTY_COLOR_CALC_STATE             (1ull <<  0)
#define CROCUS_DIRTY_POLYGON_STIPPLE              (1ull <<  1)
#define CROCUS_DIRTY_CC_VIEWPORT                  (1ull <<  2)
#define CROCUS_DIRTY_SF_CL_VIEWPORT               (1ull <<  3)
#define CROCUS_DIRTY_RASTER                       (1ull <<  4)
#define CROCUS_DIRTY_CLIP                         (1ull <<  5)
#define CROCUS_DIRTY_LINE_STIPPLE                 (1ull <<  6)
#define CROCUS_DIRTY_VERTEX_ELEMENTS              (1ull <<  7)
#define CROCUS_DIRTY_VERTEX_BUFFERS               (1ull <<  8)
#define CROCUS_DIRTY_DRAWING_RECTANGLE            (1ull <<  9)
#define CROCUS_DIRTY_GEN6_URB                     (1ull << 10)
#define CROCUS_DIRTY_DEPTH_BUFFER                 (1ull << 11)
#define CROCUS_DIRTY_WM                           (1ull << 12)
#define CROCUS_DIRTY_SO_DECL_LIST                 (1ull << 13)
#define CROCUS_DIRTY_STREAMOUT                    (1ull << 14)
#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR          (1ull << 15)
#define CROCUS_DIRTY_GEN4_CURBE                   (1ull << 16)
#define CROCUS_DIRTY_GEN4_URB_FENCE               (1ull << 17)
#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS      (1ull << 18)
#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS  (1ull << 19)
#define CROCUS_DIRTY_GEN6_BLEND_STATE             (1ull << 20)
#define CROCUS_DIRTY_GEN6_SCISSOR_RECT            (1ull << 21)
#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL        (1ull << 22)
#define CROCUS_DIRTY_GEN6_MULTISAMPLE             (1ull << 23)
#define CROCUS_DIRTY_GEN6_SAMPLE_MASK             (1ull << 24)
#define CROCUS_DIRTY_GEN7_SBE                     (1ull << 25)
#define CROCUS_DIRTY_GEN7_L3_CONFIG               (1ull << 26)
#define CROCUS_DIRTY_GEN7_SO_BUFFERS              (1ull << 27)
#define CROCUS_DIRTY_GEN75_VF                     (1ull << 28)
#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES  (1ull << 29)
#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30)
#define CROCUS_DIRTY_VF_STATISTICS                (1ull << 31)
#define CROCUS_DIRTY_GEN4_CLIP_PROG               (1ull << 32)
#define CROCUS_DIRTY_GEN4_SF_PROG                 (1ull << 33)
#define CROCUS_DIRTY_GEN4_FF_GS_PROG              (1ull << 34)
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS  (1ull << 35)
#define CROCUS_DIRTY_GEN6_SVBI                    (1ull << 36)
#define CROCUS_DIRTY_GEN8_VF_TOPOLOGY             (1ull << 37)
#define CROCUS_DIRTY_GEN8_PMA_FIX                 (1ull << 38)
#define CROCUS_DIRTY_GEN8_VF_SGVS                 (1ull << 39)
#define CROCUS_DIRTY_GEN8_PS_BLEND                (1ull << 40)

/* The only non-stage dirty bit that applies to compute. */
#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)

/*
 * Everything except the compute bits.  This is a bitwise complement, so it
 * also has every currently unassigned bit set.
 */
#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
124
/**
 * Per-stage dirty flags.  When state changes, we flag some combination of
 * these to indicate that particular GPU commands need to be re-emitted.
 * Unlike the CROCUS_DIRTY_* flags these are shader stage-specific and can be
 * indexed by shifting the mask by the shader stage index.
 *
 * Each category occupies six consecutive bits, one per stage, in the order
 * VS, TCS, TES, GS, FS (named PS for the sampler-state bits), CS.
 *
 * See crocus_upload_render_state().
 */
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS  (1ull <<  0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS (1ull <<  1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES (1ull <<  2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS  (1ull <<  3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS  (1ull <<  4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS  (1ull <<  5)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS      (1ull <<  6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS     (1ull <<  7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES     (1ull <<  8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS      (1ull <<  9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS      (1ull << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS      (1ull << 11)
#define CROCUS_STAGE_DIRTY_VS                 (1ull << 12)
#define CROCUS_STAGE_DIRTY_TCS                (1ull << 13)
#define CROCUS_STAGE_DIRTY_TES                (1ull << 14)
#define CROCUS_STAGE_DIRTY_GS                 (1ull << 15)
#define CROCUS_STAGE_DIRTY_FS                 (1ull << 16)
#define CROCUS_STAGE_DIRTY_CS                 (1ull << 17)
/* Bit position of CROCUS_STAGE_DIRTY_CONSTANTS_VS; keep the two in sync. */
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS 18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS       (1ull << 18)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS      (1ull << 19)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES      (1ull << 20)
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS       (1ull << 21)
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS       (1ull << 22)
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS       (1ull << 23)
#define CROCUS_STAGE_DIRTY_BINDINGS_VS        (1ull << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS       (1ull << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES       (1ull << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS        (1ull << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS        (1ull << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS        (1ull << 29)

/* Every per-stage bit that belongs to the compute stage. */
#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE (CROCUS_STAGE_DIRTY_CS | \
                                            CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS | \
                                            CROCUS_STAGE_DIRTY_UNCOMPILED_CS | \
                                            CROCUS_STAGE_DIRTY_CONSTANTS_CS | \
                                            CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Bitwise complement of the compute bits (includes unassigned bits). */
#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)

/* Binding-table dirty bits for all six stages, compute included. */
#define CROCUS_ALL_STAGE_DIRTY_BINDINGS (CROCUS_STAGE_DIRTY_BINDINGS_VS  | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_TCS | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_TES | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_GS  | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_FS  | \
                                         CROCUS_STAGE_DIRTY_BINDINGS_CS)

/* Constant dirty bits for the five render stages (compute excluded). */
#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS (CROCUS_STAGE_DIRTY_CONSTANTS_VS  | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_TCS | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_TES | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_GS  | \
                                             CROCUS_STAGE_DIRTY_CONSTANTS_FS)
185
/**
 * Non-orthogonal state (NOS) dependency flags.
 *
 * Shader programs may depend on non-orthogonal state.  These flags are
 * used to indicate that a shader's key depends on the state provided by
 * a certain Gallium CSO.  Changing any CSOs marked as a dependency will
 * cause the driver to re-compute the shader key, possibly triggering a
 * shader recompile.
 *
 * The enumerators are indices, not masks: crocus_uncompiled_shader::nos
 * stores them as (1 << CROCUS_NOS_*), and CROCUS_NOS_COUNT sizes
 * crocus_context::state.stage_dirty_for_nos[].
 */
enum crocus_nos_dep {
   CROCUS_NOS_FRAMEBUFFER,
   CROCUS_NOS_DEPTH_STENCIL_ALPHA,
   CROCUS_NOS_RASTERIZER,
   CROCUS_NOS_BLEND,
   CROCUS_NOS_LAST_VUE_MAP,
   CROCUS_NOS_TEXTURES,
   CROCUS_NOS_VERTEX_ELEMENTS,
   CROCUS_NOS_COUNT,
};

/* Defined elsewhere; crocus_context only holds a pointer to it (cso_zsa). */
struct crocus_depth_stencil_alpha_state;
207
/**
 * Cache IDs for the in-memory program cache (ice->shaders.cache).
 *
 * The first six IDs are equal to the corresponding MESA_SHADER_* stage
 * values; the remaining IDs continue sequentially after CROCUS_CACHE_CS.
 */
enum crocus_program_cache_id {
   CROCUS_CACHE_VS  = MESA_SHADER_VERTEX,
   CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
   CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
   CROCUS_CACHE_GS  = MESA_SHADER_GEOMETRY,
   CROCUS_CACHE_FS  = MESA_SHADER_FRAGMENT,
   CROCUS_CACHE_CS  = MESA_SHADER_COMPUTE,
   CROCUS_CACHE_BLORP,
   CROCUS_CACHE_SF,
   CROCUS_CACHE_CLIP,
   CROCUS_CACHE_FF_GS,
};
223
/** @{
 *
 * Defines for PIPE_CONTROL operations, which trigger cache flushes,
 * synchronization, pipelined memory writes, and so on.
 *
 * The bits here are not the actual hardware values.  The actual fields
 * move between various generations, so we just have flags for each
 * potential operation, and use genxml to encode the actual packet.
 *
 * Numbering starts at bit 1; bit 0 is not assigned here.
 */
enum pipe_control_flags
{
   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
   PIPE_CONTROL_CS_STALL                        = (1 << 4),
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
   PIPE_CONTROL_TILE_CACHE_FLUSH                = (1 << 25),
};
261
/* Union of the depth, data and render-target cache flush flags. */
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
   (PIPE_CONTROL_DEPTH_CACHE_FLUSH |  \
    PIPE_CONTROL_DATA_CACHE_FLUSH |   \
    PIPE_CONTROL_RENDER_TARGET_FLUSH)

/*
 * Union of the state, constant, VF, texture and instruction cache
 * invalidation flags.
 */
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS  \
   (PIPE_CONTROL_STATE_CACHE_INVALIDATE |   \
    PIPE_CONTROL_CONST_CACHE_INVALIDATE |   \
    PIPE_CONTROL_VF_CACHE_INVALIDATE |      \
    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
    PIPE_CONTROL_INSTRUCTION_INVALIDATE)
273
/**
 * How conditional rendering decides whether a draw should happen.
 * Stored in crocus_context::state.predicate and consumed by
 * crocus_check_conditional_render().
 */
enum crocus_predicate_state {
   /* The first two states are used if we can determine whether to draw
    * without having to look at the values in the query object buffer.  This
    * will happen if there is no conditional render in progress, if the query
    * object is already completed or if something else has already added
    * samples to the preliminary result.
    */
   CROCUS_PREDICATE_STATE_RENDER,
   CROCUS_PREDICATE_STATE_DONT_RENDER,

   /* In this case whether to draw or not depends on the result of an
    * MI_PREDICATE command so the predicate enable bit needs to be checked.
    */
   CROCUS_PREDICATE_STATE_USE_BIT,
   /* In this case, either MI_PREDICATE doesn't exist or we lack the
    * necessary kernel features to use it.  Stall for the query result.
    */
   CROCUS_PREDICATE_STATE_STALL_FOR_QUERY,
};

/** @} */
295
/**
 * An uncompiled, API-facing shader.  This is the Gallium CSO for shaders.
 * It primarily contains the NIR for the shader.
 *
 * Each API-facing shader can be compiled into multiple shader variants,
 * based on non-orthogonal state dependencies, recorded in the shader key.
 *
 * See crocus_compiled_shader, which represents a compiled shader variant.
 */
struct crocus_uncompiled_shader {
   /** The shader's NIR. */
   struct nir_shader *nir;

   /** Stream output (transform feedback) description for this shader. */
   struct pipe_stream_output_info stream_output;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* NOTE(review): presumably a unique ID per uncompiled shader — confirm. */
   unsigned program_id;

   /** Bitfield of (1 << CROCUS_NOS_*) flags. */
   unsigned nos;

   /** Have any shader variants been compiled yet? */
   bool compiled_once;

   /* NOTE(review): presumably set for shaders that consume the edge flag —
    * confirm against the shader creation code.
    */
   bool needs_edge_flag;

   /** Constant data scraped from the shader by nir_opt_large_constants */
   struct pipe_resource *const_data;

   /** Surface state for const_data */
   struct crocus_state_ref const_data_state;
};
329
/**
 * Groups of surfaces within a shader's binding table.
 *
 * Used to index the per-group arrays of struct crocus_binding_table;
 * CROCUS_SURFACE_GROUP_COUNT is the number of groups and sizes those arrays.
 */
enum crocus_surface_group {
   CROCUS_SURFACE_GROUP_RENDER_TARGET,
   CROCUS_SURFACE_GROUP_RENDER_TARGET_READ,
   CROCUS_SURFACE_GROUP_SOL,
   CROCUS_SURFACE_GROUP_CS_WORK_GROUPS,
   CROCUS_SURFACE_GROUP_TEXTURE,
   CROCUS_SURFACE_GROUP_TEXTURE_GATHER,
   CROCUS_SURFACE_GROUP_IMAGE,
   CROCUS_SURFACE_GROUP_UBO,
   CROCUS_SURFACE_GROUP_SSBO,

   CROCUS_SURFACE_GROUP_COUNT,
};
343
enum {
   /* Invalid value for a binding table index.  Returned by
    * crocus_group_index_to_bti() and crocus_bti_to_group_index() when the
    * requested surface is not present in the binding table.
    */
   CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
348
/**
 * Layout of a shader's binding table, described per surface group.
 *
 * Only surfaces whose bit is set in used_mask[] occupy a slot; see
 * crocus_group_index_to_bti() for the <group, index> to slot mapping.
 */
struct crocus_binding_table {
   /* NOTE(review): presumably the total table size in bytes — confirm. */
   uint32_t size_bytes;

   /** Number of surfaces in each group, before compacting. */
   uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];

   /** Initial offset of each group. */
   uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];

   /** Mask of surfaces used in each group. */
   uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
};
361
/**
 * A compiled shader variant, containing a pointer to the GPU assembly,
 * as well as program data and other packets needed by state upload.
 *
 * There can be several crocus_compiled_shader variants per API-level shader
 * (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
 */
struct crocus_compiled_shader {
   /** Reference to the uploaded assembly. */
   uint32_t offset;

   /* asm size in map */
   uint32_t map_size;

   /** The program data (owned by the program cache hash table) */
   struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   /** A list of system values to be uploaded as uniforms. */
   enum brw_param_builtin *system_values;
   unsigned num_system_values;

   /** Number of constbufs expected by the shader. */
   unsigned num_cbufs;

   /**
    * Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
    * (the VUE-based information for transform feedback outputs).
    */
   uint32_t *streamout;

   /** Binding table layout for this variant. */
   struct crocus_binding_table bt;

   uint32_t bind_bo_offset;
   /* TODO: the fixed size of 128 entries is a placeholder. */
   uint32_t surf_offset[128];
};
398
/**
 * API context state that is replicated per shader stage.
 *
 * crocus_context::state.shaders[] holds one of these per MESA_SHADER_STAGES
 * entry.
 */
struct crocus_shader_state {
   /** Uniform Buffers */
   struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];

   /* NOTE(review): presumably set when system values must be re-uploaded —
    * confirm against the constant upload code.
    */
   bool sysvals_need_upload;

   /** Shader Storage Buffers */
   struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];

   /** Shader Storage Images (image load store) */
   struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];

   /** Bound sampler states and sampler views, one slot per texture unit. */
   struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
   struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];

   /** Bitfield of which constant buffers are bound (non-null). */
   uint32_t bound_cbufs;

   /** Bitfield of which image views are bound (non-null). */
   uint32_t bound_image_views;

   /** Bitfield of which sampler views are bound (non-null). */
   uint32_t bound_sampler_views;

   /** Bitfield of which shader storage buffers are bound (non-null). */
   uint32_t bound_ssbos;

   /** Bitfield of which shader storage buffers are writable. */
   uint32_t writable_ssbos;

   uint32_t sampler_offset;
};
434
/**
 * The API context (derived from pipe_context).
 *
 * Most driver state is tracked here.  The embedded pipe_context is the
 * first member, so a pointer to it and a pointer to the crocus_context
 * share the same address.
 */
struct crocus_context {
   struct pipe_context ctx;
   struct threaded_context *thrctx;

   /** A debug callback for KHR_debug output. */
   struct util_debug_callback dbg;

   /** A device reset status callback for notifying that the GPU is hosed. */
   struct pipe_device_reset_callback reset;

   /** Slab allocator for crocus_transfer_map objects. */
   struct slab_child_pool transfer_pool;

   /** Slab allocator for threaded_context's crocus_transfer_map objects */
   struct slab_child_pool transfer_pool_unsync;

   struct blorp_context blorp;

   int batch_count;
   struct crocus_batch batches[CROCUS_BATCH_COUNT];

   struct u_upload_mgr *query_buffer_uploader;

   struct blitter_context *blitter;

   /** State for draw parameters (GL_ARB_shader_draw_parameters). */
   struct {
      struct {
         /**
          * Either the value of BaseVertex for indexed draw calls or the value
          * of the argument <first> for non-indexed draw calls.
          */
         int firstvertex;
         int baseinstance;
      } params;

      /**
       * Are the above values the ones stored in the draw_params buffer?
       * If so, we can compare them against new values to see if anything
       * changed.  If not, we need to assume they changed.
       */
      bool params_valid;

      /**
       * Resource and offset that stores draw_parameters from the indirect
       * buffer or to the buffer that stores the previous values for non
       * indirect draws.
       */
      struct crocus_state_ref draw_params;

      struct {
         /**
          * The value of DrawID.  This always comes in from its own vertex
          * buffer since it's not part of the indirect draw parameters.
          */
         int drawid;

         /**
          * Stores if an indexed or non-indexed draw (~0/0).  Useful to
          * calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
          */
         int is_indexed_draw;
      } derived_params;

      /**
       * Resource and offset used for GL_ARB_shader_draw_parameters which
       * contains parameters that are not present in the indirect buffer as
       * drawid and is_indexed_draw.  They will go in their own vertex element.
       */
      struct crocus_state_ref derived_draw_params;
   } draw;

   /** Bound shaders, the program cache and related per-stage resources. */
   struct {
      struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
      struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
      struct brw_vue_map *last_vue_map;

      /* In-memory program cache: backing BO, its mapping, the next free
       * offset, and the lookup table (see enum crocus_program_cache_id).
       */
      struct crocus_bo *cache_bo;
      uint32_t cache_next_offset;
      void *cache_bo_map;
      struct hash_table *cache;

      unsigned urb_size;

      /* gen 4/5 clip/sf progs */
      struct crocus_compiled_shader *clip_prog;
      struct crocus_compiled_shader *sf_prog;
      /* gen4/5 prims, gen6 streamout */
      struct crocus_compiled_shader *ff_gs_prog;
      uint32_t clip_offset;
      uint32_t sf_offset;
      uint32_t wm_offset;
      uint32_t vs_offset;
      uint32_t gs_offset;
      uint32_t cc_offset;

      /** Is a GS or TES outputting points or lines? */
      bool output_topology_is_points_or_lines;

      /* Track last VS URB entry size */
      unsigned last_vs_entry_size;

      /**
       * Scratch buffers for various sizes and stages.
       *
       * Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
       * and shader stage.
       */
      struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
   } shaders;

   /** Current conditional rendering query, condition and mode. */
   struct {
      struct crocus_query *query;
      bool condition;
      enum pipe_render_cond_flag mode;
   } condition;

   struct intel_perf_context *perf_ctx;

   struct {
      /** Bitfield of CROCUS_DIRTY_* flags. */
      uint64_t dirty;
      /** Bitfield of CROCUS_STAGE_DIRTY_* flags. */
      uint64_t stage_dirty;
      /* One mask per enum crocus_nos_dep value.
       * NOTE(review): presumably the stage_dirty bits to flag when that
       * NOS dependency changes — confirm.
       */
      uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];

      unsigned num_viewports;
      unsigned sample_mask;
      struct crocus_blend_state *cso_blend;
      struct crocus_rasterizer_state *cso_rast;
      struct crocus_depth_stencil_alpha_state *cso_zsa;
      struct crocus_vertex_element_state *cso_vertex_elements;
      struct pipe_blend_color blend_color;
      struct pipe_poly_stipple poly_stipple;
      struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
      struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
      struct pipe_stencil_ref stencil_ref;
      struct pipe_framebuffer_state framebuffer;
      struct pipe_clip_state clip_planes;

      float default_outer_level[4];
      float default_inner_level[2];

      /** Bitfield of which vertex buffers are bound (non-null). */
      uint32_t bound_vertex_buffers;
      struct pipe_vertex_buffer vertex_buffers[16];
      uint32_t vb_end[16];

      bool primitive_restart;
      unsigned cut_index;
      enum pipe_prim_type reduced_prim_mode:8;
      enum pipe_prim_type prim_mode:8;
      bool prim_is_points_or_lines;
      uint8_t vertices_per_patch;
      uint8_t patch_vertices;

      bool window_space_position;

      /** The last compute group size */
      uint32_t last_block[3];

      /** The last compute grid size */
      uint32_t last_grid[3];
      /** Reference to the BO containing the compute grid size */
      struct crocus_state_ref grid_size;

      /**
       * Array of aux usages for drawing, altered to account for any
       * self-dependencies from resources bound for sampling and rendering.
       */
      enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];

      /** Aux usage of the fb's depth buffer (which may or may not exist). */
      enum isl_aux_usage hiz_usage;

      /** Bitfield of whether color blending is enabled for RT[i] */
      uint8_t blend_enables;

      /** Are depth writes enabled?  (Depth buffer may or may not exist.) */
      bool depth_writes_enabled;

      /** Are stencil writes enabled?  (Stencil buffer may or may not exist.) */
      bool stencil_writes_enabled;

      /** GenX-specific current state */
      struct crocus_genx_state *genx;

      /** Per-stage bound resources (constbufs, SSBOs, images, samplers). */
      struct crocus_shader_state shaders[MESA_SHADER_STAGES];

      /* track if geom shader is active for IVB GT2 workaround */
      bool gs_enabled;
      /** Does the vertex shader use shader draw parameters? */
      bool vs_uses_draw_params;
      bool vs_uses_derived_draw_params;
      bool vs_needs_sgvs_element;
      bool vs_uses_vertexid;
      bool vs_uses_instanceid;

      /** Does the vertex shader use the edge flag? */
      bool vs_needs_edge_flag;

      struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
      bool streamout_active;
      int so_targets;

      bool statistics_counters_enabled;

      /** Current conditional rendering mode */
      enum crocus_predicate_state predicate;
      bool predicate_supported;

      /**
       * Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
       * render context that needs to be uploaded to the compute context.
       */
      struct crocus_bo *compute_predicate;

      /** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
      bool prims_generated_query_active;

      /** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
      uint32_t *streamout;

      /**
       * Resources containing streamed state which our render context
       * currently points to.  Used to re-add these to the validation
       * list when we start a new batch and haven't resubmitted commands.
       */
      struct {
         struct pipe_resource *res;
         uint32_t offset;
         uint32_t size;
         uint32_t index_size;
         bool prim_restart;
      } index_buffer;

      uint32_t sf_vp_address;
      uint32_t clip_vp_address;
      uint32_t cc_vp_address;

      uint32_t stats_wm;
      float global_depth_offset_clamp;

      uint32_t last_xfb_verts_per_prim;
      uint64_t svbi;
   } state;

   /* BRW_NEW_URB_ALLOCATIONS:
    */
   struct {
      uint32_t vsize;  /* vertex size plus header in urb registers */
      uint32_t gsize;  /* GS output size in urb registers */
      uint32_t hsize;  /* Tessellation control output size in urb registers */
      uint32_t dsize;  /* Tessellation evaluation output size in urb registers */
      uint32_t csize;  /* constant buffer size in urb registers */
      uint32_t sfsize; /* setup data size in urb registers */

      bool constrained;

      /* Number of URB entries allocated to each unit. */
      uint32_t nr_vs_entries;
      uint32_t nr_hs_entries;
      uint32_t nr_ds_entries;
      uint32_t nr_gs_entries;
      uint32_t nr_clip_entries;
      uint32_t nr_sf_entries;
      uint32_t nr_cs_entries;

      /* Start of each unit's URB region. */
      uint32_t vs_start;
      uint32_t hs_start;
      uint32_t ds_start;
      uint32_t gs_start;
      uint32_t clip_start;
      uint32_t sf_start;
      uint32_t cs_start;
      /**
       * URB size in the current configuration.  The units this is expressed
       * in are somewhat inconsistent, see intel_device_info::urb::size.
       *
       * FINISHME: Represent the URB size consistently in KB on all platforms.
       */
      uint32_t size;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the GS.
       */
      bool gs_present;

      /* True if the most recently sent _3DSTATE_URB message allocated
       * URB space for the HS and DS.
       */
      bool tess_present;
   } urb;

   /* GEN4/5 curbe */
   struct {
      unsigned wm_start;
      unsigned wm_size;
      unsigned clip_start;
      unsigned clip_size;
      unsigned vs_start;
      unsigned vs_size;
      unsigned total_size;

      struct crocus_resource *curbe_res;
      unsigned curbe_offset;
   } curbe;

   /**
    * A buffer containing a marker + description of the driver.  This buffer is
    * added to all execbufs syscalls so that we can identify the driver that
    * generated a hang by looking at the content of the buffer in the error
    * state.  It is also used for hardware workarounds that require scratch
    * writes or reads from some unimportant memory.  To avoid overriding the
    * debug data, workarounds should use the location given by
    * workaround_offset.
    *
    * NOTE(review): this comment previously named a "workaround_address"
    * field, which does not exist in this struct — confirm that
    * workaround_offset is the intended replacement.
    */
   struct crocus_bo *workaround_bo;
   unsigned workaround_offset;
};
755
/**
 * Report a performance warning.
 *
 * The message is printed through dbg_printf() when INTEL_DEBUG(DEBUG_PERF)
 * is set and, independently, sent to util_debug_message() as PERF_INFO when
 * \p dbg is non-NULL.  The variadic arguments are a printf-style format
 * string followed by its values, and may be evaluated twice.
 */
#define perf_debug(dbg, ...)                                 \
   do {                                                      \
      if (INTEL_DEBUG(DEBUG_PERF)) {                         \
         dbg_printf(__VA_ARGS__);                            \
      }                                                      \
      if (unlikely(dbg)) {                                   \
         util_debug_message(dbg, PERF_INFO, __VA_ARGS__);    \
      }                                                      \
   } while (0)
762
763
/*
 * Context-level entry points shared across the driver.
 *
 * The crocus_init_*_functions() hooks each take the pipe_context being
 * set up.
 * NOTE(review): presumably they install the corresponding pipe_context
 * callbacks — confirm in the implementing files.
 */
struct pipe_context *
crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags);

void crocus_lost_context_state(struct crocus_batch *batch);

void crocus_init_blit_functions(struct pipe_context *ctx);
void crocus_init_clear_functions(struct pipe_context *ctx);
void crocus_init_program_functions(struct pipe_context *ctx);
void crocus_init_resource_functions(struct pipe_context *ctx);
bool crocus_update_compiled_shaders(struct crocus_context *ice);
void crocus_update_compiled_compute_shader(struct crocus_context *ice);
/* Writes into the caller-provided buffer \p dst. */
void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                                      unsigned threads, uint32_t *dst);
777
778
/* crocus_blit.c */

/**
 * Options for crocus_blitter_begin().  The values are distinct powers of
 * two, so several can be OR'd together into one \c op argument.
 */
enum crocus_blitter_op
{
   CROCUS_SAVE_TEXTURES = 1,
   CROCUS_SAVE_FRAMEBUFFER = 2,
   CROCUS_SAVE_FRAGMENT_STATE = 4,
   CROCUS_DISABLE_RENDER_COND = 8,
};
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond);
788
/*
 * Fills the caller-provided \p surf from \p p_res at miplevel \p level with
 * the given aux usage.
 * NOTE(review): description inferred from the signature — confirm.
 */
void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
                                    struct isl_device *isl_dev,
                                    struct blorp_surf *surf,
                                    struct pipe_resource *p_res,
                                    enum isl_aux_usage aux_usage,
                                    unsigned level,
                                    bool is_render_target);
/*
 * Copies \p src_box of \p src (at \p src_level) to \p dst (at \p dst_level),
 * placing it at (dstx, dsty, dstz).
 * NOTE(review): description inferred from the signature — confirm.
 */
void crocus_copy_region(struct blorp_context *blorp,
                        struct crocus_batch *batch,
                        struct pipe_resource *dst,
                        unsigned dst_level,
                        unsigned dstx, unsigned dsty, unsigned dstz,
                        struct pipe_resource *src,
                        unsigned src_level,
                        const struct pipe_box *src_box);
804
/* crocus_draw.c */

/*
 * Draw and compute-dispatch entry points.  Both take the pipe_context as
 * their first argument.
 * NOTE(review): presumably installed as pipe_context::draw_vbo and
 * pipe_context::launch_grid — confirm in crocus_draw.c.
 */
void crocus_draw_vbo(struct pipe_context *ctx,
                     const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws);
void crocus_launch_grid(struct pipe_context *ctx,
                        const struct pipe_grid_info *grid);
813
/* crocus_pipe_control.c */

/*
 * PIPE_CONTROL helpers, all emitting into \p batch.  The \p reason string
 * accompanies each request.
 * NOTE(review): \p flags is presumably a mask of enum pipe_control_flags
 * bits, and \p reason presumably exists for debugging output — confirm.
 */
void crocus_emit_pipe_control_flush(struct crocus_batch *batch,
                                    const char *reason, uint32_t flags);
/* Variant carrying a destination (\p bo, \p offset) and a value \p imm. */
void crocus_emit_pipe_control_write(struct crocus_batch *batch,
                                    const char *reason, uint32_t flags,
                                    struct crocus_bo *bo, uint32_t offset,
                                    uint64_t imm);
void crocus_emit_mi_flush(struct crocus_batch *batch);
void crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);
void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
                                  const char *reason, uint32_t flags);
void crocus_flush_all_caches(struct crocus_batch *batch);
828
/**
 * Flush all caches on \p batch if the always_flush_cache driconf option is
 * set on the batch's screen.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * it is safe as the unbraced body of an if/else and requires the usual
 * trailing semicolon at the call site.  \p batch is parenthesized so any
 * pointer expression may be passed; it is evaluated twice when the option
 * is enabled, so it must not have side effects.
 */
#define crocus_handle_always_flush_cache(batch)                       \
   do {                                                               \
      if (unlikely((batch)->screen->driconf.always_flush_cache))      \
         crocus_flush_all_caches(batch);                              \
   } while (0)

void crocus_init_flush_functions(struct pipe_context *ctx);
834
/* crocus_program.c */

/* Looks up the shader_info for \p stage in the context \p ice. */
const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
                                                 gl_shader_stage stage);
/*
 * Returns a scratch BO for \p stage.
 * NOTE(review): presumably backed by ice->shaders.scratch_bos[], keyed by
 * an encoding of \p per_thread_scratch — confirm in crocus_program.c.
 */
struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
                                           unsigned per_thread_scratch,
                                           gl_shader_stage stage);
841 /**
842 * Map a <group, index> pair to a binding table index.
843 *
844 * For example: <UBO, 5> => binding table index 12
845 */
crocus_group_index_to_bti(const struct crocus_binding_table * bt,enum crocus_surface_group group,uint32_t index)846 static inline uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
847 enum crocus_surface_group group,
848 uint32_t index)
849 {
850 assert(index < bt->sizes[group]);
851 uint64_t mask = bt->used_mask[group];
852 uint64_t bit = 1ull << index;
853 if (bit & mask) {
854 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
855 } else {
856 return CROCUS_SURFACE_NOT_USED;
857 }
858 }
859
860 /**
861 * Map a binding table index back to a <group, index> pair.
862 *
863 * For example: binding table index 12 => <UBO, 5>
864 */
865 static inline uint32_t
crocus_bti_to_group_index(const struct crocus_binding_table * bt,enum crocus_surface_group group,uint32_t bti)866 crocus_bti_to_group_index(const struct crocus_binding_table *bt,
867 enum crocus_surface_group group, uint32_t bti)
868 {
869 uint64_t used_mask = bt->used_mask[group];
870 assert(bti >= bt->offsets[group]);
871
872 uint32_t c = bti - bt->offsets[group];
873 while (used_mask) {
874 int i = u_bit_scan64(&used_mask);
875 if (c == 0)
876 return i;
877 c--;
878 }
879
880 return CROCUS_SURFACE_NOT_USED;
881 }
882
883
/* crocus_disk_cache.c */

/*
 * On-disk shader cache entry points.  Both identify an entry by the
 * uncompiled shader \p ish together with a program key given as an opaque
 * blob (\p prog_key, \p prog_key_size bytes).
 */
void crocus_disk_cache_store(struct disk_cache *cache,
                             const struct crocus_uncompiled_shader *ish,
                             const struct crocus_compiled_shader *shader,
                             void *map,
                             const void *prog_key,
                             uint32_t prog_key_size);
/* NOTE(review): presumably returns NULL on a cache miss — confirm. */
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
                           const struct crocus_uncompiled_shader *ish,
                           const void *prog_key,
                           uint32_t prog_key_size);
897
898 /* crocus_program_cache.c */
899
900 void crocus_init_program_cache(struct crocus_context *ice);
901 void crocus_destroy_program_cache(struct crocus_context *ice);
902 void crocus_print_program_cache(struct crocus_context *ice);
903 struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice,
904 enum crocus_program_cache_id,
905 uint32_t key_size,
906 const void *key);
907 struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice,
908 enum crocus_program_cache_id,
909 uint32_t key_size,
910 const void *key,
911 const void *assembly,
912 uint32_t asm_size,
913 struct brw_stage_prog_data *,
914 uint32_t prog_data_size,
915 uint32_t *streamout,
916 enum brw_param_builtin *sysv,
917 unsigned num_system_values,
918 unsigned num_cbufs,
919 const struct crocus_binding_table *bt);
920 const void *crocus_find_previous_compile(const struct crocus_context *ice,
921 enum crocus_program_cache_id cache_id,
922 unsigned program_string_id);
923 bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
924 const void *key,
925 uint32_t key_size,
926 uint32_t *kernel_out,
927 void *prog_data_out);
928 bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
929 uint32_t stage,
930 const void *key, uint32_t key_size,
931 const void *kernel, uint32_t kernel_size,
932 const struct brw_stage_prog_data *prog_data,
933 uint32_t prog_data_size,
934 uint32_t *kernel_out,
935 void *prog_data_out);
936
/* crocus_resolve.c */

/*
 * Resolve and cache-tracking helpers operating on a batch.
 *
 * The predraw helpers take \p draw_aux_buffer_disabled, a caller-provided
 * array of bools.
 * NOTE(review): presumably indexed by draw buffer, matching
 * crocus_context::state.draw_aux_usage[] — confirm in crocus_resolve.c.
 */
void crocus_predraw_resolve_inputs(struct crocus_context *ice,
                                   struct crocus_batch *batch,
                                   bool *draw_aux_buffer_disabled,
                                   gl_shader_stage stage,
                                   bool consider_framebuffer);
void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
                                        struct crocus_batch *batch,
                                        bool *draw_aux_buffer_disabled);
void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
                                             struct crocus_batch *batch);
void crocus_cache_sets_clear(struct crocus_batch *batch);
void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_cache_flush_for_render(struct crocus_batch *batch,
                                   struct crocus_bo *bo,
                                   enum isl_format format,
                                   enum isl_aux_usage aux_usage);
void crocus_render_cache_add_bo(struct crocus_batch *batch,
                                struct crocus_bo *bo,
                                enum isl_format format,
                                enum isl_aux_usage aux_usage);
void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
/* Driver query enumeration, taking the pipe_screen and an entry index. */
int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                                 struct pipe_driver_query_info *info);
int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
                                       unsigned index,
                                       struct pipe_driver_query_group_info *info);
967
/* NOTE(review): presumably exposes the pipe-level state of the bound
 * rasterizer CSO (state.cso_rast) — confirm.
 */
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);

/* Conditional-render check used by crocus_check_conditional_render() when
 * the predicate state is CROCUS_PREDICATE_STATE_STALL_FOR_QUERY.
 */
bool crocus_sw_check_cond_render(struct crocus_context *ice);
crocus_check_conditional_render(struct crocus_context * ice)971 static inline bool crocus_check_conditional_render(struct crocus_context *ice)
972 {
973 if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
974 return crocus_sw_check_cond_render(ice);
975 return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
976 }
977
/*
 * Pull in the per-generation prototypes.
 *
 * If the includer already defined genX(), crocus_genx_protos.h is included
 * once with that definition.  Otherwise it is included once per supported
 * prefix (gfx4, gfx45, gfx5, gfx6, gfx7, gfx75, gfx8), with genX() defined
 * to paste that prefix onto each name and undefined again after every
 * inclusion, so genX is left undefined afterwards.
 */
#ifdef genX
#  include "crocus_genx_protos.h"
#else
#  define genX(x) gfx4_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx45_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx5_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx6_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx7_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "crocus_genx_protos.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "crocus_genx_protos.h"
#  undef genX
#endif
1003
1004 #endif
1005