/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <stdio.h>
#include <errno.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"

#include "iris_genx_macros.h"

#if GFX_VER >= 9
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_genX_state_brw.h"
#else
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/common/intel_genX_state_elk.h"
#endif

#include "libintel_shaders.h"

#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#elif GFX_VERx10 == 300
# include "intel_gfx30_shaders_code.h"
#else
# error "Unsupported generation"
#endif

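/* Load a field of the generation shader's parameter structure. The struct
 * is uploaded as uniform (push constant) data, so the field's offset within
 * the struct is its uniform offset.
 */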
#define load_param(b, bit_size, struct_name, field_name) \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
                    .base = offsetof(struct_name, field_name), \
                    .range = bit_size / 8)

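/* Compute a linear fragment index from the fragment coordinates, assuming
 * the generation shader is rasterized as a rectangle up to 8192 pixels wide
 * (one fragment per generated draw).
 */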
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}

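/* Deserialize the precompiled NIR shader library for this generation. */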
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
#if GFX_VER >= 9
      screen->brw->nir_options[MESA_SHADER_KERNEL];
#else
      screen->elk->nir_options[MESA_SHADER_KERNEL];
#endif

   struct blob_reader blob;
   blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
                    sizeof(genX(intel_shaders_nir)));
   return nir_deserialize(mem_ctx, nir_options, &blob);
}

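/* Emit a call to the shader library's draw-writing function, with every
 * argument loaded from the iris_gen_indirect_params structure bound as
 * push constants. Returns the size of that parameter structure.
 */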
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}

void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

/**
 * Stream out temporary/short-lived state.
 *
 * This allocates space, pins the BO, and includes the BO address in the
 * returned offset (which works because all state lives in 32-bit memory
 * zones).
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
   return p;
}

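/* Like upload_state(), but also records the allocation for state-size
 * debugging and returns an offset relative to the state base address.
 */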
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}

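/* Emit all the 3D state needed to run the draw generation fragment shader:
 * a RECTLIST covering ring_count fragments is drawn, and each fragment
 * writes one set of draw commands into the generation ring buffer.
 */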
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);
   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }
   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

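   /* Kernel start pointers and GRF start registers are programmed per
    * dispatch mode; intel_set_ps_dispatch_state() selects which SIMD
    * dispatch modes are enabled for this shader.
    */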
   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = fs_data->uses_vmask;

      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = shader->nr_params > 0 ||
                              shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming the push constant alloc command,
    * immediately program a push constant command (ZERO length) without any
    * commit between them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

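   /* One fragment is rasterized per draw to generate: lay the fragments
    * out as a rectangle up to 8192 pixels wide (matching
    * load_fragment_index()), wrapping to additional rows past that.
    */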
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */

   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
                        GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read
    *     length equal to zero committed followed by a 3DSTATE_CONSTANT_*
    *     with buffer 0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the
    * next RT writes, so we choose to reemit the binding table to a null RT
    * on Gfx8 too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance = 3;
      prim.InstanceCount = 1;
   }

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated,
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but they were already off
       * anyway.
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}

#define RING_SIZE (128 * 1024)

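/* Allocate (once per context) the ring buffer into which the generation
 * shader writes the draw commands.
 */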
static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}

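/* Upload the parameter structure for the generation shader, pin the
 * buffers it reads and writes, and emit the generation draw. Returns the
 * CPU-mapped parameters so the caller can fill in the remaining fields
 * (e.g. gen_addr/end_addr, which are not written here).
 */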
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

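   /* Indirect draw data is 5 dwords per draw for indexed draws and 4 for
    * non-indexed ones (matching the GL/Vulkan indirect draw command
    * layouts).
    */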
   const size_t struct_stride = draw->index_size > 0 ?
                                sizeof(uint32_t) * 5 :
                                sizeof(uint32_t) * 4;
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

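   /* Compute how many draws fit in the ring buffer, reserving room for the
    * MI_BATCH_BUFFER_START (and, on Gfx12+, MI_ARB_CHECK) used to chain out
    * of the ring, plus 2 dwords of per-draw data (draw_id and
    * is_indexed_draw).
    */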
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr = ice->draw.generation.ring_bo->address,
      .ring_count = ring_count,
      .draw_id_addr = ice->draw.generation.ring_bo->address +
                      ring_count * cmd_stride +
                      4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr = indirect->indirect_draw_count ?
                         (iris_resource_bo(indirect->indirect_draw_count)->address +
                          indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
                            indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count = indirect->draw_count,
      .flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               (ice->state.vs_uses_draw_params ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (ice->state.vs_uses_derived_draw_params ?
                ANV_GENERATED_FLAG_DRAWID : 0) |
               (iris_mocs(NULL, &screen->isl_dev,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               ((cmd_stride / 4) << 16) |
               util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   return params;
}