/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <stdio.h>
#include <errno.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"

#include "iris_genx_macros.h"

#if GFX_VER >= 9
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_genX_state_brw.h"
#else
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/common/intel_genX_state_elk.h"
#endif

#include "libintel_shaders.h"

#if GFX_VERx10 == 80
# include "intel_gfx80_shaders_spv.h"
# include "intel_gfx80_shaders_binding.h"
#elif GFX_VERx10 == 90
# include "intel_gfx90_shaders_spv.h"
# include "intel_gfx90_shaders_binding.h"
#elif GFX_VERx10 == 110
# include "intel_gfx110_shaders_spv.h"
# include "intel_gfx110_shaders_binding.h"
#elif GFX_VERx10 == 120
# include "intel_gfx120_shaders_spv.h"
# include "intel_gfx120_shaders_binding.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_spv.h"
# include "intel_gfx125_shaders_binding.h"
#elif GFX_VERx10 == 200
# include "intel_gfx200_shaders_spv.h"
# include "intel_gfx200_shaders_binding.h"
#elif GFX_VERx10 == 300
# include "intel_gfx300_shaders_spv.h"
# include "intel_gfx300_shaders_binding.h"
#else
# error "Unsupported generation"
#endif

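/* Load a single field of the parameters structure from the push constants.
 * The base is the field's offset within struct_name and the range covers
 * just that field.
 */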
#define load_param(b, bit_size, struct_name, field_name)        \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),          \
                    .base = offsetof(struct_name, field_name),  \
                    .range = bit_size / 8)

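/* Turn the fragment's window coordinate into a linear index. The generation
 * draw emitted below is a rectangle at most 8192 pixels wide (see the vertex
 * setup in emit_indirect_generate_draw()), so y * 8192 + x yields a unique
 * index per fragment, i.e. per generated draw.
 */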
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}

static const uint32_t *
load_shader_lib_spv(uint32_t *out_size)
{
   *out_size = sizeof(genX(shaders_spv));
   return genX(shaders_spv);
}

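/* Emit the body of the generation fragment shader by calling into the
 * precompiled libintel_shaders helper. Every argument is pulled from the
 * iris_gen_indirect_params push constants; the return value is the amount
 * of push constant data the shader consumes.
 */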
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}

void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib_spv = load_shader_lib_spv;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

/**
 * Stream out temporary/short-lived state.
 *
 * This allocates space, pins the BO, and includes the BO address in the
 * returned offset (which works because all state lives in 32-bit memory
 * zones).
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
   return p;
}

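/* Like upload_state(), but converts the returned offset to be relative to
 * the state base address and records the allocation so state sizes can be
 * tracked for debugging.
 */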
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}

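/* Emit a minimal 3D pipeline setup followed by a RECTLIST draw covering
 * ring_count pixels. Each fragment invocation runs the generation shader,
 * which writes the 3DSTATE_VERTEX_BUFFERS/3DPRIMITIVE commands for one
 * indirect draw into the generation ring buffer.
 */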
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = fs_data->uses_vmask;

      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = shader->nr_params > 0 ||
                              shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

#if GFX_VER >= 30
      ps.RegistersPerThread = ptl_register_blocks(wm_prog_data->base.grf_used);
#endif

#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming the push constant alloc command,
    * immediately program a push constant command (ZERO length) without any
    * commit between them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

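   /* A rectangle covering ring_count pixels: at most 8192 pixels wide, with
    * as many rows as needed. This matches the y * 8192 + x fragment indexing
    * in load_fragment_index().
    */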
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */

   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
                        GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the
    * next RT writes, so we choose to reemit the binding table to a null RT
    * on Gfx8 too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance = 3;
      prim.InstanceCount = 1;
   }

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated,
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but they were already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}

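/* Size of the ring buffer that receives the draw commands written by the
 * generation shader.
 */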
#define RING_SIZE (128 * 1024)

static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}

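/**
 * Emit a generation draw that writes the commands for the given indirect
 * draw into the generation ring buffer. Returns the CPU mapping of the
 * uploaded iris_gen_indirect_params and sets *out_params_addr to its GPU
 * address.
 */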
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   const size_t struct_stride = draw->index_size > 0 ?
                                sizeof(uint32_t) * 5 :
                                sizeof(uint32_t) * 4;
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

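   /* Each generated draw consumes cmd_stride bytes of commands plus 2 dwords
    * of bookkeeping data (draw_id, is_indexed_draw). The remaining dwords
    * are reserved for the MI_BATCH_BUFFER_START (plus an MI_ARB_CHECK on
    * Gfx12+) used to exit the generated command stream.
    */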
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

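   /* Note that flags is a packed dword: bits 0-7 hold the
    * ANV_GENERATED_FLAG_* bits, bits 8-15 the vertex buffer MOCS, bits 16-23
    * the command stride in dwords, and bits 24-31 the number of bound vertex
    * buffers.
    */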
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr = ice->draw.generation.ring_bo->address,
      .ring_count = ring_count,
      .draw_id_addr = ice->draw.generation.ring_bo->address +
                      ring_count * cmd_stride +
                      4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr = indirect->indirect_draw_count ?
                         (iris_resource_bo(indirect->indirect_draw_count)->address +
                          indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
                            indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count = indirect->draw_count,
      .flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               (ice->state.vs_uses_draw_params ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (ice->state.vs_uses_derived_draw_params ?
                ANV_GENERATED_FLAG_DRAWID : 0) |
               (iris_mocs(NULL, &screen->isl_dev,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               ((cmd_stride / 4) << 16) |
               util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   return params;
}