• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2023 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include <stdio.h>
6 #include <errno.h>
7 
8 #ifdef HAVE_VALGRIND
9 #include <valgrind.h>
10 #include <memcheck.h>
11 #define VG(x) x
12 #else
13 #define VG(x)
14 #endif
15 
16 #include "pipe/p_defines.h"
17 #include "pipe/p_state.h"
18 #include "pipe/p_context.h"
19 #include "pipe/p_screen.h"
20 #include "util/u_upload_mgr.h"
21 #include "compiler/nir/nir_builder.h"
22 #include "compiler/nir/nir_serialize.h"
23 #include "intel/common/intel_aux_map.h"
24 #include "intel/common/intel_l3_config.h"
25 #include "intel/common/intel_sample_positions.h"
26 #include "intel/ds/intel_tracepoints.h"
27 #include "iris_batch.h"
28 #include "iris_context.h"
29 #include "iris_defines.h"
30 #include "iris_pipe.h"
31 #include "iris_resource.h"
32 #include "iris_utrace.h"
33 
34 #include "iris_genx_macros.h"
35 
36 #if GFX_VER >= 9
37 #include "intel/compiler/brw_compiler.h"
38 #include "intel/common/intel_genX_state_brw.h"
39 #else
40 #include "intel/compiler/elk/elk_compiler.h"
41 #include "intel/common/intel_genX_state_elk.h"
42 #endif
43 
44 #include "libintel_shaders.h"
45 
46 #if GFX_VERx10 == 80
47 # include "intel_gfx80_shaders_spv.h"
48 # include "intel_gfx80_shaders_binding.h"
49 #elif GFX_VERx10 == 90
50 # include "intel_gfx90_shaders_spv.h"
51 # include "intel_gfx90_shaders_binding.h"
52 #elif GFX_VERx10 == 110
53 # include "intel_gfx110_shaders_spv.h"
54 # include "intel_gfx110_shaders_binding.h"
55 #elif GFX_VERx10 == 120
56 # include "intel_gfx120_shaders_spv.h"
57 # include "intel_gfx120_shaders_binding.h"
58 #elif GFX_VERx10 == 125
59 # include "intel_gfx125_shaders_spv.h"
60 # include "intel_gfx125_shaders_binding.h"
61 #elif GFX_VERx10 == 200
62 # include "intel_gfx200_shaders_spv.h"
63 # include "intel_gfx200_shaders_binding.h"
64 #elif GFX_VERx10 == 300
65 # include "intel_gfx300_shaders_spv.h"
66 # include "intel_gfx300_shaders_binding.h"
67 #else
68 # error "Unsupported generation"
69 #endif
70 
/* Load one field of the pushed parameter structure as a NIR uniform load.
 *
 * The whole struct is pushed as a single uniform blob, so the field's
 * offsetof() doubles as the uniform base offset; .range covers exactly the
 * bit_size/8 bytes of the field.
 */
#define load_param(b, bit_size, struct_name, field_name)          \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),            \
                    .base = offsetof(struct_name, field_name),   \
                    .range = bit_size / 8)
75 
/* Compute a linear fragment index from the fragment coordinate.
 *
 * The generation shader is dispatched as a RECTLIST whose rows are up to
 * 8192 fragments wide (see the x1/y1 computation in
 * emit_indirect_generate_draw), so the linear index is y * 8192 + x.
 */
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}
84 
/* Vtbl hook: return the per-generation shader library SPIR-V binary
 * (selected by the GFX_VERx10 includes above) and its size in bytes.
 */
static const uint32_t *
load_shader_lib_spv(uint32_t *out_size)
{
   *out_size = sizeof(genX(shaders_spv));
   return genX(shaders_spv);
}
91 
/* Vtbl hook: emit the NIR call into the shader library's draw-writing
 * entry point, loading every argument from the pushed
 * iris_gen_indirect_params structure.
 *
 * NOTE: the argument order must match the genX(libiris_write_draw)
 * signature generated from the shader library.
 *
 * Returns the size of the parameter structure so the caller knows how
 * many bytes of push constants to provide.
 */
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}
111 
/* Install the per-generation vtbl hooks used to build the indirect
 * draw-generation shader for this screen.
 */
void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib_spv = load_shader_lib_spv;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}
118 
119 /**
120  * Stream out temporary/short-lived state.
121  *
122  * This allocates space, pins the BO, and includes the BO address in the
123  * returned offset (which works because all state lives in 32-bit memory
124  * zones).
125  */
126 static void *
upload_state(struct iris_batch * batch,struct u_upload_mgr * uploader,struct iris_state_ref * ref,unsigned size,unsigned alignment)127 upload_state(struct iris_batch *batch,
128              struct u_upload_mgr *uploader,
129              struct iris_state_ref *ref,
130              unsigned size,
131              unsigned alignment)
132 {
133    void *p = NULL;
134    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
135    iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
136    return p;
137 }
138 
/* Allocate, pin, and account for transient state memory.
 *
 * Like upload_state(), but *out_offset is returned relative to the
 * dynamic state base address (iris_bo_offset_from_base_address), which is
 * what *_STATE_POINTERS commands expect, and the allocation is recorded
 * for state-size debug accounting.
 */
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   /* Record with the absolute address, before rebasing the offset below. */
   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}
161 
/**
 * Emit a RECTLIST draw that executes the draw-generation fragment shader.
 *
 * All geometry stages are disabled and a fragment shader
 * (ice->draw.generation.shader) is bound whose invocations write the
 * generated 3DPRIMITIVE command stream.  Each fragment handles one draw
 * slot, indexed via load_fragment_index().  Because this smashes most 3D
 * pipeline state, the corresponding dirty bits are raised at the end so
 * the next application draw re-emits everything that was clobbered.
 *
 * @param batch       render batch to emit into
 * @param params_addr GPU address of the pushed iris_gen_indirect_params
 * @param params_size size in bytes of the pushed parameters
 * @param ring_count  number of draw slots the rectangle must cover
 */
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */

   /* Two vertex elements: VB1 (element 0) is unused padding-style input,
    * VB0 (element 1) provides the rectangle's vec3 positions.
    */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                           GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex   = 0;
      ve.Valid               = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control   = VFCOMP_STORE_SRC;
      ve.Component1Control   = VFCOMP_STORE_SRC;
      ve.Component2Control   = VFCOMP_STORE_SRC;
      ve.Component3Control   = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   /* Minimal URB configuration: every geometry stage gets size 1 since only
    * passthrough VS outputs are needed.  These are zeroed again at the end.
    */
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   /* Disable all shader stages except the fragment shader. */
   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
      /* Gfx9+ uses the brw compiler backend, older parts use elk. */
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable       = fs_data->uses_vmask;

      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable     = shader->nr_params > 0 ||
                                  shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

#if GFX_VER >= 30
      ps.RegistersPerThread = ptl_register_blocks(wm_prog_data->base.grf_used);
#endif

#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   /* Stream out a full-range [0, 1] CC viewport. */
   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

   /* Rectangle covering ring_count fragments, wrapped at 8192 per row to
    * match load_fragment_index() in the generation shader.
    */
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   /* RECTLIST: 3 vertices define the axis-aligned rectangle. */
   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */


   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex     = 0;
      vb.AddressModifyEnable   = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch           = 3 * sizeof(float);
      vb.BufferSize            = 9 * sizeof(float);
      vb.MOCS                  = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable       = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

#if GFX_VERx10 > 120
   /* Push the parameter structure as FS constants via CONSTANT_ALL. */
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
         GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   /* Point entry 0 of the FS binding table at the null framebuffer surface. */
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   /* Kick the generation shader: one RECTLIST instance. */
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType         = SEQUENTIAL;
      prim.PrimitiveTopologyType    = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance   = 3;
      prim.InstanceCount            = 1;
   }


   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   /* Flag everything except the bits we know we didn't disturb. */
   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Invalidate the URB config we programmed above. */
   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}
570 
/* Size in bytes of the ring BO that receives generated draw commands. */
#define RING_SIZE (128 * 1024)

/* Lazily allocate the per-context ring BO that the generation shader
 * fills with vertex-buffer and 3DPRIMITIVE commands.  Allocated once and
 * reused for all subsequent indirect draws.
 */
static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   /* Already allocated on a previous call. */
   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   /* Include the generated commands in GPU error captures for debugging. */
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}
587 
/**
 * Set up and run the draw-generation shader for an indirect draw.
 *
 * Uploads an iris_gen_indirect_params block describing the application's
 * indirect buffer, pins every BO involved, then emits the RECTLIST that
 * executes the generation fragment shader (see
 * emit_indirect_generate_draw).  The generated commands land in the
 * context's ring BO.
 *
 * @param batch           render batch
 * @param draw            pipe draw info (index_size selects the indirect
 *                        entry layout)
 * @param indirect        indirect buffer / draw-count buffer description
 * @param sc              start/count for workaround emission
 * @param out_params_addr receives the GPU address of the uploaded params
 *
 * Returns a CPU pointer to the uploaded parameters.
 */
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   /* Indirect entries are 5 dwords for indexed draws and 4 otherwise
    * (the standard indirect draw command layouts).
    */
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;

   /* Bytes of generated command stream needed per draw. */
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   /* How many draws fit in the ring, reserving room for the jump-back
    * commands and the per-draw draw_id/is_indexed_draw dwords.
    */
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   /* Pin every BO the generation shader reads or writes. */
   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   /* The flags field packs, low to high: generation flags, the vertex
    * buffer MOCS (bits 8+), the command stride in dwords (bits 16+), and
    * the bound vertex buffer count (bits 24+).
    */
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr  = ice->draw.generation.ring_bo->address,
      .ring_count           = ring_count,
      .draw_id_addr         = ice->draw.generation.ring_bo->address +
                              ring_count * cmd_stride +
                              4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr      = indirect->indirect_draw_count ?
                              (iris_resource_bo(indirect->indirect_draw_count)->address +
                               indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr   = iris_resource_bo(indirect->buffer)->address +
                              indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count       = indirect->draw_count,
      .flags                = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
                              (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                               ANV_GENERATED_FLAG_PREDICATED : 0) |
                              (ice->state.vs_uses_draw_params ?
                               ANV_GENERATED_FLAG_BASE : 0) |
                              (ice->state.vs_uses_derived_draw_params ?
                               ANV_GENERATED_FLAG_DRAWID : 0) |
                              (iris_mocs(NULL, &screen->isl_dev,
                                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                              ((cmd_stride / 4) << 16) |
                              util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);


   return params;
}
684