• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2023 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include <stdio.h>
6 #include <errno.h>
7 
8 #ifdef HAVE_VALGRIND
9 #include <valgrind.h>
10 #include <memcheck.h>
11 #define VG(x) x
12 #else
13 #define VG(x)
14 #endif
15 
16 #include "pipe/p_defines.h"
17 #include "pipe/p_state.h"
18 #include "pipe/p_context.h"
19 #include "pipe/p_screen.h"
20 #include "util/u_upload_mgr.h"
21 #include "compiler/nir/nir_builder.h"
22 #include "compiler/nir/nir_serialize.h"
23 #include "intel/common/intel_aux_map.h"
24 #include "intel/common/intel_l3_config.h"
25 #include "intel/common/intel_sample_positions.h"
26 #include "intel/ds/intel_tracepoints.h"
27 #include "iris_batch.h"
28 #include "iris_context.h"
29 #include "iris_defines.h"
30 #include "iris_pipe.h"
31 #include "iris_resource.h"
32 #include "iris_utrace.h"
33 
34 #include "iris_genx_macros.h"
35 
36 #if GFX_VER >= 9
37 #include "intel/compiler/brw_compiler.h"
38 #include "intel/common/intel_genX_state_brw.h"
39 #else
40 #include "intel/compiler/elk/elk_compiler.h"
41 #include "intel/common/intel_genX_state_elk.h"
42 #endif
43 
44 #include "libintel_shaders.h"
45 
46 #if GFX_VERx10 == 80
47 # include "intel_gfx8_shaders_code.h"
48 #elif GFX_VERx10 == 90
49 # include "intel_gfx9_shaders_code.h"
50 #elif GFX_VERx10 == 110
51 # include "intel_gfx11_shaders_code.h"
52 #elif GFX_VERx10 == 120
53 # include "intel_gfx12_shaders_code.h"
54 #elif GFX_VERx10 == 125
55 # include "intel_gfx125_shaders_code.h"
56 #elif GFX_VERx10 == 200
57 # include "intel_gfx20_shaders_code.h"
58 #elif GFX_VERx10 == 300
59 # include "intel_gfx30_shaders_code.h"
60 #else
61 # error "Unsupported generation"
62 #endif
63 
/* Load one field of `struct_name` from the push/uniform parameter buffer.
 *
 * The parameter struct is bound at uniform base 0, so the field's byte
 * offset is computed with offsetof() to match the CPU-side layout exactly.
 * `bit_size` (32 or 64) selects the load width; `.range` is the byte size
 * of the single loaded component.
 */
#define load_param(b, bit_size, struct_name, field_name)          \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),            \
                    .base = offsetof(struct_name, field_name),   \
                    .range = bit_size / 8)
68 
69 static nir_def *
load_fragment_index(nir_builder * b)70 load_fragment_index(nir_builder *b)
71 {
72    nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
73    return nir_iadd(b,
74                    nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
75                    nir_channel(b, pos_in, 0));
76 }
77 
/* Deserialize the precompiled internal shader library for this generation.
 *
 * The NIR blob is compiled into the driver as genX(intel_shaders_nir)
 * (selected by the GFX_VERx10 include block at the top of this file).
 * Compiler options come from the brw backend on Gfx9+ and the elk backend
 * on older generations, using the MESA_SHADER_KERNEL option set.
 *
 * Returns a nir_shader allocated on `mem_ctx`; the caller owns it through
 * that ralloc context.
 */
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
#if GFX_VER >= 9
      screen->brw->nir_options[MESA_SHADER_KERNEL];
#else
      screen->elk->nir_options[MESA_SHADER_KERNEL];
#endif

   struct blob_reader blob;
   blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
                    sizeof(genX(intel_shaders_nir)));
   return nir_deserialize(mem_ctx, nir_options, &blob);
}
93 
/* Emit the NIR call into the draw-generation shader library.
 *
 * Every argument is fetched from the iris_gen_indirect_params push-constant
 * struct via load_param() (so argument order here must match the
 * genX(libiris_write_draw) signature), except the final per-fragment index,
 * which identifies which indirect draw this invocation generates.
 *
 * Returns the size of the parameter struct the shader consumes, so the
 * caller knows how many bytes of push constants to provide.
 */
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}
113 
/* Hook the per-generation draw-generation entry points into the screen
 * vtable, so generation-independent code can reach them.
 */
void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}
120 
121 /**
122  * Stream out temporary/short-lived state.
123  *
124  * This allocates space, pins the BO, and includes the BO address in the
125  * returned offset (which works because all state lives in 32-bit memory
126  * zones).
127  */
128 static void *
upload_state(struct iris_batch * batch,struct u_upload_mgr * uploader,struct iris_state_ref * ref,unsigned size,unsigned alignment)129 upload_state(struct iris_batch *batch,
130              struct u_upload_mgr *uploader,
131              struct iris_state_ref *ref,
132              unsigned size,
133              unsigned alignment)
134 {
135    void *p = NULL;
136    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
137    iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
138    return p;
139 }
140 
/* Stream out temporary state and return a state-pointer-ready offset.
 *
 * Like upload_state(), but additionally records the allocation's GPU
 * address and size via iris_record_state_size() (using the offset
 * relative to the BO), and then adjusts *out_offset to include the BO's
 * offset from the state base address — so the result can be programmed
 * directly into a *_STATE_POINTERS packet.
 *
 * NOTE: the iris_record_state_size() call must happen before *out_offset
 * is rebased; do not reorder.
 *
 * Returns the CPU mapping of the allocation.
 */
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   /* Pin so the state memory is resident when the batch executes. */
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   /* Rebase the offset so it is relative to the state base address. */
   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}
163 
/* Emit a full 3D pipeline setup plus a RECTLIST draw that runs the
 * draw-generation fragment shader.
 *
 * The rectangle is sized so each covered fragment corresponds to one
 * potential indirect draw (x up to min(ring_count, 8192), additional rows
 * for counts beyond 8192 — matching load_fragment_index()'s y*8192+x
 * linearization).  The fragment shader writes the actual draw commands
 * into the generation ring BO.
 *
 * `params_addr`/`params_size` locate the iris_gen_indirect_params buffer
 * that is bound as push constants for the shader.
 *
 * This clobbers nearly all tracked 3D state; the dirty-bit handling at
 * the end forces re-emission on the next application draw.
 */
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */

   /* Two vertex elements: the first sources one float from VB 1 (other
    * components forced to 0), the second sources xyz position from VB 0
    * with w forced to 1.0.
    */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                           GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex   = 0;
      ve.Valid               = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control   = VFCOMP_STORE_SRC;
      ve.Component1Control   = VFCOMP_STORE_SRC;
      ve.Component2Control   = VFCOMP_STORE_SRC;
      ve.Component3Control   = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   /* Vertex-fetch fixed state: no statistics, InstanceID injected as
    * SGVS component, no instancing on either element, RECTLIST topology.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   /* Minimal URB configuration (size 1 per stage); tessellation and
    * geometry stages are disabled below.
    */
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   /* Depth/stencil testing fully disabled (zeroed packet). */
   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   /* Disable all geometry stages (zeroed packets): VS through GS and
    * streamout are off; only the fragment shader below does work.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   /* Program the generation fragment shader itself. */
   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      /* Force dispatch so the shader's side effects (the command writes)
       * happen even with no color output.
       */
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable       = fs_data->uses_vmask;

      /* One binding-table entry on Gfx9 only (see the Gfx<=9 binding
       * table re-emission further down).
       */
      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable     = shader->nr_params > 0 ||
                                  shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   /* Trivial [0, 1] CC viewport streamed out on the fly. */
   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

   /* Rectangle covering min(ring_count, 8192) x ceil(ring_count/8192)
    * fragments; a RECTLIST needs only 3 vertices (v3 is implied).
    */
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */


   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex     = 0;
      vb.AddressModifyEnable   = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch           = 3 * sizeof(float);
      vb.BufferSize            = 9 * sizeof(float);
      vb.MOCS                  = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable       = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

   /* Bind the parameter struct as fragment push constants. */
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
         GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   /* The actual generation draw: 3 sequential vertices, one instance. */
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType         = SEQUENTIAL;
      prim.PrimitiveTopologyType    = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance   = 3;
      prim.InstanceCount            = 1;
   }


   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   /* skip_bits = state we did NOT touch, which need not be re-emitted. */
   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   /* Dirty everything except the skip sets. */
   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Invalidate the URB config so the next draw reprograms it. */
   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}
567 
/* Size of the ring BO receiving generated draw commands (bytes). */
#define RING_SIZE (128 * 1024)

/* Lazily allocate the per-context generation ring BO.
 *
 * No-op if the BO already exists.  The BO is allocated without
 * suballocation in the OTHER memzone and flagged for error-state capture
 * to aid GPU-hang debugging.
 */
static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}
584 
/* Top-level entry point for generated indirect draws.
 *
 * Uploads an iris_gen_indirect_params struct describing the application's
 * indirect buffer, optional draw-count buffer, and the internal ring BO,
 * then runs the generation shader (via emit_indirect_generate_draw) to
 * write the real draw commands into the ring.
 *
 * Returns the CPU mapping of the parameter struct (so the caller can
 * patch fields afterwards) and stores its GPU address in *out_params_addr.
 */
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   /* Default stride of one indirect-draw record: 5 dwords for indexed
    * draws, 4 otherwise (used only when the app passes stride == 0).
    */
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;

   /* Bytes of commands the shader emits per draw: an optional
    * 3DSTATE_VERTEX_BUFFERS packet (header + one VERTEX_BUFFER_STATE per
    * enabled draw-params buffer) plus the 3DPRIMITIVE itself.
    */
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   /* Reserve room at the end of the ring for the jump-back commands, then
    * compute how many draws fit in one ring pass.
    */
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   /* params_size is 32-byte aligned to match the push-constant read
    * length programmed in emit_indirect_generate_draw().
    */
   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   /* Pin every BO the generation shader will read or write. */
   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   /* Fill the GPU-visible parameter struct.  The flags dword packs, in
    * addition to the ANV_GENERATED_FLAG_* bits: vertex-buffer MOCS in
    * bits 8-15, per-draw command dword count in bits 16-23, and the
    * bound-vertex-buffer count in bits 24+ (layout shared with the
    * generation shader).
    */
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr  = ice->draw.generation.ring_bo->address,
      .ring_count           = ring_count,
      .draw_id_addr         = ice->draw.generation.ring_bo->address +
                              ring_count * cmd_stride +
                              4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr      = indirect->indirect_draw_count ?
                              (iris_resource_bo(indirect->indirect_draw_count)->address +
                               indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr   = iris_resource_bo(indirect->buffer)->address +
                              indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count       = indirect->draw_count,
      .flags                = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
                              (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                               ANV_GENERATED_FLAG_PREDICATED : 0) |
                              (ice->state.vs_uses_draw_params ?
                               ANV_GENERATED_FLAG_BASE : 0) |
                              (ice->state.vs_uses_derived_draw_params ?
                               ANV_GENERATED_FLAG_DRAWID : 0) |
                              (iris_mocs(NULL, &screen->isl_dev,
                                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                              ((cmd_stride / 4) << 16) |
                              util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);


   return params;
}
681