1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "iris_context.h"
48 #include "nir/tgsi_to_nir.h"
49
50 #define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
51 #define BRW_KEY_INIT(gen, prog_id) \
52 .base.program_string_id = prog_id, \
53 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
54 .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
55 .base.tex.compressed_multisample_layout_mask = ~0, \
56 .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
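/* For example, `KEY_ID(vue.base)` in a designated initializer expands to
 * `.vue.base.program_string_id = ish->program_id`, so it can only be used
 * where an `ish` pointer is in scope (see iris_update_compiled_vs()).
 */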
57
58 struct iris_threaded_compile_job {
59 struct iris_screen *screen;
60 struct u_upload_mgr *uploader;
61 struct pipe_debug_callback *dbg;
62 struct iris_uncompiled_shader *ish;
63 struct iris_compiled_shader *shader;
64 };
65
66 static unsigned
67 get_new_program_id(struct iris_screen *screen)
68 {
69 return p_atomic_inc_return(&screen->program_id);
70 }
71
72 void
73 iris_finalize_program(struct iris_compiled_shader *shader,
74 struct brw_stage_prog_data *prog_data,
75 uint32_t *streamout,
76 enum brw_param_builtin *system_values,
77 unsigned num_system_values,
78 unsigned kernel_input_size,
79 unsigned num_cbufs,
80 const struct iris_binding_table *bt)
81 {
82 shader->prog_data = prog_data;
83 shader->streamout = streamout;
84 shader->system_values = system_values;
85 shader->num_system_values = num_system_values;
86 shader->kernel_input_size = kernel_input_size;
87 shader->num_cbufs = num_cbufs;
88 shader->bt = *bt;
89
90 ralloc_steal(shader, shader->prog_data);
91 ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
92 ralloc_steal(shader->prog_data, prog_data->param);
93 ralloc_steal(shader->prog_data, prog_data->pull_param);
94 ralloc_steal(shader, shader->streamout);
95 ralloc_steal(shader, shader->system_values);
96 }
97
98 static struct brw_vs_prog_key
99 iris_to_brw_vs_key(const struct intel_device_info *devinfo,
100 const struct iris_vs_prog_key *key)
101 {
102 return (struct brw_vs_prog_key) {
103 BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
104
105 /* Don't tell the backend about our clip plane constants; we've
106 * already lowered them in NIR and don't want it doing it again.
107 */
108 .nr_userclip_plane_consts = 0,
109 };
110 }
111
112 static struct brw_tcs_prog_key
113 iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
114 const struct iris_tcs_prog_key *key)
115 {
116 return (struct brw_tcs_prog_key) {
117 BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
118 .tes_primitive_mode = key->tes_primitive_mode,
119 .input_vertices = key->input_vertices,
120 .patch_outputs_written = key->patch_outputs_written,
121 .outputs_written = key->outputs_written,
122 .quads_workaround = key->quads_workaround,
123 };
124 }
125
126 static struct brw_tes_prog_key
127 iris_to_brw_tes_key(const struct intel_device_info *devinfo,
128 const struct iris_tes_prog_key *key)
129 {
130 return (struct brw_tes_prog_key) {
131 BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
132 .patch_inputs_read = key->patch_inputs_read,
133 .inputs_read = key->inputs_read,
134 };
135 }
136
137 static struct brw_gs_prog_key
138 iris_to_brw_gs_key(const struct intel_device_info *devinfo,
139 const struct iris_gs_prog_key *key)
140 {
141 return (struct brw_gs_prog_key) {
142 BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
143 };
144 }
145
146 static struct brw_wm_prog_key
147 iris_to_brw_fs_key(const struct intel_device_info *devinfo,
148 const struct iris_fs_prog_key *key)
149 {
150 return (struct brw_wm_prog_key) {
151 BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
152 .nr_color_regions = key->nr_color_regions,
153 .flat_shade = key->flat_shade,
154 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
155 .alpha_to_coverage = key->alpha_to_coverage,
156 .clamp_fragment_color = key->clamp_fragment_color,
157 .persample_interp = key->persample_interp,
158 .multisample_fbo = key->multisample_fbo,
159 .force_dual_color_blend = key->force_dual_color_blend,
160 .coherent_fb_fetch = key->coherent_fb_fetch,
161 .color_outputs_valid = key->color_outputs_valid,
162 .input_slots_valid = key->input_slots_valid,
163 .ignore_sample_mask_out = !key->multisample_fbo,
164 };
165 }
166
167 static struct brw_cs_prog_key
168 iris_to_brw_cs_key(const struct intel_device_info *devinfo,
169 const struct iris_cs_prog_key *key)
170 {
171 return (struct brw_cs_prog_key) {
172 BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
173 };
174 }
175
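/* Thin wrapper over u_upload_alloc() that stores the resulting resource and
 * offset in `ref` and returns the CPU mapping (NULL on failure).
 */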
176 static void *
177 upload_state(struct u_upload_mgr *uploader,
178 struct iris_state_ref *ref,
179 unsigned size,
180 unsigned alignment)
181 {
182 void *p = NULL;
183 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
184 return p;
185 }
186
187 void
188 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
189 struct pipe_shader_buffer *buf,
190 struct iris_state_ref *surf_state,
191 isl_surf_usage_flags_t usage)
192 {
193 struct pipe_context *ctx = &ice->ctx;
194 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
195 bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
196
197 void *map =
198 upload_state(ice->state.surface_uploader, surf_state,
199 screen->isl_dev.ss.size, 64);
200 if (unlikely(!map)) {
201 surf_state->res = NULL;
202 return;
203 }
204
205 struct iris_resource *res = (void *) buf->buffer;
206 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
207 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
208
209 const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
210
211 isl_buffer_fill_state(&screen->isl_dev, map,
212 .address = res->bo->address + res->offset +
213 buf->buffer_offset,
214 .size_B = buf->buffer_size - res->offset,
215 .format = dataport ? ISL_FORMAT_RAW
216 : ISL_FORMAT_R32G32B32A32_FLOAT,
217 .swizzle = ISL_SWIZZLE_IDENTITY,
218 .stride_B = 1,
219 .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
220 }
221
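/* Compute a flattened, clamped offset for an array-of-arrays deref chain.
 *
 * Worked example (illustrative): for a variable declared as `image img[3][4]`
 * and an access `img[2][3]` with elem_size == 1, the result is
 * 2 * 4 + 3 * 1 = 11, clamped to the last valid element (11).
 */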
222 static nir_ssa_def *
223 get_aoa_deref_offset(nir_builder *b,
224 nir_deref_instr *deref,
225 unsigned elem_size)
226 {
227 unsigned array_size = elem_size;
228 nir_ssa_def *offset = nir_imm_int(b, 0);
229
230 while (deref->deref_type != nir_deref_type_var) {
231 assert(deref->deref_type == nir_deref_type_array);
232
233 /* This level's element size is the previous level's array size */
234 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
235 assert(deref->arr.index.ssa);
236 offset = nir_iadd(b, offset,
237 nir_imul(b, index, nir_imm_int(b, array_size)));
238
239 deref = nir_deref_instr_parent(deref);
240 assert(glsl_type_is_array(deref->type));
241 array_size *= glsl_get_length(deref->type);
242 }
243
244 /* Accessing an invalid surface index with the dataport can result in a
245 * hang. According to the spec "if the index used to select an individual
246 * element is negative or greater than or equal to the size of the array,
247 * the results of the operation are undefined but may not lead to
248 * termination" -- which is one of the possible outcomes of the hang.
249 * Clamp the index to prevent access outside of the array bounds.
250 */
251 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
252 }
253
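/* Rewrite image deref intrinsics to use flat, binding-table-style indices:
 * the variable's driver_location plus the flattened array-of-arrays offset
 * computed by get_aoa_deref_offset().
 */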
254 static void
255 iris_lower_storage_image_derefs(nir_shader *nir)
256 {
257 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
258
259 nir_builder b;
260 nir_builder_init(&b, impl);
261
262 nir_foreach_block(block, impl) {
263 nir_foreach_instr_safe(instr, block) {
264 if (instr->type != nir_instr_type_intrinsic)
265 continue;
266
267 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
268 switch (intrin->intrinsic) {
269 case nir_intrinsic_image_deref_load:
270 case nir_intrinsic_image_deref_store:
271 case nir_intrinsic_image_deref_atomic_add:
272 case nir_intrinsic_image_deref_atomic_imin:
273 case nir_intrinsic_image_deref_atomic_umin:
274 case nir_intrinsic_image_deref_atomic_imax:
275 case nir_intrinsic_image_deref_atomic_umax:
276 case nir_intrinsic_image_deref_atomic_and:
277 case nir_intrinsic_image_deref_atomic_or:
278 case nir_intrinsic_image_deref_atomic_xor:
279 case nir_intrinsic_image_deref_atomic_exchange:
280 case nir_intrinsic_image_deref_atomic_comp_swap:
281 case nir_intrinsic_image_deref_size:
282 case nir_intrinsic_image_deref_samples:
283 case nir_intrinsic_image_deref_load_raw_intel:
284 case nir_intrinsic_image_deref_store_raw_intel: {
285 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
286 nir_variable *var = nir_deref_instr_get_variable(deref);
287
288 b.cursor = nir_before_instr(&intrin->instr);
289 nir_ssa_def *index =
290 nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
291 get_aoa_deref_offset(&b, deref, 1));
292 nir_rewrite_image_intrinsic(intrin, index, false);
293 break;
294 }
295
296 default:
297 break;
298 }
299 }
300 }
301 }
302
303 static bool
304 iris_uses_image_atomic(const nir_shader *shader)
305 {
306 nir_foreach_function(function, shader) {
307 if (function->impl == NULL)
308 continue;
309
310 nir_foreach_block(block, function->impl) {
311 nir_foreach_instr(instr, block) {
312 if (instr->type != nir_instr_type_intrinsic)
313 continue;
314
315 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
316 switch (intrin->intrinsic) {
317 case nir_intrinsic_image_deref_atomic_add:
318 case nir_intrinsic_image_deref_atomic_imin:
319 case nir_intrinsic_image_deref_atomic_umin:
320 case nir_intrinsic_image_deref_atomic_imax:
321 case nir_intrinsic_image_deref_atomic_umax:
322 case nir_intrinsic_image_deref_atomic_and:
323 case nir_intrinsic_image_deref_atomic_or:
324 case nir_intrinsic_image_deref_atomic_xor:
325 case nir_intrinsic_image_deref_atomic_exchange:
326 case nir_intrinsic_image_deref_atomic_comp_swap:
327 unreachable("Should have been lowered in "
328 "iris_lower_storage_image_derefs");
329
330 case nir_intrinsic_image_atomic_add:
331 case nir_intrinsic_image_atomic_imin:
332 case nir_intrinsic_image_atomic_umin:
333 case nir_intrinsic_image_atomic_imax:
334 case nir_intrinsic_image_atomic_umax:
335 case nir_intrinsic_image_atomic_and:
336 case nir_intrinsic_image_atomic_or:
337 case nir_intrinsic_image_atomic_xor:
338 case nir_intrinsic_image_atomic_exchange:
339 case nir_intrinsic_image_atomic_comp_swap:
340 return true;
341
342 default:
343 break;
344 }
345 }
346 }
347 }
348
349 return false;
350 }
351
352 /**
353 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
354 */
355 static bool
356 iris_fix_edge_flags(nir_shader *nir)
357 {
358 if (nir->info.stage != MESA_SHADER_VERTEX) {
359 nir_shader_preserve_all_metadata(nir);
360 return false;
361 }
362
363 nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
364 VARYING_SLOT_EDGE);
365 if (!var) {
366 nir_shader_preserve_all_metadata(nir);
367 return false;
368 }
369
370 var->data.mode = nir_var_shader_temp;
371 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
372 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
373 nir_fixup_deref_modes(nir);
374
375 nir_foreach_function(f, nir) {
376 if (f->impl) {
377 nir_metadata_preserve(f->impl, nir_metadata_block_index |
378 nir_metadata_dominance |
379 nir_metadata_live_ssa_defs |
380 nir_metadata_loop_analysis);
381 } else {
382 nir_metadata_preserve(f->impl, nir_metadata_all);
383 }
384 }
385
386 return true;
387 }
388
389 /**
390 * Fix an uncompiled shader's stream output info.
391 *
392 * Core Gallium stores output->register_index as a "slot" number, where
393 * slots are assigned consecutively to all outputs in info->outputs_written.
394 * This naive packing of outputs doesn't work for us - we too have slots,
395 * but the layout is defined by the VUE map, which we won't have until we
396 * compile a specific shader variant. So, we remap these and simply store
397 * VARYING_SLOT_* in our copy's output->register_index fields.
398 *
399 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
400 * components of our VUE header. See brw_vue_map.c for the layout.
401 */
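/* For example, a vertex shader writing only VARYING_SLOT_POS and
 * VARYING_SLOT_CLIP_DIST0 gets Gallium register_index values 0 and 1;
 * this pass rewrites index 1 back to VARYING_SLOT_CLIP_DIST0 so later
 * code can consult the VUE map directly.
 */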
402 static void
403 update_so_info(struct pipe_stream_output_info *so_info,
404 uint64_t outputs_written)
405 {
406 uint8_t reverse_map[64] = {};
407 unsigned slot = 0;
408 while (outputs_written) {
409 reverse_map[slot++] = u_bit_scan64(&outputs_written);
410 }
411
412 for (unsigned i = 0; i < so_info->num_outputs; i++) {
413 struct pipe_stream_output *output = &so_info->output[i];
414
415 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
416 output->register_index = reverse_map[output->register_index];
417
418 /* The VUE header contains three scalar fields packed together:
419 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
420 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
421 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
422 */
423 switch (output->register_index) {
424 case VARYING_SLOT_LAYER:
425 assert(output->num_components == 1);
426 output->register_index = VARYING_SLOT_PSIZ;
427 output->start_component = 1;
428 break;
429 case VARYING_SLOT_VIEWPORT:
430 assert(output->num_components == 1);
431 output->register_index = VARYING_SLOT_PSIZ;
432 output->start_component = 2;
433 break;
434 case VARYING_SLOT_PSIZ:
435 assert(output->num_components == 1);
436 output->start_component = 3;
437 break;
438 }
439
440 //info->outputs_written |= 1ull << output->register_index;
441 }
442 }
443
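/* Fill one vec4's worth of image system values: the first `n` components
 * reference consecutive dwords of the brw_image_param field at `offset`,
 * and any remaining components are padded with zero.
 */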
444 static void
445 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
446 unsigned offset, unsigned n)
447 {
448 assert(offset % sizeof(uint32_t) == 0);
449
450 for (unsigned i = 0; i < n; ++i)
451 sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
452
453 for (unsigned i = n; i < 4; ++i)
454 sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
455 }
456
457 /**
458 * Associate NIR uniform variables with the prog_data->param[] mechanism
459 * used by the backend. Also, decide which UBOs we'd like to push in an
460 * ideal situation (though the backend can reduce this).
461 */
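/* Any system values discovered here are gathered into an extra constant
 * buffer, appended after the application's UBOs (see sysval_cbuf_index).
 */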
462 static void
463 iris_setup_uniforms(const struct brw_compiler *compiler,
464 void *mem_ctx,
465 nir_shader *nir,
466 struct brw_stage_prog_data *prog_data,
467 unsigned kernel_input_size,
468 enum brw_param_builtin **out_system_values,
469 unsigned *out_num_system_values,
470 unsigned *out_num_cbufs)
471 {
472 UNUSED const struct intel_device_info *devinfo = compiler->devinfo;
473
474 unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
475
476 const unsigned IRIS_MAX_SYSTEM_VALUES =
477 PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
478 enum brw_param_builtin *system_values =
479 rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
480 unsigned num_system_values = 0;
481
482 unsigned patch_vert_idx = -1;
483 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
484 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
485 unsigned variable_group_size_idx = -1;
486 unsigned work_dim_idx = -1;
487 memset(ucp_idx, -1, sizeof(ucp_idx));
488 memset(img_idx, -1, sizeof(img_idx));
489
490 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
491
492 nir_builder b;
493 nir_builder_init(&b, impl);
494
495 b.cursor = nir_before_block(nir_start_block(impl));
496 nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
497
498 /* Turn system value intrinsics into uniforms */
499 nir_foreach_block(block, impl) {
500 nir_foreach_instr_safe(instr, block) {
501 if (instr->type != nir_instr_type_intrinsic)
502 continue;
503
504 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
505 nir_ssa_def *offset;
506
507 switch (intrin->intrinsic) {
508 case nir_intrinsic_load_constant: {
509 unsigned load_size = intrin->dest.ssa.num_components *
510 intrin->dest.ssa.bit_size / 8;
511 unsigned load_align = intrin->dest.ssa.bit_size / 8;
512
513 /* This one is special because it reads from the shader constant
514 * data and not cbuf0 which gallium uploads for us.
515 */
516 b.cursor = nir_instr_remove(&intrin->instr);
517
518 nir_ssa_def *offset =
519 nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
520 nir_intrinsic_base(intrin));
521
522 assert(load_size < b.shader->constant_data_size);
523 unsigned max_offset = b.shader->constant_data_size - load_size;
524 offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
525
526 nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
527 nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
528 nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
529
530 nir_ssa_def *data =
531 nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
532 nir_u2u64(&b, offset)),
533 load_align,
534 intrin->dest.ssa.num_components,
535 intrin->dest.ssa.bit_size);
536
537 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
538 data);
539 continue;
540 }
541 case nir_intrinsic_load_user_clip_plane: {
542 unsigned ucp = nir_intrinsic_ucp_id(intrin);
543
544 if (ucp_idx[ucp] == -1) {
545 ucp_idx[ucp] = num_system_values;
546 num_system_values += 4;
547 }
548
549 for (int i = 0; i < 4; i++) {
550 system_values[ucp_idx[ucp] + i] =
551 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
552 }
553
554 b.cursor = nir_before_instr(instr);
555 offset = nir_imm_int(&b, system_values_start +
556 ucp_idx[ucp] * sizeof(uint32_t));
557 break;
558 }
559 case nir_intrinsic_load_patch_vertices_in:
560 if (patch_vert_idx == -1)
561 patch_vert_idx = num_system_values++;
562
563 system_values[patch_vert_idx] =
564 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
565
566 b.cursor = nir_before_instr(instr);
567 offset = nir_imm_int(&b, system_values_start +
568 patch_vert_idx * sizeof(uint32_t));
569 break;
570 case nir_intrinsic_image_deref_load_param_intel: {
571 assert(devinfo->ver < 9);
572 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
573 nir_variable *var = nir_deref_instr_get_variable(deref);
574
575 if (img_idx[var->data.binding] == -1) {
576 /* GL only allows arrays of arrays of images. */
577 assert(glsl_type_is_image(glsl_without_array(var->type)));
578 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
579
580 for (int i = 0; i < num_images; i++) {
581 const unsigned img = var->data.binding + i;
582
583 img_idx[img] = num_system_values;
584 num_system_values += BRW_IMAGE_PARAM_SIZE;
585
586 uint32_t *img_sv = &system_values[img_idx[img]];
587
588 setup_vec4_image_sysval(
589 img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
590 offsetof(struct brw_image_param, offset), 2);
591 setup_vec4_image_sysval(
592 img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
593 offsetof(struct brw_image_param, size), 3);
594 setup_vec4_image_sysval(
595 img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
596 offsetof(struct brw_image_param, stride), 4);
597 setup_vec4_image_sysval(
598 img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
599 offsetof(struct brw_image_param, tiling), 3);
600 setup_vec4_image_sysval(
601 img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
602 offsetof(struct brw_image_param, swizzling), 2);
603 }
604 }
605
606 b.cursor = nir_before_instr(instr);
607 offset = nir_iadd(&b,
608 get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
609 nir_imm_int(&b, system_values_start +
610 img_idx[var->data.binding] * 4 +
611 nir_intrinsic_base(intrin) * 16));
612 break;
613 }
614 case nir_intrinsic_load_workgroup_size: {
615 assert(nir->info.workgroup_size_variable);
616 if (variable_group_size_idx == -1) {
617 variable_group_size_idx = num_system_values;
618 num_system_values += 3;
619 for (int i = 0; i < 3; i++) {
620 system_values[variable_group_size_idx + i] =
621 BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
622 }
623 }
624
625 b.cursor = nir_before_instr(instr);
626 offset = nir_imm_int(&b, system_values_start +
627 variable_group_size_idx * sizeof(uint32_t));
628 break;
629 }
630 case nir_intrinsic_load_work_dim: {
631 if (work_dim_idx == -1) {
632 work_dim_idx = num_system_values++;
633 system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
634 }
635 b.cursor = nir_before_instr(instr);
636 offset = nir_imm_int(&b, system_values_start +
637 work_dim_idx * sizeof(uint32_t));
638 break;
639 }
640 case nir_intrinsic_load_kernel_input: {
641 assert(nir_intrinsic_base(intrin) +
642 nir_intrinsic_range(intrin) <= kernel_input_size);
643 b.cursor = nir_before_instr(instr);
644 offset = nir_iadd_imm(&b, intrin->src[0].ssa,
645 nir_intrinsic_base(intrin));
646 break;
647 }
648 default:
649 continue;
650 }
651
652 nir_ssa_def *load =
653 nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
654 temp_ubo_name, offset,
655 .align_mul = 4,
656 .align_offset = 0,
657 .range_base = 0,
658 .range = ~0);
659
660 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
661 load);
662 nir_instr_remove(instr);
663 }
664 }
665
666 nir_validate_shader(nir, "before remapping");
667
668 /* Uniforms are stored in constant buffer 0, so the user-facing UBOs
669 * are shifted up by one index. If any constant buffer is needed,
670 * constant buffer 0 will be needed, so account for it.
671 */
672 unsigned num_cbufs = nir->info.num_ubos;
673 if (num_cbufs || nir->num_uniforms)
674 num_cbufs++;
675
676 /* Place the new params in a new cbuf. */
677 if (num_system_values > 0 || kernel_input_size > 0) {
678 unsigned sysval_cbuf_index = num_cbufs;
679 num_cbufs++;
680
681 system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
682 num_system_values);
683
684 nir_foreach_block(block, impl) {
685 nir_foreach_instr_safe(instr, block) {
686 if (instr->type != nir_instr_type_intrinsic)
687 continue;
688
689 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
690
691 if (load->intrinsic != nir_intrinsic_load_ubo)
692 continue;
693
694 b.cursor = nir_before_instr(instr);
695
696 assert(load->src[0].is_ssa);
697
698 if (load->src[0].ssa == temp_ubo_name) {
699 nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
700 nir_instr_rewrite_src(instr, &load->src[0],
701 nir_src_for_ssa(imm));
702 }
703 }
704 }
705
706 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
707 nir_opt_constant_folding(nir);
708 } else {
709 ralloc_free(system_values);
710 system_values = NULL;
711 }
712
713 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
714 nir_validate_shader(nir, "after remap");
715
716 /* We don't use params[], but gallium leaves num_uniforms set. We use it
717 * to detect when cbuf0 exists but is no longer needed by the time we get
718 * here. Zero it out so that the back-end doesn't get confused when
719 * nr_params * 4 != num_uniforms.
720 */
721 nir->num_uniforms = 0;
722
723 *out_system_values = system_values;
724 *out_num_system_values = num_system_values;
725 *out_num_cbufs = num_cbufs;
726 }
727
728 static const char *surface_group_names[] = {
729 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
730 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
731 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
732 [IRIS_SURFACE_GROUP_TEXTURE] = "texture",
733 [IRIS_SURFACE_GROUP_UBO] = "ubo",
734 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
735 [IRIS_SURFACE_GROUP_IMAGE] = "image",
736 };
737
738 static void
739 iris_print_binding_table(FILE *fp, const char *name,
740 const struct iris_binding_table *bt)
741 {
742 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
743
744 uint32_t total = 0;
745 uint32_t compacted = 0;
746
747 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
748 uint32_t size = bt->sizes[i];
749 total += size;
750 if (size)
751 compacted += util_bitcount64(bt->used_mask[i]);
752 }
753
754 if (total == 0) {
755 fprintf(fp, "Binding table for %s is empty\n\n", name);
756 return;
757 }
758
759 if (total != compacted) {
760 fprintf(fp, "Binding table for %s "
761 "(compacted to %u entries from %u entries)\n",
762 name, compacted, total);
763 } else {
764 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
765 }
766
767 uint32_t entry = 0;
768 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
769 uint64_t mask = bt->used_mask[i];
770 while (mask) {
771 int index = u_bit_scan64(&mask);
772 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
773 }
774 }
775 fprintf(fp, "\n");
776 }
777
778 enum {
779 /* Max elements in a surface group. */
780 SURFACE_GROUP_MAX_ELEMENTS = 64,
781 };
782
783 /**
784 * Map a <group, index> pair to a binding table index.
785 *
786 * For example: <UBO, 5> => binding table index 12
787 */
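/* The result is the group's base offset plus the number of lower-indexed
 * surfaces actually in use (a popcount over used_mask), so unused entries
 * are compacted away.
 */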
788 uint32_t
789 iris_group_index_to_bti(const struct iris_binding_table *bt,
790 enum iris_surface_group group, uint32_t index)
791 {
792 assert(index < bt->sizes[group]);
793 uint64_t mask = bt->used_mask[group];
794 uint64_t bit = 1ull << index;
795 if (bit & mask) {
796 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
797 } else {
798 return IRIS_SURFACE_NOT_USED;
799 }
800 }
801
802 /**
803 * Map a binding table index back to a <group, index> pair.
804 *
805 * For example: binding table index 12 => <UBO, 5>
806 */
807 uint32_t
808 iris_bti_to_group_index(const struct iris_binding_table *bt,
809 enum iris_surface_group group, uint32_t bti)
810 {
811 uint64_t used_mask = bt->used_mask[group];
812 assert(bti >= bt->offsets[group]);
813
814 uint32_t c = bti - bt->offsets[group];
815 while (used_mask) {
816 int i = u_bit_scan64(&used_mask);
817 if (c == 0)
818 return i;
819 c--;
820 }
821
822 return IRIS_SURFACE_NOT_USED;
823 }
824
825 static void
826 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
827 nir_instr *instr, nir_src *src,
828 enum iris_surface_group group)
829 {
830 assert(bt->sizes[group] > 0);
831
832 b->cursor = nir_before_instr(instr);
833 nir_ssa_def *bti;
834 if (nir_src_is_const(*src)) {
835 uint32_t index = nir_src_as_uint(*src);
836 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
837 src->ssa->bit_size);
838 } else {
839 /* Indirect usage makes all the surfaces of the group available,
840 * so we can just add the base.
841 */
842 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
843 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
844 }
845 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
846 }
847
848 static void
849 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
850 enum iris_surface_group group)
851 {
852 assert(bt->sizes[group] > 0);
853
854 if (nir_src_is_const(*src)) {
855 uint64_t index = nir_src_as_uint(*src);
856 assert(index < bt->sizes[group]);
857 bt->used_mask[group] |= 1ull << index;
858 } else {
859 /* There's an indirect usage, we need all the surfaces. */
860 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
861 }
862 }
863
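/* Debug switch: when INTEL_DISABLE_COMPACT_BINDING_TABLE is set in the
 * environment, binding tables are not compacted and every slot is kept.
 */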
864 static bool
865 skip_compacting_binding_tables(void)
866 {
867 static int skip = -1;
868 if (skip < 0)
869 skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
870 return skip;
871 }
872
873 /**
874 * Set up the binding table indices and apply them to the shader.
875 */
876 static void
877 iris_setup_binding_table(const struct intel_device_info *devinfo,
878 struct nir_shader *nir,
879 struct iris_binding_table *bt,
880 unsigned num_render_targets,
881 unsigned num_system_values,
882 unsigned num_cbufs)
883 {
884 const struct shader_info *info = &nir->info;
885
886 memset(bt, 0, sizeof(*bt));
887
888 /* Set the sizes for each surface group. For some groups, we already know
889 * upfront how many will be used, so mark them.
890 */
891 if (info->stage == MESA_SHADER_FRAGMENT) {
892 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
893 /* All render targets used. */
894 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
895 BITFIELD64_MASK(num_render_targets);
896
897 /* Set up the render target read surface group in order to support
898 * non-coherent framebuffer fetch on Gfx8.
899 */
900 if (devinfo->ver == 8 && info->outputs_read) {
901 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
902 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
903 BITFIELD64_MASK(num_render_targets);
904 }
905 } else if (info->stage == MESA_SHADER_COMPUTE) {
906 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
907 }
908
909 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
910 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];
911
912 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
913
914 /* Allocate an extra slot in the UBO section for NIR constants.
915 * Binding table compaction will remove it if unnecessary.
916 *
917 * We don't include them in iris_compiled_shader::num_cbufs because
918 * they are uploaded separately from shs->constbuf[], but from a shader
919 * point of view, they're another UBO (at the end of the section).
920 */
921 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
922
923 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
924
925 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
926 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
927
928 /* Mark surfaces used for the cases where we don't have the information
929 * available upfront.
930 */
931 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
932 nir_foreach_block (block, impl) {
933 nir_foreach_instr (instr, block) {
934 if (instr->type != nir_instr_type_intrinsic)
935 continue;
936
937 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
938 switch (intrin->intrinsic) {
939 case nir_intrinsic_load_num_workgroups:
940 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
941 break;
942
943 case nir_intrinsic_load_output:
944 if (devinfo->ver == 8) {
945 mark_used_with_src(bt, &intrin->src[0],
946 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
947 }
948 break;
949
950 case nir_intrinsic_image_size:
951 case nir_intrinsic_image_load:
952 case nir_intrinsic_image_store:
953 case nir_intrinsic_image_atomic_add:
954 case nir_intrinsic_image_atomic_imin:
955 case nir_intrinsic_image_atomic_umin:
956 case nir_intrinsic_image_atomic_imax:
957 case nir_intrinsic_image_atomic_umax:
958 case nir_intrinsic_image_atomic_and:
959 case nir_intrinsic_image_atomic_or:
960 case nir_intrinsic_image_atomic_xor:
961 case nir_intrinsic_image_atomic_exchange:
962 case nir_intrinsic_image_atomic_comp_swap:
963 case nir_intrinsic_image_load_raw_intel:
964 case nir_intrinsic_image_store_raw_intel:
965 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
966 break;
967
968 case nir_intrinsic_load_ubo:
969 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
970 break;
971
972 case nir_intrinsic_store_ssbo:
973 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
974 break;
975
976 case nir_intrinsic_get_ssbo_size:
977 case nir_intrinsic_ssbo_atomic_add:
978 case nir_intrinsic_ssbo_atomic_imin:
979 case nir_intrinsic_ssbo_atomic_umin:
980 case nir_intrinsic_ssbo_atomic_imax:
981 case nir_intrinsic_ssbo_atomic_umax:
982 case nir_intrinsic_ssbo_atomic_and:
983 case nir_intrinsic_ssbo_atomic_or:
984 case nir_intrinsic_ssbo_atomic_xor:
985 case nir_intrinsic_ssbo_atomic_exchange:
986 case nir_intrinsic_ssbo_atomic_comp_swap:
987 case nir_intrinsic_ssbo_atomic_fmin:
988 case nir_intrinsic_ssbo_atomic_fmax:
989 case nir_intrinsic_ssbo_atomic_fcomp_swap:
990 case nir_intrinsic_load_ssbo:
991 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
992 break;
993
994 default:
995 break;
996 }
997 }
998 }
999
1000 /* When disabled, we just mark everything as used. */
1001 if (unlikely(skip_compacting_binding_tables())) {
1002 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1003 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1004 }
1005
1006 /* Calculate the offsets and the binding table size based on the used
1007 * surfaces. After this point, the functions to go between "group indices"
1008 * and binding table indices can be used.
1009 */
1010 uint32_t next = 0;
1011 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1012 if (bt->used_mask[i] != 0) {
1013 bt->offsets[i] = next;
1014 next += util_bitcount64(bt->used_mask[i]);
1015 }
1016 }
1017 bt->size_bytes = next * 4;
1018
1019 if (INTEL_DEBUG(DEBUG_BT)) {
1020 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1021 }
1022
1023 /* Apply the binding table indices. The backend compiler is not expected
1024 * to change those, as we haven't set any of the *_start entries in brw
1025 * binding_table.
1026 */
1027 nir_builder b;
1028 nir_builder_init(&b, impl);
1029
1030 nir_foreach_block (block, impl) {
1031 nir_foreach_instr (instr, block) {
1032 if (instr->type == nir_instr_type_tex) {
1033 nir_tex_instr *tex = nir_instr_as_tex(instr);
1034 tex->texture_index =
1035 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
1036 tex->texture_index);
1037 continue;
1038 }
1039
1040 if (instr->type != nir_instr_type_intrinsic)
1041 continue;
1042
1043 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1044 switch (intrin->intrinsic) {
1045 case nir_intrinsic_image_size:
1046 case nir_intrinsic_image_load:
1047 case nir_intrinsic_image_store:
1048 case nir_intrinsic_image_atomic_add:
1049 case nir_intrinsic_image_atomic_imin:
1050 case nir_intrinsic_image_atomic_umin:
1051 case nir_intrinsic_image_atomic_imax:
1052 case nir_intrinsic_image_atomic_umax:
1053 case nir_intrinsic_image_atomic_and:
1054 case nir_intrinsic_image_atomic_or:
1055 case nir_intrinsic_image_atomic_xor:
1056 case nir_intrinsic_image_atomic_exchange:
1057 case nir_intrinsic_image_atomic_comp_swap:
1058 case nir_intrinsic_image_load_raw_intel:
1059 case nir_intrinsic_image_store_raw_intel:
1060 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1061 IRIS_SURFACE_GROUP_IMAGE);
1062 break;
1063
1064 case nir_intrinsic_load_ubo:
1065 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1066 IRIS_SURFACE_GROUP_UBO);
1067 break;
1068
1069 case nir_intrinsic_store_ssbo:
1070 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1071 IRIS_SURFACE_GROUP_SSBO);
1072 break;
1073
1074 case nir_intrinsic_load_output:
1075 if (devinfo->ver == 8) {
1076 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1077 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1078 }
1079 break;
1080
1081 case nir_intrinsic_get_ssbo_size:
1082 case nir_intrinsic_ssbo_atomic_add:
1083 case nir_intrinsic_ssbo_atomic_imin:
1084 case nir_intrinsic_ssbo_atomic_umin:
1085 case nir_intrinsic_ssbo_atomic_imax:
1086 case nir_intrinsic_ssbo_atomic_umax:
1087 case nir_intrinsic_ssbo_atomic_and:
1088 case nir_intrinsic_ssbo_atomic_or:
1089 case nir_intrinsic_ssbo_atomic_xor:
1090 case nir_intrinsic_ssbo_atomic_exchange:
1091 case nir_intrinsic_ssbo_atomic_comp_swap:
1092 case nir_intrinsic_ssbo_atomic_fmin:
1093 case nir_intrinsic_ssbo_atomic_fmax:
1094 case nir_intrinsic_ssbo_atomic_fcomp_swap:
1095 case nir_intrinsic_load_ssbo:
1096 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1097 IRIS_SURFACE_GROUP_SSBO);
1098 break;
1099
1100 default:
1101 break;
1102 }
1103 }
1104 }
1105 }
1106
1107 static void
1108 iris_debug_recompile(struct iris_screen *screen,
1109 struct pipe_debug_callback *dbg,
1110 struct iris_uncompiled_shader *ish,
1111 const struct brw_base_prog_key *key)
1112 {
1113 if (!ish || list_is_empty(&ish->variants)
1114 || list_is_singular(&ish->variants))
1115 return;
1116
1117 const struct intel_device_info *devinfo = &screen->devinfo;
1118 const struct brw_compiler *c = screen->compiler;
1119 const struct shader_info *info = &ish->nir->info;
1120
1121 brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1122 _mesa_shader_stage_to_string(info->stage),
1123 info->name ? info->name : "(no identifier)",
1124 info->label ? info->label : "");
1125
1126 struct iris_compiled_shader *shader =
1127 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1128 const void *old_iris_key = &shader->key;
1129
1130 union brw_any_prog_key old_key;
1131
1132 switch (info->stage) {
1133 case MESA_SHADER_VERTEX:
1134 old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1135 break;
1136 case MESA_SHADER_TESS_CTRL:
1137 old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1138 break;
1139 case MESA_SHADER_TESS_EVAL:
1140 old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1141 break;
1142 case MESA_SHADER_GEOMETRY:
1143 old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1144 break;
1145 case MESA_SHADER_FRAGMENT:
1146 old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1147 break;
1148 case MESA_SHADER_COMPUTE:
1149 old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1150 break;
1151 default:
1152 unreachable("invalid shader stage");
1153 }
1154
1155 brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1156 }
1157
1158 static void
1159 check_urb_size(struct iris_context *ice,
1160 unsigned needed_size,
1161 gl_shader_stage stage)
1162 {
1163 unsigned last_allocated_size = ice->shaders.urb.size[stage];
1164
1165 /* If the last URB allocation wasn't large enough for our needs,
1166 * flag it as needing to be reconfigured. Otherwise, we can use
1167 * the existing config. However, if the URB is constrained, and
1168 * we can shrink our size for this stage, we may be able to gain
1169 * extra concurrency by reconfiguring it to be smaller. Do so.
1170 */
1171 if (last_allocated_size < needed_size ||
1172 (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1173 ice->state.dirty |= IRIS_DIRTY_URB;
1174 }
1175 }
1176
1177 /**
1178 * Get the shader for the last enabled geometry stage.
1179 *
1180 * This stage is the one which will feed stream output and the rasterizer.
1181 */
1182 static gl_shader_stage
1183 last_vue_stage(struct iris_context *ice)
1184 {
1185 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1186 return MESA_SHADER_GEOMETRY;
1187
1188 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1189 return MESA_SHADER_TESS_EVAL;
1190
1191 return MESA_SHADER_VERTEX;
1192 }
1193
1194 /**
1195 * \param added Set to \c true if the variant was added to the list (i.e., a
1196 * variant matching \c key was not found). Set to \c false
1197 * otherwise.
1198 */
1199 static inline struct iris_compiled_shader *
1200 find_or_add_variant(const struct iris_screen *screen,
1201 struct iris_uncompiled_shader *ish,
1202 enum iris_program_cache_id cache_id,
1203 const void *key, unsigned key_size,
1204 bool *added)
1205 {
1206 struct list_head *start = ish->variants.next;
1207
1208 *added = false;
1209
1210 if (screen->precompile) {
1211 /* Check the first list entry. There will always be at least one
1212 * variant in the list (most likely the precompile variant), and
1213 * other contexts only append new variants, so we can safely check
1214 * it without locking, saving that cost in the common case.
1215 */
1216 struct iris_compiled_shader *first =
1217 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1218
1219 if (memcmp(&first->key, key, key_size) == 0) {
1220 util_queue_fence_wait(&first->ready);
1221 return first;
1222 }
1223
1224 /* Skip this one in the loop below */
1225 start = first->link.next;
1226 }
1227
1228 struct iris_compiled_shader *variant = NULL;
1229
1230 /* If it doesn't match, we have to walk the list; other contexts may be
1231 * concurrently appending shaders to it, so we need to lock here.
1232 */
1233 simple_mtx_lock(&ish->lock);
1234
1235 list_for_each_entry_from(struct iris_compiled_shader, v, start,
1236 &ish->variants, link) {
1237 if (memcmp(&v->key, key, key_size) == 0) {
1238 variant = v;
1239 break;
1240 }
1241 }
1242
1243 if (variant == NULL) {
1244 variant = iris_create_shader_variant(screen, NULL, cache_id,
1245 key_size, key);
1246
1247 /* Append our new variant to the shader's variant list. */
1248 list_addtail(&variant->link, &ish->variants);
1249 *added = true;
1250
1251 simple_mtx_unlock(&ish->lock);
1252 } else {
1253 simple_mtx_unlock(&ish->lock);
1254
1255 util_queue_fence_wait(&variant->ready);
1256 }
1257
1258 return variant;
1259 }
1260
1261 static void
1262 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1263 UNUSED int thread_index)
1264 {
1265 free(_job);
1266 }
1267
1268 static void
1269 iris_schedule_compile(struct iris_screen *screen,
1270 struct util_queue_fence *ready_fence,
1271 struct pipe_debug_callback *dbg,
1272 struct iris_threaded_compile_job *job,
1273 util_queue_execute_func execute)
1274
1275 {
1276 util_queue_fence_init(ready_fence);
1277
1278 struct util_async_debug_callback async_debug;
1279
1280 if (dbg) {
1281 u_async_debug_init(&async_debug);
1282 job->dbg = &async_debug.base;
1283 }
1284
1285 util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1286 iris_threaded_compile_job_delete, 0);
1287
1288 if (screen->driconf.sync_compile || dbg)
1289 util_queue_fence_wait(ready_fence);
1290
1291 if (dbg) {
1292 u_async_debug_drain(&async_debug, dbg);
1293 u_async_debug_cleanup(&async_debug);
1294 }
1295 }
1296
1297 /**
1298 * Compile a vertex shader, and upload the assembly.
1299 */
1300 static void
1301 iris_compile_vs(struct iris_screen *screen,
1302 struct u_upload_mgr *uploader,
1303 struct pipe_debug_callback *dbg,
1304 struct iris_uncompiled_shader *ish,
1305 struct iris_compiled_shader *shader)
1306 {
1307 const struct brw_compiler *compiler = screen->compiler;
1308 const struct intel_device_info *devinfo = &screen->devinfo;
1309 void *mem_ctx = ralloc_context(NULL);
1310 struct brw_vs_prog_data *vs_prog_data =
1311 rzalloc(mem_ctx, struct brw_vs_prog_data);
1312 struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1313 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1314 enum brw_param_builtin *system_values;
1315 unsigned num_system_values;
1316 unsigned num_cbufs;
1317
1318 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1319 const struct iris_vs_prog_key *const key = &shader->key.vs;
1320
1321 if (key->vue.nr_userclip_plane_consts) {
1322 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1323 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1324 true, false, NULL);
1325 nir_lower_io_to_temporaries(nir, impl, true, false);
1326 nir_lower_global_vars_to_local(nir);
1327 nir_lower_vars_to_ssa(nir);
1328 nir_shader_gather_info(nir, impl);
1329 }
1330
1331 prog_data->use_alt_mode = nir->info.is_arb_asm;
1332
1333 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1334 &num_system_values, &num_cbufs);
1335
1336 struct iris_binding_table bt;
1337 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1338 num_system_values, num_cbufs);
1339
1340 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1341
1342 brw_compute_vue_map(devinfo,
1343 &vue_prog_data->vue_map, nir->info.outputs_written,
1344 nir->info.separate_shader, /* pos_slots */ 1);
1345
1346 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1347
1348 struct brw_compile_vs_params params = {
1349 .nir = nir,
1350 .key = &brw_key,
1351 .prog_data = vs_prog_data,
1352 .log_data = dbg,
1353 };
1354
1355 const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
1356 if (program == NULL) {
1357 dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1358 ralloc_free(mem_ctx);
1359
1360 shader->compilation_failed = true;
1361 util_queue_fence_signal(&shader->ready);
1362
1363 return;
1364 }
1365
1366 shader->compilation_failed = false;
1367
1368 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1369
1370 uint32_t *so_decls =
1371 screen->vtbl.create_so_decl_list(&ish->stream_output,
1372 &vue_prog_data->vue_map);
1373
1374 iris_finalize_program(shader, prog_data, so_decls, system_values,
1375 num_system_values, 0, num_cbufs, &bt);
1376
1377 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1378 sizeof(*key), key, program);
1379
1380 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1381
1382 ralloc_free(mem_ctx);
1383 }
1384
1385 /**
1386 * Update the current vertex shader variant.
1387 *
1388 * Fill out the key, look in the cache, compile and bind if needed.
1389 */
1390 static void
1391 iris_update_compiled_vs(struct iris_context *ice)
1392 {
1393 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1394 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1395 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1396 struct iris_uncompiled_shader *ish =
1397 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1398
1399 struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1400 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1401
1402 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1403 bool added;
1404 struct iris_compiled_shader *shader =
1405 find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1406
1407 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1408 &key, sizeof(key))) {
1409 iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1410 }
1411
1412 if (shader->compilation_failed)
1413 shader = NULL;
1414
1415 if (old != shader) {
1416 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1417 shader);
1418 ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1419 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1420 IRIS_STAGE_DIRTY_BINDINGS_VS |
1421 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1422 shs->sysvals_need_upload = true;
1423
1424 unsigned urb_entry_size = shader ?
1425 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1426 check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1427 }
1428 }
1429
1430 /**
1431 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1432 */
1433 const struct shader_info *
1434 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1435 {
1436 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1437
1438 if (!ish)
1439 return NULL;
1440
1441 const nir_shader *nir = ish->nir;
1442 return &nir->info;
1443 }
1444
1445 /**
1446 * Get the union of TCS output and TES input slots.
1447 *
1448 * TCS and TES need to agree on a common URB entry layout. In particular,
1449 * the data for all patch vertices is stored in a single URB entry (unlike
1450 * GS which has one entry per input vertex). This means that per-vertex
1451 * array indexing needs a stride.
1452 *
1453 * SSO requires locations to match, but doesn't require the number of
1454 * outputs/inputs to match (in fact, the TCS often has extra outputs).
1455 * So, we need to take the extra step of unifying these on the fly.
1456 */
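/* For example, if the TES only reads per-vertex slot VARYING_SLOT_POS but
 * the TCS also writes VARYING_SLOT_PSIZ, both slots are included so that
 * the two stages agree on the URB entry stride.
 */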
1457 static void
1458 get_unified_tess_slots(const struct iris_context *ice,
1459 uint64_t *per_vertex_slots,
1460 uint32_t *per_patch_slots)
1461 {
1462 const struct shader_info *tcs =
1463 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1464 const struct shader_info *tes =
1465 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1466
1467 *per_vertex_slots = tes->inputs_read;
1468 *per_patch_slots = tes->patch_inputs_read;
1469
1470 if (tcs) {
1471 *per_vertex_slots |= tcs->outputs_written;
1472 *per_patch_slots |= tcs->patch_outputs_written;
1473 }
1474 }
1475
1476 /**
1477 * Compile a tessellation control shader, and upload the assembly.
1478 */
1479 static void
1480 iris_compile_tcs(struct iris_screen *screen,
1481 struct hash_table *passthrough_ht,
1482 struct u_upload_mgr *uploader,
1483 struct pipe_debug_callback *dbg,
1484 struct iris_uncompiled_shader *ish,
1485 struct iris_compiled_shader *shader)
1486 {
1487 const struct brw_compiler *compiler = screen->compiler;
1488 const struct nir_shader_compiler_options *options =
1489 compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1490 void *mem_ctx = ralloc_context(NULL);
1491 struct brw_tcs_prog_data *tcs_prog_data =
1492 rzalloc(mem_ctx, struct brw_tcs_prog_data);
1493 struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1494 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1495 const struct intel_device_info *devinfo = &screen->devinfo;
1496 enum brw_param_builtin *system_values = NULL;
1497 unsigned num_system_values = 0;
1498 unsigned num_cbufs = 0;
1499
1500 nir_shader *nir;
1501
1502 struct iris_binding_table bt;
1503
1504 const struct iris_tcs_prog_key *const key = &shader->key.tcs;
1505 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1506
1507 if (ish) {
1508 nir = nir_shader_clone(mem_ctx, ish->nir);
1509
1510 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1511 &num_system_values, &num_cbufs);
1512 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1513 num_system_values, num_cbufs);
1514 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1515 } else {
1516 nir =
1517 brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1518
1519 /* Reserve space for passing the default tess levels as constants. */
1520 num_cbufs = 1;
1521 num_system_values = 8;
1522 system_values =
1523 rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1524 prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1525 prog_data->nr_params = num_system_values;
1526
1527 if (key->tes_primitive_mode == GL_QUADS) {
1528 for (int i = 0; i < 4; i++)
1529 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1530
1531 system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1532 system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1533 } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1534 for (int i = 0; i < 3; i++)
1535 system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1536
1537 system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1538 } else {
1539 assert(key->tes_primitive_mode == GL_ISOLINES);
1540 system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1541 system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1542 }
1543
1544 /* Manually set up the TCS binding table. */
1545 memset(&bt, 0, sizeof(bt));
1546 bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1547 bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1548 bt.size_bytes = 4;
1549
1550 prog_data->ubo_ranges[0].length = 1;
1551 }
1552
1553 char *error_str = NULL;
1554 const unsigned *program =
1555 brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
1556 nir, -1, NULL, &error_str);
1557 if (program == NULL) {
1558 dbg_printf("Failed to compile control shader: %s\n", error_str);
1559 ralloc_free(mem_ctx);
1560
1561 shader->compilation_failed = true;
1562 util_queue_fence_signal(&shader->ready);
1563
1564 return;
1565 }
1566
1567 shader->compilation_failed = false;
1568
1569 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1570
1571 iris_finalize_program(shader, prog_data, NULL, system_values,
1572 num_system_values, 0, num_cbufs, &bt);
1573
1574 iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
1575 IRIS_CACHE_TCS, sizeof(*key), key, program);
1576
1577 if (ish)
1578 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1579
1580 ralloc_free(mem_ctx);
1581 }
1582
1583 /**
1584 * Update the current tessellation control shader variant.
1585 *
1586 * Fill out the key, look in the cache, compile and bind if needed.
1587 */
1588 static void
1589 iris_update_compiled_tcs(struct iris_context *ice)
1590 {
1591 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1592 struct iris_uncompiled_shader *tcs =
1593 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1594 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1595 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1596 const struct brw_compiler *compiler = screen->compiler;
1597 const struct intel_device_info *devinfo = &screen->devinfo;
1598
1599 const struct shader_info *tes_info =
1600 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
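   /* Build the TCS key.  Only 8_PATCH mode and the passthrough TCS need the
    * input patch size (input_vertices); otherwise it stays 0 so one variant
    * can serve any patch size.
    */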
1601 struct iris_tcs_prog_key key = {
1602 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1603 .tes_primitive_mode = tes_info->tess.primitive_mode,
1604 .input_vertices =
1605 !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1606 .quads_workaround = devinfo->ver < 9 &&
1607 tes_info->tess.primitive_mode == GL_QUADS &&
1608 tes_info->tess.spacing == TESS_SPACING_EQUAL,
1609 };
1610 get_unified_tess_slots(ice, &key.outputs_written,
1611 &key.patch_outputs_written);
1612 screen->vtbl.populate_tcs_key(ice, &key);
1613
1614 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1615 struct iris_compiled_shader *shader;
1616 bool added = false;
1617
1618 if (tcs != NULL) {
1619 shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
1620 sizeof(key), &added);
1621 } else {
1622 /* Look for and possibly create a passthrough TCS */
1623 shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1624
1626 if (shader == NULL) {
1627 shader = iris_create_shader_variant(screen, ice->shaders.cache,
1628 IRIS_CACHE_TCS, sizeof(key), &key);
1629 added = true;
1630 }
1632 }
1633
1634    /* If the variant was just added to (whichever) cache, it still needs to
1635     * be compiled: call iris_compile_tcs unless this is a real TCS whose
1636     * assembly we managed to retrieve from the disk cache.
1637     */
1638 if (added &&
1639 (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
1640 &key, sizeof(key)))) {
1641 iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
1642 shader);
1643 }
1644
1645 if (shader->compilation_failed)
1646 shader = NULL;
1647
1648 if (old != shader) {
1649 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
1650 shader);
1651 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1652 IRIS_STAGE_DIRTY_BINDINGS_TCS |
1653 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1654 shs->sysvals_need_upload = true;
1655
1656 unsigned urb_entry_size = shader ?
1657 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1658 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
1659 }
1660 }
1661
1662 /**
1663 * Compile a tessellation evaluation shader, and upload the assembly.
1664 */
1665 static void
iris_compile_tes(struct iris_screen * screen,struct u_upload_mgr * uploader,struct pipe_debug_callback * dbg,struct iris_uncompiled_shader * ish,struct iris_compiled_shader * shader)1666 iris_compile_tes(struct iris_screen *screen,
1667 struct u_upload_mgr *uploader,
1668 struct pipe_debug_callback *dbg,
1669 struct iris_uncompiled_shader *ish,
1670 struct iris_compiled_shader *shader)
1671 {
1672 const struct brw_compiler *compiler = screen->compiler;
1673 void *mem_ctx = ralloc_context(NULL);
1674 struct brw_tes_prog_data *tes_prog_data =
1675 rzalloc(mem_ctx, struct brw_tes_prog_data);
1676 struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1677 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1678 enum brw_param_builtin *system_values;
1679 const struct intel_device_info *devinfo = &screen->devinfo;
1680 unsigned num_system_values;
1681 unsigned num_cbufs;
1682
1683 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1684 const struct iris_tes_prog_key *const key = &shader->key.tes;
1685
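   /* If user clip planes are in use, lower them to clip-distance writes in
    * this shader, then re-lower I/O and clean up so the new outputs show up
    * in the gathered shader info.
    */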
1686 if (key->vue.nr_userclip_plane_consts) {
1687 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1688 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1689 true, false, NULL);
1690 nir_lower_io_to_temporaries(nir, impl, true, false);
1691 nir_lower_global_vars_to_local(nir);
1692 nir_lower_vars_to_ssa(nir);
1693 nir_shader_gather_info(nir, impl);
1694 }
1695
1696 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1697 &num_system_values, &num_cbufs);
1698
1699 struct iris_binding_table bt;
1700 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1701 num_system_values, num_cbufs);
1702
1703 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1704
1705 struct brw_vue_map input_vue_map;
1706 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1707 key->patch_inputs_read);
1708
1709 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1710
1711 char *error_str = NULL;
1712 const unsigned *program =
1713 brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
1714 tes_prog_data, nir, -1, NULL, &error_str);
1715 if (program == NULL) {
1716 dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
1717 ralloc_free(mem_ctx);
1718
1719 shader->compilation_failed = true;
1720 util_queue_fence_signal(&shader->ready);
1721
1722 return;
1723 }
1724
1725 shader->compilation_failed = false;
1726
1727 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1728
1729 uint32_t *so_decls =
1730 screen->vtbl.create_so_decl_list(&ish->stream_output,
1731 &vue_prog_data->vue_map);
1732
1733 iris_finalize_program(shader, prog_data, so_decls, system_values,
1734 num_system_values, 0, num_cbufs, &bt);
1735
1736 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
1737 sizeof(*key), key, program);
1738
1739 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1740
1741 ralloc_free(mem_ctx);
1742 }
1743
1744 /**
1745 * Update the current tessellation evaluation shader variant.
1746 *
1747 * Fill out the key, look in the cache, compile and bind if needed.
1748 */
1749 static void
iris_update_compiled_tes(struct iris_context * ice)1750 iris_update_compiled_tes(struct iris_context *ice)
1751 {
1752 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1753 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1754 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1755 struct iris_uncompiled_shader *ish =
1756 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1757
1758 struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1759 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1760 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1761
1762 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1763 bool added;
1764 struct iris_compiled_shader *shader =
1765 find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
1766
1767 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1768 &key, sizeof(key))) {
1769 iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
1770 }
1771
1772 if (shader->compilation_failed)
1773 shader = NULL;
1774
1775 if (old != shader) {
1776 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
1777 shader);
1778 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1779 IRIS_STAGE_DIRTY_BINDINGS_TES |
1780 IRIS_STAGE_DIRTY_CONSTANTS_TES;
1781 shs->sysvals_need_upload = true;
1782
1783 unsigned urb_entry_size = shader ?
1784 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1785 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
1786 }
1787
1788 /* TODO: Could compare and avoid flagging this. */
1789 const struct shader_info *tes_info = &ish->nir->info;
1790 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1791 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1792 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1793 }
1794 }
1795
1796 /**
1797 * Compile a geometry shader, and upload the assembly.
1798 */
1799 static void
iris_compile_gs(struct iris_screen * screen,struct u_upload_mgr * uploader,struct pipe_debug_callback * dbg,struct iris_uncompiled_shader * ish,struct iris_compiled_shader * shader)1800 iris_compile_gs(struct iris_screen *screen,
1801 struct u_upload_mgr *uploader,
1802 struct pipe_debug_callback *dbg,
1803 struct iris_uncompiled_shader *ish,
1804 struct iris_compiled_shader *shader)
1805 {
1806 const struct brw_compiler *compiler = screen->compiler;
1807 const struct intel_device_info *devinfo = &screen->devinfo;
1808 void *mem_ctx = ralloc_context(NULL);
1809 struct brw_gs_prog_data *gs_prog_data =
1810 rzalloc(mem_ctx, struct brw_gs_prog_data);
1811 struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1812 struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1813 enum brw_param_builtin *system_values;
1814 unsigned num_system_values;
1815 unsigned num_cbufs;
1816
1817 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1818 const struct iris_gs_prog_key *const key = &shader->key.gs;
1819
1820 if (key->vue.nr_userclip_plane_consts) {
1821 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1822 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1823 false, NULL);
1824 nir_lower_io_to_temporaries(nir, impl, true, false);
1825 nir_lower_global_vars_to_local(nir);
1826 nir_lower_vars_to_ssa(nir);
1827 nir_shader_gather_info(nir, impl);
1828 }
1829
1830 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1831 &num_system_values, &num_cbufs);
1832
1833 struct iris_binding_table bt;
1834 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1835 num_system_values, num_cbufs);
1836
1837 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1838
1839 brw_compute_vue_map(devinfo,
1840 &vue_prog_data->vue_map, nir->info.outputs_written,
1841 nir->info.separate_shader, /* pos_slots */ 1);
1842
1843 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1844
1845 char *error_str = NULL;
1846 const unsigned *program =
1847 brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data,
1848 nir, -1, NULL, &error_str);
1849 if (program == NULL) {
1850 dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1851 ralloc_free(mem_ctx);
1852
1853 shader->compilation_failed = true;
1854 util_queue_fence_signal(&shader->ready);
1855
1856 return;
1857 }
1858
1859 shader->compilation_failed = false;
1860
1861 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1862
1863 uint32_t *so_decls =
1864 screen->vtbl.create_so_decl_list(&ish->stream_output,
1865 &vue_prog_data->vue_map);
1866
1867 iris_finalize_program(shader, prog_data, so_decls, system_values,
1868 num_system_values, 0, num_cbufs, &bt);
1869
1870 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
1871 sizeof(*key), key, program);
1872
1873 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1874
1875 ralloc_free(mem_ctx);
1876 }
1877
1878 /**
1879 * Update the current geometry shader variant.
1880 *
1881 * Fill out the key, look in the cache, compile and bind if needed.
1882 */
1883 static void
iris_update_compiled_gs(struct iris_context * ice)1884 iris_update_compiled_gs(struct iris_context *ice)
1885 {
1886 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1887 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1888 struct iris_uncompiled_shader *ish =
1889 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1890 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1891 struct iris_compiled_shader *shader = NULL;
1892 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1893
1894 if (ish) {
1895 struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1896 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1897
1898 bool added;
1899
1900 shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
1901 sizeof(key), &added);
1902
1903 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1904 &key, sizeof(key))) {
1905 iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
1906 }
1907
1908 if (shader->compilation_failed)
1909 shader = NULL;
1910 }
1911
1912 if (old != shader) {
1913 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
1914 shader);
1915 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1916 IRIS_STAGE_DIRTY_BINDINGS_GS |
1917 IRIS_STAGE_DIRTY_CONSTANTS_GS;
1918 shs->sysvals_need_upload = true;
1919
1920 unsigned urb_entry_size = shader ?
1921 ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1922 check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
1923 }
1924 }
1925
1926 /**
1927 * Compile a fragment (pixel) shader, and upload the assembly.
1928 */
1929 static void
iris_compile_fs(struct iris_screen * screen,struct u_upload_mgr * uploader,struct pipe_debug_callback * dbg,struct iris_uncompiled_shader * ish,struct iris_compiled_shader * shader,struct brw_vue_map * vue_map)1930 iris_compile_fs(struct iris_screen *screen,
1931 struct u_upload_mgr *uploader,
1932 struct pipe_debug_callback *dbg,
1933 struct iris_uncompiled_shader *ish,
1934 struct iris_compiled_shader *shader,
1935 struct brw_vue_map *vue_map)
1936 {
1937 const struct brw_compiler *compiler = screen->compiler;
1938 void *mem_ctx = ralloc_context(NULL);
1939 struct brw_wm_prog_data *fs_prog_data =
1940 rzalloc(mem_ctx, struct brw_wm_prog_data);
1941 struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1942 enum brw_param_builtin *system_values;
1943 const struct intel_device_info *devinfo = &screen->devinfo;
1944 unsigned num_system_values;
1945 unsigned num_cbufs;
1946
1947 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1948 const struct iris_fs_prog_key *const key = &shader->key.fs;
1949
1950 prog_data->use_alt_mode = nir->info.is_arb_asm;
1951
1952 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1953 &num_system_values, &num_cbufs);
1954
1955 /* Lower output variables to load_output intrinsics before setting up
1956 * binding tables, so iris_setup_binding_table can map any load_output
1957 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
1958 * non-coherent framebuffer fetches.
1959 */
1960 brw_nir_lower_fs_outputs(nir);
1961
1962 /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
1963 * and do not need a binding table entry with a null surface. Earlier
1964 * generations need an entry for a null surface.
1965 */
1966 int null_rts = devinfo->ver < 11 ? 1 : 0;
1967
1968 struct iris_binding_table bt;
1969 iris_setup_binding_table(devinfo, nir, &bt,
1970 MAX2(key->nr_color_regions, null_rts),
1971 num_system_values, num_cbufs);
1972
1973 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1974
1975 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1976
1977 struct brw_compile_fs_params params = {
1978 .nir = nir,
1979 .key = &brw_key,
1980 .prog_data = fs_prog_data,
1981
1982 .allow_spilling = true,
1983 .vue_map = vue_map,
1984
1985 .log_data = dbg,
1986 };
1987
1988    const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
1989 if (program == NULL) {
1990 dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
1991 ralloc_free(mem_ctx);
1992
1993 shader->compilation_failed = true;
1994 util_queue_fence_signal(&shader->ready);
1995
1996 return;
1997 }
1998
1999 shader->compilation_failed = false;
2000
2001 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2002
2003 iris_finalize_program(shader, prog_data, NULL, system_values,
2004 num_system_values, 0, num_cbufs, &bt);
2005
2006 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2007 sizeof(*key), key, program);
2008
2009 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2010
2011 ralloc_free(mem_ctx);
2012 }
2013
2014 /**
2015 * Update the current fragment shader variant.
2016 *
2017 * Fill out the key, look in the cache, compile and bind if needed.
2018 */
2019 static void
iris_update_compiled_fs(struct iris_context * ice)2020 iris_update_compiled_fs(struct iris_context *ice)
2021 {
2022 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2023 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2024 struct iris_uncompiled_shader *ish =
2025 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2026 struct iris_fs_prog_key key = { KEY_ID(base) };
2027 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2028 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2029
2030 struct brw_vue_map *last_vue_map =
2031 &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2032
2033 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2034 key.input_slots_valid = last_vue_map->slots_valid;
2035
2036 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2037 bool added;
2038 struct iris_compiled_shader *shader =
2039 find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2040 sizeof(key), &added);
2041
2042 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2043 &key, sizeof(key))) {
2044 iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2045 }
2046
2047 if (shader->compilation_failed)
2048 shader = NULL;
2049
2050 if (old != shader) {
2051       // XXX: only need to flag CLIP if the use of NONPERSPECTIVE barycentrics
2052       // toggles.  Might be able to avoid flagging SBE too.
2053 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2054 shader);
2055 ice->state.dirty |= IRIS_DIRTY_WM |
2056 IRIS_DIRTY_CLIP |
2057 IRIS_DIRTY_SBE;
2058 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2059 IRIS_STAGE_DIRTY_BINDINGS_FS |
2060 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2061 shs->sysvals_need_upload = true;
2062 }
2063 }
2064
2065 /**
2066 * Update the last enabled stage's VUE map.
2067 *
2068  * When the output interface of the shader feeding the rasterizer changes,
2069  * we need to re-emit various packets.
2070 */
2071 static void
update_last_vue_map(struct iris_context * ice,struct iris_compiled_shader * shader)2072 update_last_vue_map(struct iris_context *ice,
2073 struct iris_compiled_shader *shader)
2074 {
2075 struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
2076 struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
2077 struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
2078 &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2079 const uint64_t changed_slots =
2080 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
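   /* changed_slots now has a bit set for every varying slot written by
    * exactly one of the old and new last-VUE-stage shaders.
    */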
2081
2082 if (changed_slots & VARYING_BIT_VIEWPORT) {
2083 ice->state.num_viewports =
2084 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2085 ice->state.dirty |= IRIS_DIRTY_CLIP |
2086 IRIS_DIRTY_SF_CL_VIEWPORT |
2087 IRIS_DIRTY_CC_VIEWPORT |
2088 IRIS_DIRTY_SCISSOR_RECT;
2089 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2090 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2091 }
2092
2093 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2094 ice->state.dirty |= IRIS_DIRTY_SBE;
2095 }
2096
2097 iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2098 }
2099
2100 static void
iris_update_pull_constant_descriptors(struct iris_context * ice,gl_shader_stage stage)2101 iris_update_pull_constant_descriptors(struct iris_context *ice,
2102 gl_shader_stage stage)
2103 {
2104 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2105
2106 if (!shader || !shader->prog_data->has_ubo_pull)
2107 return;
2108
2109 struct iris_shader_state *shs = &ice->state.shaders[stage];
2110 bool any_new_descriptors =
2111 shader->num_system_values > 0 && shs->sysvals_need_upload;
2112
2113 unsigned bound_cbufs = shs->bound_cbufs;
2114
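   /* Walk the bound constant buffers (u_bit_scan pops the lowest set bit of
    * bound_cbufs each iteration).  Any cbuf with a buffer but no surface
    * state yet gets one uploaded here and counts as a new descriptor.
    */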
2115 while (bound_cbufs) {
2116 const int i = u_bit_scan(&bound_cbufs);
2117 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2118 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2119 if (!surf_state->res && cbuf->buffer) {
2120 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2121 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2122 any_new_descriptors = true;
2123 }
2124 }
2125
2126 if (any_new_descriptors)
2127 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2128 }
2129
2130 /**
2131 * Update the current shader variants for the given state.
2132 *
2133 * This should be called on every draw call to ensure that the correct
2134 * shaders are bound. It will also flag any dirty state triggered by
2135 * swapping out those shaders.
2136 */
2137 void
iris_update_compiled_shaders(struct iris_context * ice)2138 iris_update_compiled_shaders(struct iris_context *ice)
2139 {
2140 const uint64_t stage_dirty = ice->state.stage_dirty;
2141
2142 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2143 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2144 struct iris_uncompiled_shader *tes =
2145 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2146 if (tes) {
2147 iris_update_compiled_tcs(ice);
2148 iris_update_compiled_tes(ice);
2149 } else {
2150 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2151 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2152 ice->state.stage_dirty |=
2153 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2154 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2155 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2156
2157 if (ice->shaders.urb.constrained)
2158 ice->state.dirty |= IRIS_DIRTY_URB;
2159 }
2160 }
2161
2162 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2163 iris_update_compiled_vs(ice);
2164 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2165 iris_update_compiled_gs(ice);
2166
2167 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2168 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2169 const struct iris_compiled_shader *gs =
2170 ice->shaders.prog[MESA_SHADER_GEOMETRY];
2171 const struct iris_compiled_shader *tes =
2172 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2173
2174 bool points_or_lines = false;
2175
2176 if (gs) {
2177 const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
2178 points_or_lines =
2179 gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
2180 gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
2181 } else if (tes) {
2182 const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
2183 points_or_lines =
2184 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
2185 tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
2186 }
2187
2188 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2189 /* Outbound to XY Clip enables */
2190 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2191 ice->state.dirty |= IRIS_DIRTY_CLIP;
2192 }
2193 }
2194
2195 gl_shader_stage last_stage = last_vue_stage(ice);
2196 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2197 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2198 update_last_vue_map(ice, shader);
2199 if (ice->state.streamout != shader->streamout) {
2200 ice->state.streamout = shader->streamout;
2201 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2202 }
2203
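   /* Gallium gives stream output strides in dwords; iris's SO targets store
    * them in bytes, so convert while copying them over.
    */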
2204 if (ice->state.streamout_active) {
2205 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2206 struct iris_stream_output_target *so =
2207 (void *) ice->state.so_target[i];
2208 if (so)
2209 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2210 }
2211 }
2212
2213 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2214 iris_update_compiled_fs(ice);
2215
2216 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2217 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2218 iris_update_pull_constant_descriptors(ice, i);
2219 }
2220 }
2221
2222 static void
iris_compile_cs(struct iris_screen * screen,struct u_upload_mgr * uploader,struct pipe_debug_callback * dbg,struct iris_uncompiled_shader * ish,struct iris_compiled_shader * shader)2223 iris_compile_cs(struct iris_screen *screen,
2224 struct u_upload_mgr *uploader,
2225 struct pipe_debug_callback *dbg,
2226 struct iris_uncompiled_shader *ish,
2227 struct iris_compiled_shader *shader)
2228 {
2229 const struct brw_compiler *compiler = screen->compiler;
2230 void *mem_ctx = ralloc_context(NULL);
2231 struct brw_cs_prog_data *cs_prog_data =
2232 rzalloc(mem_ctx, struct brw_cs_prog_data);
2233 struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2234 enum brw_param_builtin *system_values;
2235 const struct intel_device_info *devinfo = &screen->devinfo;
2236 unsigned num_system_values;
2237 unsigned num_cbufs;
2238
2239 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2240 const struct iris_cs_prog_key *const key = &shader->key.cs;
2241
2242 NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2243
2244 iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
2245 ish->kernel_input_size,
2246 &system_values, &num_system_values, &num_cbufs);
2247
2248 struct iris_binding_table bt;
2249 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2250 num_system_values, num_cbufs);
2251
2252 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
2253
2254 struct brw_compile_cs_params params = {
2255 .nir = nir,
2256 .key = &brw_key,
2257 .prog_data = cs_prog_data,
2258 .log_data = dbg,
2259 };
2260
2261    const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
2262 if (program == NULL) {
2263 dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2264
2265 shader->compilation_failed = true;
2266 util_queue_fence_signal(&shader->ready);
2267
2268 return;
2269 }
2270
2271 shader->compilation_failed = false;
2272
2273 iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2274
2275 iris_finalize_program(shader, prog_data, NULL, system_values,
2276 num_system_values, ish->kernel_input_size, num_cbufs,
2277 &bt);
2278
2279 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
2280 sizeof(*key), key, program);
2281
2282 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2283
2284 ralloc_free(mem_ctx);
2285 }
2286
2287 static void
iris_update_compiled_cs(struct iris_context * ice)2288 iris_update_compiled_cs(struct iris_context *ice)
2289 {
2290 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2291 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2292 struct iris_uncompiled_shader *ish =
2293 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2294
2295 struct iris_cs_prog_key key = { KEY_ID(base) };
2296 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2297 screen->vtbl.populate_cs_key(ice, &key);
2298
2299 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2300 bool added;
2301 struct iris_compiled_shader *shader =
2302 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
2303 sizeof(key), &added);
2304
2305 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2306 &key, sizeof(key))) {
2307 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2308 }
2309
2310 if (shader->compilation_failed)
2311 shader = NULL;
2312
2313 if (old != shader) {
2314 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
2315 shader);
2316 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2317 IRIS_STAGE_DIRTY_BINDINGS_CS |
2318 IRIS_STAGE_DIRTY_CONSTANTS_CS;
2319 shs->sysvals_need_upload = true;
2320 }
2321 }
2322
2323 void
iris_update_compiled_compute_shader(struct iris_context * ice)2324 iris_update_compiled_compute_shader(struct iris_context *ice)
2325 {
2326 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2327 iris_update_compiled_cs(ice);
2328
2329 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2330 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2331 }
2332
2333 void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data * cs_prog_data,unsigned threads,uint32_t * dst)2334 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2335 unsigned threads,
2336 uint32_t *dst)
2337 {
2338 assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2339 assert(cs_prog_data->push.cross_thread.size == 0);
2340 assert(cs_prog_data->push.per_thread.dwords == 1);
2341 assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
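   /* Each thread's push constants occupy one 32B register (eight dwords),
    * and the only per-thread value is the subgroup ID, so thread t reads
    * its ID from dword 8 * t.
    */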
2342 for (unsigned t = 0; t < threads; t++)
2343 dst[8 * t] = t;
2344 }
2345
2346 /**
2347 * Allocate scratch BOs as needed for the given per-thread size and stage.
2348 */
2349 struct iris_bo *
iris_get_scratch_space(struct iris_context * ice,unsigned per_thread_scratch,gl_shader_stage stage)2350 iris_get_scratch_space(struct iris_context *ice,
2351 unsigned per_thread_scratch,
2352 gl_shader_stage stage)
2353 {
2354 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2355 struct iris_bufmgr *bufmgr = screen->bufmgr;
2356 const struct intel_device_info *devinfo = &screen->devinfo;
2357
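   /* per_thread_scratch is a power of two of at least 1KB.  Encode it as a
    * bucket index: ffs(1024) == 11, so 1KB -> 0, 2KB -> 1, 4KB -> 2, and so
    * on.  The asserts below check the range and the round trip.
    */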
2358 unsigned encoded_size = ffs(per_thread_scratch) - 11;
2359 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
2360 assert(per_thread_scratch == 1 << (encoded_size + 10));
2361
2362 /* On GFX version 12.5, scratch access changed to a surface-based model.
2363 * Instead of each shader type having its own layout based on IDs passed
2364 * from the relevant fixed-function unit, all scratch access is based on
2365 * thread IDs like it always has been for compute.
2366 */
2367 if (devinfo->verx10 >= 125)
2368 stage = MESA_SHADER_COMPUTE;
2369
2370 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2371
2372 if (!*bop) {
2373 assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2374 uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2375 *bop = iris_bo_alloc(bufmgr, "scratch", size, 1, IRIS_MEMZONE_SHADER, 0);
2376 }
2377
2378 return *bop;
2379 }
2380
2381 const struct iris_state_ref *
iris_get_scratch_surf(struct iris_context * ice,unsigned per_thread_scratch)2382 iris_get_scratch_surf(struct iris_context *ice,
2383 unsigned per_thread_scratch)
2384 {
2385 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2386 ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
2387
2388 assert(devinfo->verx10 >= 125);
2389
2390 unsigned encoded_size = ffs(per_thread_scratch) - 11;
2391 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
2392 assert(per_thread_scratch == 1 << (encoded_size + 10));
2393
2394 struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
2395
2396 if (ref->res)
2397 return ref;
2398
2399 struct iris_bo *scratch_bo =
2400 iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
2401
2402 void *map = upload_state(ice->state.bindless_uploader, ref,
2403 screen->isl_dev.ss.size, 64);
2404
2405 isl_buffer_fill_state(&screen->isl_dev, map,
2406 .address = scratch_bo->address,
2407 .size_B = scratch_bo->size,
2408 .format = ISL_FORMAT_RAW,
2409 .swizzle = ISL_SWIZZLE_IDENTITY,
2410 .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
2411 .stride_B = per_thread_scratch,
2412 .is_scratch = true);
2413
2414 return ref;
2415 }
2416
2417 /* ------------------------------------------------------------------- */
2418
2419 /**
2420 * The pipe->create_[stage]_state() driver hooks.
2421 *
2422 * Performs basic NIR preprocessing, records any state dependencies, and
2423 * returns an iris_uncompiled_shader as the Gallium CSO.
2424 *
2425 * Actual shader compilation to assembly happens later, at first use.
2426 */
2427 static void *
iris_create_uncompiled_shader(struct iris_screen * screen,nir_shader * nir,const struct pipe_stream_output_info * so_info)2428 iris_create_uncompiled_shader(struct iris_screen *screen,
2429 nir_shader *nir,
2430 const struct pipe_stream_output_info *so_info)
2431 {
2432 struct iris_uncompiled_shader *ish =
2433 calloc(1, sizeof(struct iris_uncompiled_shader));
2434 if (!ish)
2435 return NULL;
2436
2437 pipe_reference_init(&ish->ref, 1);
2438 list_inithead(&ish->variants);
2439 simple_mtx_init(&ish->lock, mtx_plain);
2440
2441 ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
2442
2443 ish->program_id = get_new_program_id(screen);
2444 ish->nir = nir;
2445 if (so_info) {
2446 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2447 update_so_info(&ish->stream_output, nir->info.outputs_written);
2448 }
2449
2450 if (screen->disk_cache) {
2451 /* Serialize the NIR to a binary blob that we can hash for the disk
2452 * cache. Drop unnecessary information (like variable names)
2453 * so the serialized NIR is smaller, and also to let us detect more
2454 * isomorphic shaders when hashing, increasing cache hits.
2455 */
2456 struct blob blob;
2457 blob_init(&blob);
2458 nir_serialize(&blob, nir, true);
2459 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2460 blob_finish(&blob);
2461 }
2462
2463 return ish;
2464 }
2465
2466 static void *
iris_create_compute_state(struct pipe_context * ctx,const struct pipe_compute_state * state)2467 iris_create_compute_state(struct pipe_context *ctx,
2468 const struct pipe_compute_state *state)
2469 {
2470 struct iris_context *ice = (void *) ctx;
2471 struct iris_screen *screen = (void *) ctx->screen;
2472 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2473 const nir_shader_compiler_options *options =
2474 screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions;
2475
2476 nir_shader *nir;
2477 switch (state->ir_type) {
2478 case PIPE_SHADER_IR_NIR:
2479 nir = (void *)state->prog;
2480 break;
2481
2482 case PIPE_SHADER_IR_NIR_SERIALIZED: {
2483 struct blob_reader reader;
2484 const struct pipe_binary_program_header *hdr = state->prog;
2485 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2486 nir = nir_deserialize(NULL, options, &reader);
2487 break;
2488 }
2489
2490 default:
2491 unreachable("Unsupported IR");
2492 }
2493
2494 /* Most of iris doesn't really care about the difference between compute
2495 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so
2496 * it's way easier if we just normalize to COMPUTE here.
2497 */
2498 assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2499 nir->info.stage == MESA_SHADER_KERNEL);
2500 nir->info.stage = MESA_SHADER_COMPUTE;
2501
2502 struct iris_uncompiled_shader *ish =
2503 iris_create_uncompiled_shader(screen, nir, NULL);
2504 ish->kernel_input_size = state->req_input_mem;
2505 ish->kernel_shared_size = state->req_local_mem;
2506
2507 // XXX: disallow more than 64KB of shared variables
2508
2509 if (screen->precompile) {
2510 struct iris_cs_prog_key key = { KEY_ID(base) };
2511
2512 struct iris_compiled_shader *shader =
2513 iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
2514 sizeof(key), &key);
2515
2516 /* Append our new variant to the shader's variant list. */
2517 list_addtail(&shader->link, &ish->variants);
2518
2519 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2520 &key, sizeof(key))) {
2521 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2522 }
2523 }
2524
2525 return ish;
2526 }
2527
2528 static void
iris_compile_shader(void * _job,UNUSED void * _gdata,UNUSED int thread_index)2529 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
2530 {
2531 const struct iris_threaded_compile_job *job =
2532 (struct iris_threaded_compile_job *) _job;
2533
2534 struct iris_screen *screen = job->screen;
2535 struct u_upload_mgr *uploader = job->uploader;
2536 struct pipe_debug_callback *dbg = job->dbg;
2537 struct iris_uncompiled_shader *ish = job->ish;
2538 struct iris_compiled_shader *shader = job->shader;
2539
2540 switch (ish->nir->info.stage) {
2541 case MESA_SHADER_VERTEX:
2542 iris_compile_vs(screen, uploader, dbg, ish, shader);
2543 break;
2544 case MESA_SHADER_TESS_CTRL:
2545 iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
2546 break;
2547 case MESA_SHADER_TESS_EVAL:
2548 iris_compile_tes(screen, uploader, dbg, ish, shader);
2549 break;
2550 case MESA_SHADER_GEOMETRY:
2551 iris_compile_gs(screen, uploader, dbg, ish, shader);
2552 break;
2553 case MESA_SHADER_FRAGMENT:
2554 iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
2555 break;
2556
2557 default:
2558 unreachable("Invalid shader stage.");
2559 }
2560 }
2561
2562 static void *
iris_create_shader_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2563 iris_create_shader_state(struct pipe_context *ctx,
2564 const struct pipe_shader_state *state)
2565 {
2566 struct iris_context *ice = (void *) ctx;
2567 struct iris_screen *screen = (void *) ctx->screen;
2568 struct nir_shader *nir;
2569
2570 if (state->type == PIPE_SHADER_IR_TGSI)
2571 nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2572 else
2573 nir = state->ir.nir;
2574
2575 const struct shader_info *const info = &nir->info;
2576 struct iris_uncompiled_shader *ish =
2577 iris_create_uncompiled_shader(screen, nir, &state->stream_output);
2578
2579 union iris_any_prog_key key;
2580 unsigned key_size = 0;
2581
2582 memset(&key, 0, sizeof(key));
2583
2584 switch (info->stage) {
2585 case MESA_SHADER_VERTEX:
2586 /* User clip planes */
2587 if (info->clip_distance_array_size == 0)
2588 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2589
2590 key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) };
2591 key_size = sizeof(key.vs);
2592 break;
2593
2594 case MESA_SHADER_TESS_CTRL: {
2595 const unsigned _GL_TRIANGLES = 0x0004;
2596
2597 key.tcs = (struct iris_tcs_prog_key) {
2598 KEY_ID(vue.base),
2599 // XXX: make sure the linker fills this out from the TES...
2600 .tes_primitive_mode =
2601 info->tess.primitive_mode ? info->tess.primitive_mode
2602 : _GL_TRIANGLES,
2603 .outputs_written = info->outputs_written,
2604 .patch_outputs_written = info->patch_outputs_written,
2605 };
2606
2607 /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2608        * We don't have that information, so we simply guess that the input
2609 * and output patches are the same size. This is a bad guess, but we
2610 * can't do much better.
2611 */
2612 if (screen->compiler->use_tcs_8_patch)
2613 key.tcs.input_vertices = info->tess.tcs_vertices_out;
2614
2615 key_size = sizeof(key.tcs);
2616 break;
2617 }
2618
2619 case MESA_SHADER_TESS_EVAL:
2620 /* User clip planes */
2621 if (info->clip_distance_array_size == 0)
2622 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2623
2624 key.tes = (struct iris_tes_prog_key) {
2625 KEY_ID(vue.base),
2626 // XXX: not ideal, need TCS output/TES input unification
2627 .inputs_read = info->inputs_read,
2628 .patch_inputs_read = info->patch_inputs_read,
2629 };
2630
2631 key_size = sizeof(key.tes);
2632 break;
2633
2634 case MESA_SHADER_GEOMETRY:
2635 /* User clip planes */
2636 if (info->clip_distance_array_size == 0)
2637 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2638
2639 key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) };
2640 key_size = sizeof(key.gs);
2641 break;
2642
2643 case MESA_SHADER_FRAGMENT:
2644 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2645 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2646 (1ull << IRIS_NOS_RASTERIZER) |
2647 (1ull << IRIS_NOS_BLEND);
2648
2649 /* The program key needs the VUE map if there are > 16 inputs */
2650 if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
2651 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2652 }
2653
2654 const uint64_t color_outputs = info->outputs_written &
2655 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2656 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2657 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2658
2659 bool can_rearrange_varyings =
2660 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2661
2662 const struct intel_device_info *devinfo = &screen->devinfo;
2663
2664 key.fs = (struct iris_fs_prog_key) {
2665 KEY_ID(base),
2666 .nr_color_regions = util_bitcount(color_outputs),
2667 .coherent_fb_fetch = devinfo->ver >= 9,
2668 .input_slots_valid =
2669 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2670 };
2671
2672 key_size = sizeof(key.fs);
2673 break;
2674
2675 default:
2676 unreachable("Invalid shader stage.");
2677 }
2678
2679 if (screen->precompile) {
2680 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2681
2682 struct iris_compiled_shader *shader =
2683 iris_create_shader_variant(screen, NULL,
2684 (enum iris_program_cache_id) info->stage,
2685 key_size, &key);
2686
2687 /* Append our new variant to the shader's variant list. */
2688 list_addtail(&shader->link, &ish->variants);
2689
2690 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2691 &key, key_size)) {
2692 assert(!util_queue_fence_is_signalled(&shader->ready));
2693
2694 struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
2695
2696 job->screen = screen;
2697 job->uploader = uploader;
2698 job->ish = ish;
2699 job->shader = shader;
2700
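         /* Queue the compile on the screen's shader compiler threads.  The
          * variant's ready fence is signaled once the assembly is uploaded
          * (or compilation fails), which is what the parallel-compile query
          * below checks for the precompile variant.
          */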
2701 iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
2702 iris_compile_shader);
2703 }
2704 }
2705
2706 return ish;
2707 }
2708
2709 /**
2710 * Called when the refcount on the iris_uncompiled_shader reaches 0.
2711 *
2712 * Frees the iris_uncompiled_shader.
2713 *
2714 * \sa iris_delete_shader_state
2715 */
2716 void
iris_destroy_shader_state(struct pipe_context * ctx,void * state)2717 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
2718 {
2719 struct iris_uncompiled_shader *ish = state;
2720
2721 /* No need to take ish->lock; we hold the last reference to ish */
2722 list_for_each_entry_safe(struct iris_compiled_shader, shader,
2723 &ish->variants, link) {
2724 list_del(&shader->link);
2725
2726 iris_shader_variant_reference(&shader, NULL);
2727 }
2728
2729 simple_mtx_destroy(&ish->lock);
2730 util_queue_fence_destroy(&ish->ready);
2731
2732 ralloc_free(ish->nir);
2733 free(ish);
2734 }
2735
2736 /**
2737 * The pipe->delete_[stage]_state() driver hooks.
2738 *
2739 * \sa iris_destroy_shader_state
2740 */
2741 static void
iris_delete_shader_state(struct pipe_context * ctx,void * state)2742 iris_delete_shader_state(struct pipe_context *ctx, void *state)
2743 {
2744 struct iris_uncompiled_shader *ish = state;
2745 struct iris_context *ice = (void *) ctx;
2746
2747 const gl_shader_stage stage = ish->nir->info.stage;
2748
2749 if (ice->shaders.uncompiled[stage] == ish) {
2750 ice->shaders.uncompiled[stage] = NULL;
2751 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2752 }
2753
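   /* pipe_reference() drops our reference and returns true if it was the
    * last one, in which case we actually destroy the shader.
    */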
2754 if (pipe_reference(&ish->ref, NULL))
2755 iris_destroy_shader_state(ctx, state);
2756 }
2757
2758 /**
2759 * The pipe->bind_[stage]_state() driver hook.
2760 *
2761 * Binds an uncompiled shader as the current one for a particular stage.
2762 * Updates dirty tracking to account for the shader's NOS.
2763 */
2764 static void
bind_shader_state(struct iris_context * ice,struct iris_uncompiled_shader * ish,gl_shader_stage stage)2765 bind_shader_state(struct iris_context *ice,
2766 struct iris_uncompiled_shader *ish,
2767 gl_shader_stage stage)
2768 {
2769 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2770 const uint64_t nos = ish ? ish->nos : 0;
2771
2772 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2773 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2774
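   /* If the highest texture binding used differs between the old and new
    * shaders, re-emit this stage's sampler states, since how many we upload
    * tracks that count.
    */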
2775 if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
2776 (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
2777 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2778 }
2779
2780 ice->shaders.uncompiled[stage] = ish;
2781 ice->state.stage_dirty |= stage_dirty_bit;
2782
2783    /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they change
2784 * (or that they no longer need to do so).
2785 */
2786 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2787 if (nos & (1 << i))
2788 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
2789 else
2790 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
2791 }
2792 }
2793
2794 static void
iris_bind_vs_state(struct pipe_context * ctx,void * state)2795 iris_bind_vs_state(struct pipe_context *ctx, void *state)
2796 {
2797 struct iris_context *ice = (struct iris_context *)ctx;
2798 struct iris_uncompiled_shader *ish = state;
2799
2800 if (ish) {
2801 const struct shader_info *info = &ish->nir->info;
2802 if (ice->state.window_space_position != info->vs.window_space_position) {
2803 ice->state.window_space_position = info->vs.window_space_position;
2804
2805 ice->state.dirty |= IRIS_DIRTY_CLIP |
2806 IRIS_DIRTY_RASTER |
2807 IRIS_DIRTY_CC_VIEWPORT;
2808 }
2809
2810 const bool uses_draw_params =
2811 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
2812 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
2813 const bool uses_derived_draw_params =
2814 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
2815 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
2816 const bool needs_sgvs_element = uses_draw_params ||
2817 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
2818 BITSET_TEST(info->system_values_read,
2819 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
2820
2821 if (ice->state.vs_uses_draw_params != uses_draw_params ||
2822 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
2823 ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) {
2824 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
2825 IRIS_DIRTY_VERTEX_ELEMENTS;
2826 }
2827
2828 ice->state.vs_uses_draw_params = uses_draw_params;
2829 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
2830 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
2831 ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
2832 }
2833
2834 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2835 }
2836
2837 static void
iris_bind_tcs_state(struct pipe_context * ctx,void * state)2838 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2839 {
2840 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2841 }
2842
2843 static void
iris_bind_tes_state(struct pipe_context * ctx,void * state)2844 iris_bind_tes_state(struct pipe_context *ctx, void *state)
2845 {
2846 struct iris_context *ice = (struct iris_context *)ctx;
2847
2848 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2849 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2850 ice->state.dirty |= IRIS_DIRTY_URB;
2851
2852 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2853 }
2854
2855 static void
iris_bind_gs_state(struct pipe_context * ctx,void * state)2856 iris_bind_gs_state(struct pipe_context *ctx, void *state)
2857 {
2858 struct iris_context *ice = (struct iris_context *)ctx;
2859
2860 /* Enabling/disabling optional stages requires a URB reconfiguration. */
2861 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2862 ice->state.dirty |= IRIS_DIRTY_URB;
2863
2864 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2865 }
2866
2867 static void
iris_bind_fs_state(struct pipe_context * ctx,void * state)2868 iris_bind_fs_state(struct pipe_context *ctx, void *state)
2869 {
2870 struct iris_context *ice = (struct iris_context *) ctx;
2871 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2872 const struct intel_device_info *devinfo = &screen->devinfo;
2873 struct iris_uncompiled_shader *old_ish =
2874 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2875 struct iris_uncompiled_shader *new_ish = state;
2876
2877 const unsigned color_bits =
2878 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2879 BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2880
2881 /* Fragment shader outputs influence HasWriteableRT */
2882 if (!old_ish || !new_ish ||
2883 (old_ish->nir->info.outputs_written & color_bits) !=
2884 (new_ish->nir->info.outputs_written & color_bits))
2885 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2886
2887 if (devinfo->ver == 8)
2888 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2889
2890 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2891 }
2892
2893 static void
iris_bind_cs_state(struct pipe_context * ctx,void * state)2894 iris_bind_cs_state(struct pipe_context *ctx, void *state)
2895 {
2896 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2897 }
2898
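/**
 * The pipe_screen->finalize_nir() driver hook.
 *
 * Performs lowering that only needs to happen once per shader, ahead of
 * per-variant compilation.
 */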
2899 static char *
iris_finalize_nir(struct pipe_screen * _screen,void * nirptr)2900 iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
2901 {
2902 struct iris_screen *screen = (struct iris_screen *)_screen;
2903 struct nir_shader *nir = (struct nir_shader *) nirptr;
2904 const struct intel_device_info *devinfo = &screen->devinfo;
2905
2906 NIR_PASS_V(nir, iris_fix_edge_flags);
2907
2908 brw_preprocess_nir(screen->compiler, nir, NULL);
2909
2910 NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
2911 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2912
2913 nir_sweep(nir);
2914
2915 return NULL;
2916 }
2917
2918 static void
iris_set_max_shader_compiler_threads(struct pipe_screen * pscreen,unsigned max_threads)2919 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
2920 unsigned max_threads)
2921 {
2922 struct iris_screen *screen = (struct iris_screen *) pscreen;
2923 util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
2924 }
2925
2926 static bool
iris_is_parallel_shader_compilation_finished(struct pipe_screen * pscreen,void * v_shader,enum pipe_shader_type p_stage)2927 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
2928 void *v_shader,
2929 enum pipe_shader_type p_stage)
2930 {
2931 struct iris_screen *screen = (struct iris_screen *) pscreen;
2932
2933 /* Threaded compilation is only used for the precompile. If precompile is
2934 * disabled, threaded compilation is "done."
2935 */
2936 if (!screen->precompile)
2937 return true;
2938
2939 struct iris_uncompiled_shader *ish = v_shader;
2940
2941    /* When precompile is enabled, the first entry in the variant list is the
2942     * precompile variant; check its ready fence.
2943     */
2944 struct iris_compiled_shader *first =
2945 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
2946
2947 return util_queue_fence_is_signalled(&first->ready);
2948 }
2949
2950 void
iris_init_screen_program_functions(struct pipe_screen * pscreen)2951 iris_init_screen_program_functions(struct pipe_screen *pscreen)
2952 {
2953 pscreen->is_parallel_shader_compilation_finished =
2954 iris_is_parallel_shader_compilation_finished;
2955 pscreen->set_max_shader_compiler_threads =
2956 iris_set_max_shader_compiler_threads;
2957 pscreen->finalize_nir = iris_finalize_nir;
2958 }
2959
2960 void
iris_init_program_functions(struct pipe_context * ctx)2961 iris_init_program_functions(struct pipe_context *ctx)
2962 {
2963 ctx->create_vs_state = iris_create_shader_state;
2964 ctx->create_tcs_state = iris_create_shader_state;
2965 ctx->create_tes_state = iris_create_shader_state;
2966 ctx->create_gs_state = iris_create_shader_state;
2967 ctx->create_fs_state = iris_create_shader_state;
2968 ctx->create_compute_state = iris_create_compute_state;
2969
2970 ctx->delete_vs_state = iris_delete_shader_state;
2971 ctx->delete_tcs_state = iris_delete_shader_state;
2972 ctx->delete_tes_state = iris_delete_shader_state;
2973 ctx->delete_gs_state = iris_delete_shader_state;
2974 ctx->delete_fs_state = iris_delete_shader_state;
2975 ctx->delete_compute_state = iris_delete_shader_state;
2976
2977 ctx->bind_vs_state = iris_bind_vs_state;
2978 ctx->bind_tcs_state = iris_bind_tcs_state;
2979 ctx->bind_tes_state = iris_bind_tes_state;
2980 ctx->bind_gs_state = iris_bind_gs_state;
2981 ctx->bind_fs_state = iris_bind_fs_state;
2982 ctx->bind_compute_state = iris_bind_cs_state;
2983 }
2984