1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #include "intel/compiler/elk/elk_compiler.h"
50 #include "intel/compiler/elk/elk_nir.h"
51 #include "intel/compiler/elk/elk_prim.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "nir/tgsi_to_nir.h"
55
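/* Convenience initializers for the common "base" fields of program keys.
 * KEY_INIT fills an iris key from the uncompiled shader and driconf
 * settings, while BRW_KEY_INIT/ELK_KEY_INIT seed the corresponding
 * backend compiler keys when an iris key is translated for brw or elk.
 */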
56 #define KEY_INIT(prefix) \
57 .prefix.program_string_id = ish->program_id, \
58 .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
59 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
60 .base.program_string_id = prog_id, \
61 .base.limit_trig_input_range = limit_trig_input
62 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input) \
63 .base.program_string_id = prog_id, \
64 .base.limit_trig_input_range = limit_trig_input
65
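/* Everything a deferred (threaded) compile needs in order to build one
 * shader variant; filled in by the caller and consumed by the compile job.
 */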
66 struct iris_threaded_compile_job {
67 struct iris_screen *screen;
68 struct u_upload_mgr *uploader;
69 struct util_debug_callback *dbg;
70 struct iris_uncompiled_shader *ish;
71 struct iris_compiled_shader *shader;
72 };
73
74 static unsigned
75 get_new_program_id(struct iris_screen *screen)
76 {
77 return p_atomic_inc_return(&screen->program_id);
78 }
79
80 static void
81 iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
82 const struct brw_wm_prog_data *brw)
83 {
84 assert(shader->stage == MESA_SHADER_FRAGMENT);
85 struct iris_fs_data *iris = &shader->fs;
86
87 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
88 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
89 memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
90 memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
91 iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
92
93 iris->num_varying_inputs = brw->num_varying_inputs;
94 iris->msaa_flags_param = brw->msaa_flags_param;
95 iris->flat_inputs = brw->flat_inputs;
96 iris->inputs = brw->inputs;
97 iris->computed_depth_mode = brw->computed_depth_mode;
98 iris->max_polygons = brw->max_polygons;
99 iris->dispatch_multi = brw->dispatch_multi;
100 iris->computed_stencil = brw->computed_stencil;
101 iris->early_fragment_tests = brw->early_fragment_tests;
102 iris->post_depth_coverage = brw->post_depth_coverage;
103 iris->inner_coverage = brw->inner_coverage;
104 iris->dispatch_8 = brw->dispatch_8;
105 iris->dispatch_16 = brw->dispatch_16;
106 iris->dispatch_32 = brw->dispatch_32;
107 iris->dual_src_blend = brw->dual_src_blend;
108 iris->uses_pos_offset = brw->uses_pos_offset;
109 iris->uses_omask = brw->uses_omask;
110 iris->uses_kill = brw->uses_kill;
111 iris->uses_src_depth = brw->uses_src_depth;
112 iris->uses_src_w = brw->uses_src_w;
113 iris->uses_sample_mask = brw->uses_sample_mask;
114 iris->uses_vmask = brw->uses_vmask;
115 iris->has_side_effects = brw->has_side_effects;
116 iris->pulls_bary = brw->pulls_bary;
117
118 iris->uses_sample_offsets = brw->uses_sample_offsets;
119 iris->uses_npc_bary_coefficients = brw->uses_npc_bary_coefficients;
120 iris->uses_pc_bary_coefficients = brw->uses_pc_bary_coefficients;
121 iris->uses_depth_w_coefficients = brw->uses_depth_w_coefficients;
122
123 iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
124
125 iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
126 }
127
128 static void
129 iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
130 const struct brw_cs_prog_data *brw)
131 {
132 assert(shader->stage == MESA_SHADER_COMPUTE);
133 struct iris_cs_data *iris = &shader->cs;
134
135 iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
136 iris->push.cross_thread.regs = brw->push.cross_thread.regs;
137 iris->push.cross_thread.size = brw->push.cross_thread.size;
138
139 iris->push.per_thread.dwords = brw->push.per_thread.dwords;
140 iris->push.per_thread.regs = brw->push.per_thread.regs;
141 iris->push.per_thread.size = brw->push.per_thread.size;
142
143 iris->local_size[0] = brw->local_size[0];
144 iris->local_size[1] = brw->local_size[1];
145 iris->local_size[2] = brw->local_size[2];
146 iris->prog_offset[0] = brw->prog_offset[0];
147 iris->prog_offset[1] = brw->prog_offset[1];
148 iris->prog_offset[2] = brw->prog_offset[2];
149
150 iris->generate_local_id = brw->generate_local_id;
151 iris->walk_order = brw->walk_order;
152 iris->uses_barrier = brw->uses_barrier;
153 iris->uses_sampler = brw->uses_sampler;
154 iris->prog_mask = brw->prog_mask;
155
156 iris->first_param_is_builtin_subgroup_id =
157 brw->base.nr_params > 0 &&
158 brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
159 }
160
161 static void
162 iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
163 struct iris_vue_data *iris)
164 {
165 memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
166
167 iris->urb_read_length = brw->urb_read_length;
168 iris->cull_distance_mask = brw->cull_distance_mask;
169 iris->urb_entry_size = brw->urb_entry_size;
170 iris->dispatch_mode = brw->dispatch_mode;
171 iris->include_vue_handles = brw->include_vue_handles;
172 }
173
174 static void
175 iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
176 const struct brw_vs_prog_data *brw)
177 {
178 assert(shader->stage == MESA_SHADER_VERTEX);
179 struct iris_vs_data *iris = &shader->vs;
180
181 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
182
183 iris->uses_vertexid = brw->uses_vertexid;
184 iris->uses_instanceid = brw->uses_instanceid;
185 iris->uses_firstvertex = brw->uses_firstvertex;
186 iris->uses_baseinstance = brw->uses_baseinstance;
187 iris->uses_drawid = brw->uses_drawid;
188 }
189
190 static void
191 iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
192 const struct brw_tcs_prog_data *brw)
193 {
194 assert(shader->stage == MESA_SHADER_TESS_CTRL);
195 struct iris_tcs_data *iris = &shader->tcs;
196
197 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
198
199 iris->instances = brw->instances;
200 iris->patch_count_threshold = brw->patch_count_threshold;
201 iris->include_primitive_id = brw->include_primitive_id;
202 }
203
204 static void
205 iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
206 const struct brw_tes_prog_data *brw)
207 {
208 assert(shader->stage == MESA_SHADER_TESS_EVAL);
209 struct iris_tes_data *iris = &shader->tes;
210
211 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
212
213 iris->partitioning = brw->partitioning;
214 iris->output_topology = brw->output_topology;
215 iris->domain = brw->domain;
216 iris->include_primitive_id = brw->include_primitive_id;
217 }
218
219 static void
220 iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
221 const struct brw_gs_prog_data *brw)
222 {
223 assert(shader->stage == MESA_SHADER_GEOMETRY);
224 struct iris_gs_data *iris = &shader->gs;
225
226 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
227
228 iris->vertices_in = brw->vertices_in;
229 iris->output_vertex_size_hwords = brw->output_vertex_size_hwords;
230 iris->output_topology = brw->output_topology;
231 iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
232 iris->control_data_format = brw->control_data_format;
233 iris->static_vertex_count = brw->static_vertex_count;
234 iris->invocations = brw->invocations;
235 iris->include_primitive_id = brw->include_primitive_id;
236 }
237
238 void
239 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
240 struct brw_stage_prog_data *brw)
241 {
242 STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
243 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
244 shader->ubo_ranges[i].block = brw->ubo_ranges[i].block;
245 shader->ubo_ranges[i].start = brw->ubo_ranges[i].start;
246 shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
247 }
248
249 shader->nr_params = brw->nr_params;
250 shader->total_scratch = brw->total_scratch;
251 shader->total_shared = brw->total_shared;
252 shader->program_size = brw->program_size;
253 shader->const_data_offset = brw->const_data_offset;
254 shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
255 shader->has_ubo_pull = brw->has_ubo_pull;
256 shader->use_alt_mode = brw->use_alt_mode;
257
258 switch (shader->stage) {
259 case MESA_SHADER_FRAGMENT:
260 iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
261 break;
262 case MESA_SHADER_COMPUTE:
263 iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
264 break;
265 case MESA_SHADER_VERTEX:
266 iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
267 break;
268 case MESA_SHADER_TESS_CTRL:
269 iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
270 break;
271 case MESA_SHADER_TESS_EVAL:
272 iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
273 break;
274 case MESA_SHADER_GEOMETRY:
275 iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
276 break;
277 default:
278 unreachable("invalid shader stage");
279 }
280
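/* Take ownership of the backend prog_data, along with the relocation and
 * param arrays it points at, so they live exactly as long as this shader
 * variant.
 */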
281 shader->brw_prog_data = brw;
282
283 ralloc_steal(shader, shader->brw_prog_data);
284 ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
285 ralloc_steal(shader->brw_prog_data, brw->param);
286 }
287
288 static void
289 iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
290 const struct elk_wm_prog_data *elk)
291 {
292 assert(shader->stage == MESA_SHADER_FRAGMENT);
293 struct iris_fs_data *iris = &shader->fs;
294
295 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
296 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
297 memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
298 memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
299 iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
300
301 iris->num_varying_inputs = elk->num_varying_inputs;
302 iris->msaa_flags_param = elk->msaa_flags_param;
303 iris->flat_inputs = elk->flat_inputs;
304 iris->inputs = elk->inputs;
305 iris->computed_depth_mode = elk->computed_depth_mode;
306 iris->max_polygons = 1;
307 iris->dispatch_multi = 0;
308 iris->computed_stencil = elk->computed_stencil;
309 iris->early_fragment_tests = elk->early_fragment_tests;
310 iris->post_depth_coverage = elk->post_depth_coverage;
311 iris->inner_coverage = elk->inner_coverage;
312 iris->dispatch_8 = elk->dispatch_8;
313 iris->dispatch_16 = elk->dispatch_16;
314 iris->dispatch_32 = elk->dispatch_32;
315 iris->dual_src_blend = elk->dual_src_blend;
316 iris->uses_pos_offset = elk->uses_pos_offset;
317 iris->uses_omask = elk->uses_omask;
318 iris->uses_kill = elk->uses_kill;
319 iris->uses_src_depth = elk->uses_src_depth;
320 iris->uses_src_w = elk->uses_src_w;
321 iris->uses_sample_mask = elk->uses_sample_mask;
322 iris->uses_vmask = elk->uses_vmask;
323 iris->pulls_bary = elk->pulls_bary;
324 iris->has_side_effects = elk->has_side_effects;
325
326 iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
327
328 iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
329 }
330
331 static void
332 iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
333 const struct elk_cs_prog_data *elk)
334 {
335 assert(shader->stage == MESA_SHADER_COMPUTE);
336 struct iris_cs_data *iris = &shader->cs;
337
338 iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
339 iris->push.cross_thread.regs = elk->push.cross_thread.regs;
340 iris->push.cross_thread.size = elk->push.cross_thread.size;
341
342 iris->push.per_thread.dwords = elk->push.per_thread.dwords;
343 iris->push.per_thread.regs = elk->push.per_thread.regs;
344 iris->push.per_thread.size = elk->push.per_thread.size;
345
346 iris->local_size[0] = elk->local_size[0];
347 iris->local_size[1] = elk->local_size[1];
348 iris->local_size[2] = elk->local_size[2];
349 iris->prog_offset[0] = elk->prog_offset[0];
350 iris->prog_offset[1] = elk->prog_offset[1];
351 iris->prog_offset[2] = elk->prog_offset[2];
352
353 iris->uses_barrier = elk->uses_barrier;
354 iris->prog_mask = elk->prog_mask;
355
356 iris->first_param_is_builtin_subgroup_id =
357 elk->base.nr_params > 0 &&
358 elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
359 }
360
361 static void
362 iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
363 struct iris_vue_data *iris)
364 {
365 memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
366
367 iris->urb_read_length = elk->urb_read_length;
368 iris->cull_distance_mask = elk->cull_distance_mask;
369 iris->urb_entry_size = elk->urb_entry_size;
370 iris->dispatch_mode = elk->dispatch_mode;
371 iris->include_vue_handles = elk->include_vue_handles;
372 }
373
374 static void
375 iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
376 const struct elk_vs_prog_data *elk)
377 {
378 assert(shader->stage == MESA_SHADER_VERTEX);
379 struct iris_vs_data *iris = &shader->vs;
380
381 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
382
383 iris->uses_vertexid = elk->uses_vertexid;
384 iris->uses_instanceid = elk->uses_instanceid;
385 iris->uses_firstvertex = elk->uses_firstvertex;
386 iris->uses_baseinstance = elk->uses_baseinstance;
387 iris->uses_drawid = elk->uses_drawid;
388 }
389
390 static void
391 iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
392 const struct elk_tcs_prog_data *elk)
393 {
394 assert(shader->stage == MESA_SHADER_TESS_CTRL);
395 struct iris_tcs_data *iris = &shader->tcs;
396
397 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
398
399 iris->instances = elk->instances;
400 iris->patch_count_threshold = elk->patch_count_threshold;
401 iris->include_primitive_id = elk->include_primitive_id;
402 }
403
404 static void
405 iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
406 const struct elk_tes_prog_data *elk)
407 {
408 assert(shader->stage == MESA_SHADER_TESS_EVAL);
409 struct iris_tes_data *iris = &shader->tes;
410
411 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
412
413 iris->partitioning = elk->partitioning;
414 iris->output_topology = elk->output_topology;
415 iris->domain = elk->domain;
416 iris->include_primitive_id = elk->include_primitive_id;
417 }
418
419 static void
420 iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
421 const struct elk_gs_prog_data *elk)
422 {
423 assert(shader->stage == MESA_SHADER_GEOMETRY);
424 struct iris_gs_data *iris = &shader->gs;
425
426 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
427
428 iris->vertices_in = elk->vertices_in;
429 iris->output_vertex_size_hwords = elk->output_vertex_size_hwords;
430 iris->output_topology = elk->output_topology;
431 iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
432 iris->control_data_format = elk->control_data_format;
433 iris->static_vertex_count = elk->static_vertex_count;
434 iris->invocations = elk->invocations;
435 iris->include_primitive_id = elk->include_primitive_id;
436 }
437
438 void
439 iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
440 struct elk_stage_prog_data *elk)
441 {
442 STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
443 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
444 shader->ubo_ranges[i].block = elk->ubo_ranges[i].block;
445 shader->ubo_ranges[i].start = elk->ubo_ranges[i].start;
446 shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
447 }
448
449 shader->nr_params = elk->nr_params;
450 shader->total_scratch = elk->total_scratch;
451 shader->total_shared = elk->total_shared;
452 shader->program_size = elk->program_size;
453 shader->const_data_offset = elk->const_data_offset;
454 shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
455 shader->has_ubo_pull = elk->has_ubo_pull;
456 shader->use_alt_mode = elk->use_alt_mode;
457
458 switch (shader->stage) {
459 case MESA_SHADER_FRAGMENT:
460 iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
461 break;
462 case MESA_SHADER_COMPUTE:
463 iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
464 break;
465 case MESA_SHADER_VERTEX:
466 iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
467 break;
468 case MESA_SHADER_TESS_CTRL:
469 iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
470 break;
471 case MESA_SHADER_TESS_EVAL:
472 iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
473 break;
474 case MESA_SHADER_GEOMETRY:
475 iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
476 break;
477 default:
478 unreachable("invalid shader stage");
479 }
480
481 shader->elk_prog_data = elk;
482
483 ralloc_steal(shader, shader->elk_prog_data);
484 ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
485 ralloc_steal(shader->elk_prog_data, elk->param);
486 }
487
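/* Attach the remaining variant state (streamout state, system values,
 * binding table) to a compiled shader and take ownership of the
 * heap-allocated pieces.
 */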
488 void
489 iris_finalize_program(struct iris_compiled_shader *shader,
490 uint32_t *streamout,
491 uint32_t *system_values,
492 unsigned num_system_values,
493 unsigned kernel_input_size,
494 unsigned num_cbufs,
495 const struct iris_binding_table *bt)
496 {
497 /* There can be only one. */
498 assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
499
500 shader->streamout = streamout;
501 shader->system_values = system_values;
502 shader->num_system_values = num_system_values;
503 shader->kernel_input_size = kernel_input_size;
504 shader->num_cbufs = num_cbufs;
505 shader->bt = *bt;
506
507 ralloc_steal(shader, shader->streamout);
508 ralloc_steal(shader, shader->system_values);
509 }
510
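/* The iris_*_prog_key structures are backend-agnostic; the helpers below
 * translate them into brw (Gfx9+) or elk (Gfx8 and earlier) compiler keys
 * immediately before invoking the backend compiler.
 */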
511 static struct brw_vs_prog_key
512 iris_to_brw_vs_key(const struct iris_screen *screen,
513 const struct iris_vs_prog_key *key)
514 {
515 return (struct brw_vs_prog_key) {
516 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
517 key->vue.base.limit_trig_input_range),
518 };
519 }
520
521 static struct brw_tcs_prog_key
522 iris_to_brw_tcs_key(const struct iris_screen *screen,
523 const struct iris_tcs_prog_key *key)
524 {
525 return (struct brw_tcs_prog_key) {
526 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
527 key->vue.base.limit_trig_input_range),
528 ._tes_primitive_mode = key->_tes_primitive_mode,
529 .input_vertices = key->input_vertices,
530 .patch_outputs_written = key->patch_outputs_written,
531 .outputs_written = key->outputs_written,
532 };
533 }
534
535 static struct brw_tes_prog_key
536 iris_to_brw_tes_key(const struct iris_screen *screen,
537 const struct iris_tes_prog_key *key)
538 {
539 return (struct brw_tes_prog_key) {
540 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
541 key->vue.base.limit_trig_input_range),
542 .patch_inputs_read = key->patch_inputs_read,
543 .inputs_read = key->inputs_read,
544 };
545 }
546
547 static struct brw_gs_prog_key
548 iris_to_brw_gs_key(const struct iris_screen *screen,
549 const struct iris_gs_prog_key *key)
550 {
551 return (struct brw_gs_prog_key) {
552 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
553 key->vue.base.limit_trig_input_range),
554 };
555 }
556
557 static struct brw_wm_prog_key
558 iris_to_brw_fs_key(const struct iris_screen *screen,
559 const struct iris_fs_prog_key *key)
560 {
561 return (struct brw_wm_prog_key) {
562 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
563 key->base.limit_trig_input_range),
564 .nr_color_regions = key->nr_color_regions,
565 .flat_shade = key->flat_shade,
566 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
567 .alpha_to_coverage = key->alpha_to_coverage ? INTEL_ALWAYS : INTEL_NEVER,
568 .clamp_fragment_color = key->clamp_fragment_color,
569 .persample_interp = key->persample_interp ? INTEL_ALWAYS : INTEL_NEVER,
570 .multisample_fbo = key->multisample_fbo ? INTEL_ALWAYS : INTEL_NEVER,
571 .force_dual_color_blend = key->force_dual_color_blend,
572 .coherent_fb_fetch = key->coherent_fb_fetch,
573 .color_outputs_valid = key->color_outputs_valid,
574 .input_slots_valid = key->input_slots_valid,
575 .ignore_sample_mask_out = !key->multisample_fbo,
576 .null_push_constant_tbimr_workaround =
577 screen->devinfo->needs_null_push_constant_tbimr_workaround,
578 };
579 }
580
581 static struct brw_cs_prog_key
582 iris_to_brw_cs_key(const struct iris_screen *screen,
583 const struct iris_cs_prog_key *key)
584 {
585 return (struct brw_cs_prog_key) {
586 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
587 key->base.limit_trig_input_range),
588 };
589 }
590
591 static struct elk_vs_prog_key
592 iris_to_elk_vs_key(const struct iris_screen *screen,
593 const struct iris_vs_prog_key *key)
594 {
595 return (struct elk_vs_prog_key) {
596 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
597 key->vue.base.limit_trig_input_range),
598
599 /* Don't tell the backend about our clip plane constants, we've
600 * already lowered them in NIR and don't want it doing it again.
601 */
602 .nr_userclip_plane_consts = 0,
603 };
604 }
605
606 static struct elk_tcs_prog_key
607 iris_to_elk_tcs_key(const struct iris_screen *screen,
608 const struct iris_tcs_prog_key *key)
609 {
610 return (struct elk_tcs_prog_key) {
611 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
612 key->vue.base.limit_trig_input_range),
613 ._tes_primitive_mode = key->_tes_primitive_mode,
614 .input_vertices = key->input_vertices,
615 .patch_outputs_written = key->patch_outputs_written,
616 .outputs_written = key->outputs_written,
617 .quads_workaround = key->quads_workaround,
618 };
619 }
620
621 static struct elk_tes_prog_key
622 iris_to_elk_tes_key(const struct iris_screen *screen,
623 const struct iris_tes_prog_key *key)
624 {
625 return (struct elk_tes_prog_key) {
626 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
627 key->vue.base.limit_trig_input_range),
628 .patch_inputs_read = key->patch_inputs_read,
629 .inputs_read = key->inputs_read,
630 };
631 }
632
633 static struct elk_gs_prog_key
634 iris_to_elk_gs_key(const struct iris_screen *screen,
635 const struct iris_gs_prog_key *key)
636 {
637 return (struct elk_gs_prog_key) {
638 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
639 key->vue.base.limit_trig_input_range),
640 };
641 }
642
643 static struct elk_wm_prog_key
644 iris_to_elk_fs_key(const struct iris_screen *screen,
645 const struct iris_fs_prog_key *key)
646 {
647 return (struct elk_wm_prog_key) {
648 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
649 key->base.limit_trig_input_range),
650 .nr_color_regions = key->nr_color_regions,
651 .flat_shade = key->flat_shade,
652 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
653 .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
654 .clamp_fragment_color = key->clamp_fragment_color,
655 .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
656 .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
657 .force_dual_color_blend = key->force_dual_color_blend,
658 .coherent_fb_fetch = key->coherent_fb_fetch,
659 .color_outputs_valid = key->color_outputs_valid,
660 .input_slots_valid = key->input_slots_valid,
661 .ignore_sample_mask_out = !key->multisample_fbo,
662 };
663 }
664
665 static struct elk_cs_prog_key
666 iris_to_elk_cs_key(const struct iris_screen *screen,
667 const struct iris_cs_prog_key *key)
668 {
669 return (struct elk_cs_prog_key) {
670 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
671 key->base.limit_trig_input_range),
672 };
673 }
674
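/* Allocate "size" bytes of transient state space from the given upload
 * manager; on success *ref receives the backing resource and offset, and a
 * CPU-writable mapping is returned.
 */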
675 static void *
676 upload_state(struct u_upload_mgr *uploader,
677 struct iris_state_ref *ref,
678 unsigned size,
679 unsigned alignment)
680 {
681 void *p = NULL;
682 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
683 return p;
684 }
685
686 void
687 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
688 struct pipe_shader_buffer *buf,
689 struct iris_state_ref *surf_state,
690 isl_surf_usage_flags_t usage)
691 {
692 struct pipe_context *ctx = &ice->ctx;
693 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
694 bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
695
696 void *map =
697 upload_state(ice->state.surface_uploader, surf_state,
698 screen->isl_dev.ss.size, 64);
699 if (unlikely(!map)) {
700 surf_state->res = NULL;
701 return;
702 }
703
704 struct iris_resource *res = (void *) buf->buffer;
705 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
706 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
707
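/* Indirectly addressed UBOs may be read either through the sampler (as an
 * RGBA32-float view) or through the dataport (as a RAW buffer); SSBOs
 * always go through the dataport.
 */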
708 const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
709
710 isl_buffer_fill_state(&screen->isl_dev, map,
711 .address = res->bo->address + res->offset +
712 buf->buffer_offset,
713 .size_B = buf->buffer_size - res->offset,
714 .format = dataport ? ISL_FORMAT_RAW
715 : ISL_FORMAT_R32G32B32A32_FLOAT,
716 .swizzle = ISL_SWIZZLE_IDENTITY,
717 .stride_B = 1,
718 .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
719 }
720
721 static nir_def *
722 get_aoa_deref_offset(nir_builder *b,
723 nir_deref_instr *deref,
724 unsigned elem_size)
725 {
726 unsigned array_size = elem_size;
727 nir_def *offset = nir_imm_int(b, 0);
728
729 while (deref->deref_type != nir_deref_type_var) {
730 assert(deref->deref_type == nir_deref_type_array);
731
732 /* This level's element size is the previous level's array size */
733 nir_def *index = deref->arr.index.ssa;
734 assert(deref->arr.index.ssa);
735 offset = nir_iadd(b, offset,
736 nir_imul_imm(b, index, array_size));
737
738 deref = nir_deref_instr_parent(deref);
739 assert(glsl_type_is_array(deref->type));
740 array_size *= glsl_get_length(deref->type);
741 }
742
743 /* Accessing an invalid surface index with the dataport can result in a
744 * hang. According to the spec "if the index used to select an individual
745 * element is negative or greater than or equal to the size of the array,
746 * the results of the operation are undefined but may not lead to
747 * termination" -- which is one of the possible outcomes of the hang.
748 * Clamp the index to prevent access outside of the array bounds.
749 */
750 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
751 }
752
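/* Rewrite storage image deref intrinsics to use flat image indices based
 * on the variable's driver_location plus any array-of-arrays offset, so
 * later passes (and the binding table setup) work with plain indices
 * rather than derefs.
 */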
753 static void
754 iris_lower_storage_image_derefs(nir_shader *nir)
755 {
756 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
757
758 nir_builder b = nir_builder_create(impl);
759
760 nir_foreach_block(block, impl) {
761 nir_foreach_instr_safe(instr, block) {
762 if (instr->type != nir_instr_type_intrinsic)
763 continue;
764
765 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
766 switch (intrin->intrinsic) {
767 case nir_intrinsic_image_deref_load:
768 case nir_intrinsic_image_deref_store:
769 case nir_intrinsic_image_deref_atomic:
770 case nir_intrinsic_image_deref_atomic_swap:
771 case nir_intrinsic_image_deref_size:
772 case nir_intrinsic_image_deref_samples:
773 case nir_intrinsic_image_deref_load_raw_intel:
774 case nir_intrinsic_image_deref_store_raw_intel: {
775 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
776 nir_variable *var = nir_deref_instr_get_variable(deref);
777
778 b.cursor = nir_before_instr(&intrin->instr);
779 nir_def *index =
780 nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
781 var->data.driver_location);
782 nir_rewrite_image_intrinsic(intrin, index, false);
783 break;
784 }
785
786 default:
787 break;
788 }
789 }
790 }
791 }
792
793 static bool
794 iris_uses_image_atomic(const nir_shader *shader)
795 {
796 nir_foreach_function_impl(impl, shader) {
797 nir_foreach_block(block, impl) {
798 nir_foreach_instr(instr, block) {
799 if (instr->type != nir_instr_type_intrinsic)
800 continue;
801
802 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
803 switch (intrin->intrinsic) {
804 case nir_intrinsic_image_deref_atomic:
805 case nir_intrinsic_image_deref_atomic_swap:
806 unreachable("Should have been lowered in "
807 "iris_lower_storage_image_derefs");
808
809 case nir_intrinsic_image_atomic:
810 case nir_intrinsic_image_atomic_swap:
811 return true;
812
813 default:
814 break;
815 }
816 }
817 }
818 }
819
820 return false;
821 }
822
823 /**
824 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
825 */
826 static bool
827 iris_fix_edge_flags(nir_shader *nir)
828 {
829 if (nir->info.stage != MESA_SHADER_VERTEX) {
830 nir_shader_preserve_all_metadata(nir);
831 return false;
832 }
833
834 nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
835 VARYING_SLOT_EDGE);
836 if (!var) {
837 nir_shader_preserve_all_metadata(nir);
838 return false;
839 }
840
841 var->data.mode = nir_var_shader_temp;
842 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
843 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
844 nir_fixup_deref_modes(nir);
845
846 nir_foreach_function_impl(impl, nir) {
847 nir_metadata_preserve(impl, nir_metadata_control_flow |
848 nir_metadata_live_defs |
849 nir_metadata_loop_analysis);
850 }
851
852 return true;
853 }
854
855 /**
856 * Fix an uncompiled shader's stream output info.
857 *
858 * Core Gallium stores output->register_index as a "slot" number, where
859 * slots are assigned consecutively to all outputs in info->outputs_written.
860 * This naive packing of outputs doesn't work for us - we too have slots,
861 * but the layout is defined by the VUE map, which we won't have until we
862 * compile a specific shader variant. So, we remap these and simply store
863 * VARYING_SLOT_* in our copy's output->register_index fields.
864 *
865 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
866 * components of our VUE header. See brw_vue_map.c for the layout.
867 */
868 static void
869 update_so_info(struct pipe_stream_output_info *so_info,
870 uint64_t outputs_written)
871 {
872 uint8_t reverse_map[64] = {};
873 unsigned slot = 0;
874 while (outputs_written) {
875 reverse_map[slot++] = u_bit_scan64(&outputs_written);
876 }
877
878 for (unsigned i = 0; i < so_info->num_outputs; i++) {
879 struct pipe_stream_output *output = &so_info->output[i];
880
881 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
882 output->register_index = reverse_map[output->register_index];
883
884 /* The VUE header contains three scalar fields packed together:
885 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
886 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
887 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
888 */
889 switch (output->register_index) {
890 case VARYING_SLOT_LAYER:
891 assert(output->num_components == 1);
892 output->register_index = VARYING_SLOT_PSIZ;
893 output->start_component = 1;
894 break;
895 case VARYING_SLOT_VIEWPORT:
896 assert(output->num_components == 1);
897 output->register_index = VARYING_SLOT_PSIZ;
898 output->start_component = 2;
899 break;
900 case VARYING_SLOT_PSIZ:
901 assert(output->num_components == 1);
902 output->start_component = 3;
903 break;
904 }
905
906 //info->outputs_written |= 1ull << output->register_index;
907 }
908 }
909
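/* Fill one vec4 worth of system values with ELK_PARAM_IMAGE entries for a
 * single field of isl_image_param, zero-padding the unused components.
 */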
910 static void
911 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
912 unsigned offset, unsigned n)
913 {
914 assert(offset % sizeof(uint32_t) == 0);
915
916 for (unsigned i = 0; i < n; ++i)
917 sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
918
919 for (unsigned i = n; i < 4; ++i)
920 sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
921 }
922
923 /**
924 * Associate NIR uniform variables with the prog_data->param[] mechanism
925 * used by the backend. Also, decide which UBOs we'd like to push in an
926 * ideal situation (though the backend can reduce this).
927 */
928 static void
929 iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
930 void *mem_ctx,
931 nir_shader *nir,
932 unsigned kernel_input_size,
933 uint32_t **out_system_values,
934 unsigned *out_num_system_values,
935 unsigned *out_num_cbufs)
936 {
937 unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
938
939 const unsigned IRIS_MAX_SYSTEM_VALUES =
940 PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
941 unsigned *system_values =
942 rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
943 unsigned num_system_values = 0;
944
945 unsigned patch_vert_idx = -1;
946 unsigned tess_outer_default_idx = -1;
947 unsigned tess_inner_default_idx = -1;
948 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
949 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
950 unsigned variable_group_size_idx = -1;
951 unsigned work_dim_idx = -1;
952 memset(ucp_idx, -1, sizeof(ucp_idx));
953 memset(img_idx, -1, sizeof(img_idx));
954
955 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
956
957 nir_builder b = nir_builder_at(nir_before_impl(impl));
958
959 nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
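/* Placeholder UBO index: system-value loads are emitted against this SSA
 * undef for now and rewritten to the real sysval cbuf index below, once
 * the total number of constant buffers is known.
 */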
960
961 /* Turn system value intrinsics into uniforms */
962 nir_foreach_block(block, impl) {
963 nir_foreach_instr_safe(instr, block) {
964 if (instr->type != nir_instr_type_intrinsic)
965 continue;
966
967 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
968 nir_def *offset;
969
970 switch (intrin->intrinsic) {
971 case nir_intrinsic_load_base_workgroup_id: {
972 /* GL doesn't have a concept of base workgroup */
973 b.cursor = nir_instr_remove(&intrin->instr);
974 nir_def_rewrite_uses(&intrin->def,
975 nir_imm_zero(&b, 3, 32));
976 continue;
977 }
978 case nir_intrinsic_load_constant: {
979 unsigned load_size = intrin->def.num_components *
980 intrin->def.bit_size / 8;
981 unsigned load_align = intrin->def.bit_size / 8;
982
983 /* This one is special because it reads from the shader constant
984 * data and not cbuf0 which gallium uploads for us.
985 */
986 b.cursor = nir_instr_remove(&intrin->instr);
987
988 nir_def *offset =
989 nir_iadd_imm(&b, intrin->src[0].ssa,
990 nir_intrinsic_base(intrin));
991
992 assert(load_size < b.shader->constant_data_size);
993 unsigned max_offset = b.shader->constant_data_size - load_size;
994 offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
995
996 /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
997 * and cannot cross that 4GB boundary, so we can do the address
998 * calculation with 32-bit adds. Also, we can ignore the high
999 * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
1000 */
1001 assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
1002
1003 nir_def *const_data_addr =
1004 nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1005
1006 nir_def *data =
1007 nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1008 load_align,
1009 intrin->def.num_components,
1010 intrin->def.bit_size);
1011
1012 nir_def_rewrite_uses(&intrin->def,
1013 data);
1014 continue;
1015 }
1016 case nir_intrinsic_load_user_clip_plane: {
1017 unsigned ucp = nir_intrinsic_ucp_id(intrin);
1018
1019 if (ucp_idx[ucp] == -1) {
1020 ucp_idx[ucp] = num_system_values;
1021 num_system_values += 4;
1022 }
1023
1024 for (int i = 0; i < 4; i++) {
1025 system_values[ucp_idx[ucp] + i] =
1026 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1027 }
1028
1029 b.cursor = nir_before_instr(instr);
1030 offset = nir_imm_int(&b, system_values_start +
1031 ucp_idx[ucp] * sizeof(uint32_t));
1032 break;
1033 }
1034 case nir_intrinsic_load_patch_vertices_in:
1035 if (patch_vert_idx == -1)
1036 patch_vert_idx = num_system_values++;
1037
1038 system_values[patch_vert_idx] =
1039 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1040
1041 b.cursor = nir_before_instr(instr);
1042 offset = nir_imm_int(&b, system_values_start +
1043 patch_vert_idx * sizeof(uint32_t));
1044 break;
1045 case nir_intrinsic_load_tess_level_outer_default:
1046 if (tess_outer_default_idx == -1) {
1047 tess_outer_default_idx = num_system_values;
1048 num_system_values += 4;
1049 }
1050
1051 for (int i = 0; i < 4; i++) {
1052 system_values[tess_outer_default_idx + i] =
1053 BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1054 }
1055
1056 b.cursor = nir_before_instr(instr);
1057 offset = nir_imm_int(&b, system_values_start +
1058 tess_outer_default_idx * sizeof(uint32_t));
1059 break;
1060 case nir_intrinsic_load_tess_level_inner_default:
1061 if (tess_inner_default_idx == -1) {
1062 tess_inner_default_idx = num_system_values;
1063 num_system_values += 2;
1064 }
1065
1066 for (int i = 0; i < 2; i++) {
1067 system_values[tess_inner_default_idx + i] =
1068 BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1069 }
1070
1071 b.cursor = nir_before_instr(instr);
1072 offset = nir_imm_int(&b, system_values_start +
1073 tess_inner_default_idx * sizeof(uint32_t));
1074 break;
1075 case nir_intrinsic_image_deref_load_param_intel: {
1076 assert(devinfo->ver < 9);
1077 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1078 nir_variable *var = nir_deref_instr_get_variable(deref);
1079
1080 if (img_idx[var->data.binding] == -1) {
1081 /* GL only allows arrays of arrays of images. */
1082 assert(glsl_type_is_image(glsl_without_array(var->type)));
1083 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1084
1085 for (int i = 0; i < num_images; i++) {
1086 const unsigned img = var->data.binding + i;
1087
1088 img_idx[img] = num_system_values;
1089 num_system_values += ISL_IMAGE_PARAM_SIZE;
1090
1091 uint32_t *img_sv = &system_values[img_idx[img]];
1092
1093 setup_vec4_image_sysval(
1094 img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1095 offsetof(struct isl_image_param, offset), 2);
1096 setup_vec4_image_sysval(
1097 img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1098 offsetof(struct isl_image_param, size), 3);
1099 setup_vec4_image_sysval(
1100 img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1101 offsetof(struct isl_image_param, stride), 4);
1102 setup_vec4_image_sysval(
1103 img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1104 offsetof(struct isl_image_param, tiling), 3);
1105 setup_vec4_image_sysval(
1106 img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1107 offsetof(struct isl_image_param, swizzling), 2);
1108 }
1109 }
1110
1111 b.cursor = nir_before_instr(instr);
1112 offset = nir_iadd_imm(&b,
1113 get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1114 system_values_start +
1115 img_idx[var->data.binding] * 4 +
1116 nir_intrinsic_base(intrin) * 16);
1117 break;
1118 }
1119 case nir_intrinsic_load_workgroup_size: {
1120 assert(nir->info.workgroup_size_variable);
1121 if (variable_group_size_idx == -1) {
1122 variable_group_size_idx = num_system_values;
1123 num_system_values += 3;
1124 for (int i = 0; i < 3; i++) {
1125 system_values[variable_group_size_idx + i] =
1126 BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1127 }
1128 }
1129
1130 b.cursor = nir_before_instr(instr);
1131 offset = nir_imm_int(&b, system_values_start +
1132 variable_group_size_idx * sizeof(uint32_t));
1133 break;
1134 }
1135 case nir_intrinsic_load_work_dim: {
1136 if (work_dim_idx == -1) {
1137 work_dim_idx = num_system_values++;
1138 system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1139 }
1140 b.cursor = nir_before_instr(instr);
1141 offset = nir_imm_int(&b, system_values_start +
1142 work_dim_idx * sizeof(uint32_t));
1143 break;
1144 }
1145 case nir_intrinsic_load_kernel_input: {
1146 assert(nir_intrinsic_base(intrin) +
1147 nir_intrinsic_range(intrin) <= kernel_input_size);
1148 b.cursor = nir_before_instr(instr);
1149 offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1150 nir_intrinsic_base(intrin));
1151 break;
1152 }
1153 default:
1154 continue;
1155 }
1156
1157 nir_def *load =
1158 nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1159 temp_ubo_name, offset,
1160 .align_mul = 4,
1161 .align_offset = 0,
1162 .range_base = 0,
1163 .range = ~0);
1164
1165 nir_def_rewrite_uses(&intrin->def,
1166 load);
1167 nir_instr_remove(instr);
1168 }
1169 }
1170
1171 nir_validate_shader(nir, "before remapping");
1172
1173 /* Uniforms are stored in constant buffer 0, so the
1174 * user-facing UBOs start at index one. If any constant buffer is
1175 * needed, constant buffer 0 will be needed, so account for it.
1176 */
1177 unsigned num_cbufs = nir->info.num_ubos;
1178 if (num_cbufs || nir->num_uniforms)
1179 num_cbufs++;
1180
1181 /* Place the new params in a new cbuf. */
1182 if (num_system_values > 0 || kernel_input_size > 0) {
1183 unsigned sysval_cbuf_index = num_cbufs;
1184 num_cbufs++;
1185
1186 system_values = reralloc(mem_ctx, system_values, unsigned,
1187 num_system_values);
1188
1189 nir_foreach_block(block, impl) {
1190 nir_foreach_instr_safe(instr, block) {
1191 if (instr->type != nir_instr_type_intrinsic)
1192 continue;
1193
1194 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1195
1196 if (load->intrinsic != nir_intrinsic_load_ubo)
1197 continue;
1198
1199 b.cursor = nir_before_instr(instr);
1200
1201 if (load->src[0].ssa == temp_ubo_name) {
1202 nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1203 nir_src_rewrite(&load->src[0], imm);
1204 }
1205 }
1206 }
1207
1208 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1209 nir_opt_constant_folding(nir);
1210 } else {
1211 ralloc_free(system_values);
1212 system_values = NULL;
1213 }
1214
1215 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1216 nir_validate_shader(nir, "after remap");
1217
1218 /* We don't use params[] but gallium leaves num_uniforms set. We use this
1219 * to detect when cbuf0 exists but we don't need it anymore when we get
1220 * here. Instead, zero it out so that the back-end doesn't get confused
1221 * when nr_params * 4 != num_uniforms.
1222 */
1223 nir->num_uniforms = 0;
1224
1225 *out_system_values = system_values;
1226 *out_num_system_values = num_system_values;
1227 *out_num_cbufs = num_cbufs;
1228 }
1229
1230 static const char *surface_group_names[] = {
1231 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
1232 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1233 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
1234 [IRIS_SURFACE_GROUP_TEXTURE_LOW64] = "texture",
1235 [IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = "texture",
1236 [IRIS_SURFACE_GROUP_UBO] = "ubo",
1237 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
1238 [IRIS_SURFACE_GROUP_IMAGE] = "image",
1239 };
1240
1241 static void
1242 iris_print_binding_table(FILE *fp, const char *name,
1243 const struct iris_binding_table *bt)
1244 {
1245 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1246
1247 uint32_t total = 0;
1248 uint32_t compacted = 0;
1249
1250 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1251 uint32_t size = bt->sizes[i];
1252 total += size;
1253 if (size)
1254 compacted += util_bitcount64(bt->used_mask[i]);
1255 }
1256
1257 if (total == 0) {
1258 fprintf(fp, "Binding table for %s is empty\n\n", name);
1259 return;
1260 }
1261
1262 if (total != compacted) {
1263 fprintf(fp, "Binding table for %s "
1264 "(compacted to %u entries from %u entries)\n",
1265 name, compacted, total);
1266 } else {
1267 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1268 }
1269
1270 uint32_t entry = 0;
1271 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1272 uint64_t mask = bt->used_mask[i];
1273 while (mask) {
1274 int index = u_bit_scan64(&mask);
1275 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
1276 }
1277 }
1278 fprintf(fp, "\n");
1279 }
1280
1281 enum {
1282 /* Max elements in a surface group. */
1283 SURFACE_GROUP_MAX_ELEMENTS = 64,
1284 };
1285
1286 /**
1287 * Map a <group, index> pair to a binding table index.
1288 *
1289 * For example: <UBO, 5> => binding table index 12
1290 */
1291 uint32_t
1292 iris_group_index_to_bti(const struct iris_binding_table *bt,
1293 enum iris_surface_group group, uint32_t index)
1294 {
1295 assert(index < bt->sizes[group]);
1296 uint64_t mask = bt->used_mask[group];
1297 uint64_t bit = 1ull << index;
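/* Compacted BTI = group base + number of used entries below "index".
 * For example, with used_mask 0b1011 and index 3, the two lower used
 * bits (0 and 1) are counted, giving offsets[group] + 2.
 */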
1298 if (bit & mask) {
1299 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1300 } else {
1301 return IRIS_SURFACE_NOT_USED;
1302 }
1303 }
1304
1305 /**
1306 * Map a binding table index back to a <group, index> pair.
1307 *
1308 * For example: binding table index 12 => <UBO, 5>
1309 */
1310 uint32_t
1311 iris_bti_to_group_index(const struct iris_binding_table *bt,
1312 enum iris_surface_group group, uint32_t bti)
1313 {
1314 uint64_t used_mask = bt->used_mask[group];
1315 assert(bti >= bt->offsets[group]);
1316
1317 uint32_t c = bti - bt->offsets[group];
1318 while (used_mask) {
1319 int i = u_bit_scan64(&used_mask);
1320 if (c == 0)
1321 return i;
1322 c--;
1323 }
1324
1325 return IRIS_SURFACE_NOT_USED;
1326 }
1327
1328 static void
1329 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
1330 nir_instr *instr, nir_src *src,
1331 enum iris_surface_group group)
1332 {
1333 assert(bt->sizes[group] > 0);
1334
1335 b->cursor = nir_before_instr(instr);
1336 nir_def *bti;
1337 if (nir_src_is_const(*src)) {
1338 uint32_t index = nir_src_as_uint(*src);
1339 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1340 src->ssa->bit_size);
1341 } else {
1342 /* Indirect usage makes all the surfaces of the group available,
1343 * so we can just add the base.
1344 */
1345 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1346 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1347 }
1348 nir_src_rewrite(src, bti);
1349 }
1350
1351 static void
1352 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
1353 enum iris_surface_group group)
1354 {
1355 assert(bt->sizes[group] > 0);
1356
1357 if (nir_src_is_const(*src)) {
1358 uint64_t index = nir_src_as_uint(*src);
1359 assert(index < bt->sizes[group]);
1360 bt->used_mask[group] |= 1ull << index;
1361 } else {
1362 /* There's an indirect usage, we need all the surfaces. */
1363 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1364 }
1365 }
1366
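/* Debug escape hatch: INTEL_DISABLE_COMPACT_BINDING_TABLE keeps every
 * binding table slot allocated, which can make the tables easier to
 * correlate with the original shader while debugging.
 */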
1367 static bool
1368 skip_compacting_binding_tables(void)
1369 {
1370 static int skip = -1;
1371 if (skip < 0)
1372 skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1373 return skip;
1374 }
1375
1376 /**
1377 * Set up the binding table indices and apply to the shader.
1378 */
1379 static void
1380 iris_setup_binding_table(const struct intel_device_info *devinfo,
1381 struct nir_shader *nir,
1382 struct iris_binding_table *bt,
1383 unsigned num_render_targets,
1384 unsigned num_system_values,
1385 unsigned num_cbufs,
1386 bool use_null_rt)
1387 {
1388 const struct shader_info *info = &nir->info;
1389
1390 memset(bt, 0, sizeof(*bt));
1391
1392 /* Set the sizes for each surface group. For some groups, we already know
1393 * upfront how many will be used, so mark them.
1394 */
1395 if (info->stage == MESA_SHADER_FRAGMENT) {
1396 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1397 /* All render targets used. */
1398 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1399 BITFIELD64_MASK(num_render_targets);
1400
1401 /* Set up the render target read surface group in order to support non-coherent
1402 * framebuffer fetch on Gfx8
1403 */
1404 if (devinfo->ver == 8 && info->outputs_read) {
1405 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1406 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1407 BITFIELD64_MASK(num_render_targets);
1408 }
1409
1410 bt->use_null_rt = use_null_rt;
1411 } else if (info->stage == MESA_SHADER_COMPUTE) {
1412 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1413 }
1414
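/* Textures can need more than 64 binding table entries, so they are split
 * into a "low" group covering indices 0..63 and a "high" group covering
 * 64..127.
 */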
1415 assert(ARRAY_SIZE(info->textures_used) >= 4);
1416 int max_tex = BITSET_LAST_BIT(info->textures_used);
1417 assert(max_tex <= 128);
1418 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1419 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1420 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1421 info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1422 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1423 info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1424 bt->samplers_used_mask = info->samplers_used[0];
1425
1426 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1427
1428 /* Allocate an extra slot in the UBO section for NIR constants.
1429 * Binding table compaction will remove it if unnecessary.
1430 *
1431 * We don't include them in iris_compiled_shader::num_cbufs because
1432 * they are uploaded separately from shs->constbuf[], but from a shader
1433 * point of view, they're another UBO (at the end of the section).
1434 */
1435 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1436
1437 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1438
1439 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1440 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1441
1442 /* Mark surfaces used for the cases where we don't have the information available
1443 * upfront.
1444 */
1445 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1446 nir_foreach_block (block, impl) {
1447 nir_foreach_instr (instr, block) {
1448 if (instr->type != nir_instr_type_intrinsic)
1449 continue;
1450
1451 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1452 switch (intrin->intrinsic) {
1453 case nir_intrinsic_load_num_workgroups:
1454 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1455 break;
1456
1457 case nir_intrinsic_load_output:
1458 if (devinfo->ver == 8) {
1459 mark_used_with_src(bt, &intrin->src[0],
1460 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1461 }
1462 break;
1463
1464 case nir_intrinsic_image_size:
1465 case nir_intrinsic_image_load:
1466 case nir_intrinsic_image_store:
1467 case nir_intrinsic_image_atomic:
1468 case nir_intrinsic_image_atomic_swap:
1469 case nir_intrinsic_image_load_raw_intel:
1470 case nir_intrinsic_image_store_raw_intel:
1471 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1472 break;
1473
1474 case nir_intrinsic_load_ubo:
1475 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1476 break;
1477
1478 case nir_intrinsic_store_ssbo:
1479 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1480 break;
1481
1482 case nir_intrinsic_get_ssbo_size:
1483 case nir_intrinsic_ssbo_atomic:
1484 case nir_intrinsic_ssbo_atomic_swap:
1485 case nir_intrinsic_load_ssbo:
1486 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1487 break;
1488
1489 default:
1490 break;
1491 }
1492 }
1493 }
1494
1495 /* When disabled, we just mark everything as used. */
1496 if (unlikely(skip_compacting_binding_tables())) {
1497 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1498 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1499 }
1500
1501 /* Calculate the offsets and the binding table size based on the used
1502 * surfaces. After this point, the functions to go between "group indices"
1503 * and binding table indices can be used.
1504 */
1505 uint32_t next = 0;
1506 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1507 if (bt->used_mask[i] != 0) {
1508 bt->offsets[i] = next;
1509 next += util_bitcount64(bt->used_mask[i]);
1510 }
1511 }
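/* Each binding table entry occupies one 32-bit dword. */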
1512 bt->size_bytes = next * 4;
1513
1514 if (INTEL_DEBUG(DEBUG_BT)) {
1515 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1516 }
1517
1518 /* Apply the binding table indices. The backend compiler is not expected
1519 * to change those, as we haven't set any of the *_start entries in brw
1520 * binding_table.
1521 */
1522 nir_builder b = nir_builder_create(impl);
1523
1524 nir_foreach_block (block, impl) {
1525 nir_foreach_instr (instr, block) {
1526 if (instr->type == nir_instr_type_tex) {
1527 nir_tex_instr *tex = nir_instr_as_tex(instr);
1528 if (tex->texture_index < 64) {
1529 tex->texture_index =
1530 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1531 tex->texture_index);
1532 } else {
1533 tex->texture_index =
1534 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1535 tex->texture_index - 64);
1536 }
1537 continue;
1538 }
1539
1540 if (instr->type != nir_instr_type_intrinsic)
1541 continue;
1542
1543 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1544 switch (intrin->intrinsic) {
1545 case nir_intrinsic_image_size:
1546 case nir_intrinsic_image_load:
1547 case nir_intrinsic_image_store:
1548 case nir_intrinsic_image_atomic:
1549 case nir_intrinsic_image_atomic_swap:
1550 case nir_intrinsic_image_load_raw_intel:
1551 case nir_intrinsic_image_store_raw_intel:
1552 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1553 IRIS_SURFACE_GROUP_IMAGE);
1554 break;
1555
1556 case nir_intrinsic_load_ubo:
1557 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1558 IRIS_SURFACE_GROUP_UBO);
1559 break;
1560
1561 case nir_intrinsic_store_ssbo:
1562 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1563 IRIS_SURFACE_GROUP_SSBO);
1564 break;
1565
1566 case nir_intrinsic_load_output:
1567 if (devinfo->ver == 8) {
1568 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1569 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1570 }
1571 break;
1572
1573 case nir_intrinsic_get_ssbo_size:
1574 case nir_intrinsic_ssbo_atomic:
1575 case nir_intrinsic_ssbo_atomic_swap:
1576 case nir_intrinsic_load_ssbo:
1577 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1578 IRIS_SURFACE_GROUP_SSBO);
1579 break;
1580
1581 default:
1582 break;
1583 }
1584 }
1585 }
1586 }
1587
1588 static void
1589 iris_debug_recompile_brw(struct iris_screen *screen,
1590 struct util_debug_callback *dbg,
1591 struct iris_uncompiled_shader *ish,
1592 const struct brw_base_prog_key *key)
1593 {
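/* Only log actual recompiles, i.e. when at least one other variant of this
* shader already exists.
*/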
1594 if (!ish || list_is_empty(&ish->variants)
1595 || list_is_singular(&ish->variants))
1596 return;
1597
1598 const struct brw_compiler *c = screen->brw;
1599 const struct shader_info *info = &ish->nir->info;
1600
1601 brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1602 _mesa_shader_stage_to_string(info->stage),
1603 info->name ? info->name : "(no identifier)",
1604 info->label ? info->label : "");
1605
1606 struct iris_compiled_shader *shader =
1607 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1608 const void *old_iris_key = &shader->key;
1609
1610 union brw_any_prog_key old_key;
1611
1612 switch (info->stage) {
1613 case MESA_SHADER_VERTEX:
1614 old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1615 break;
1616 case MESA_SHADER_TESS_CTRL:
1617 old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1618 break;
1619 case MESA_SHADER_TESS_EVAL:
1620 old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1621 break;
1622 case MESA_SHADER_GEOMETRY:
1623 old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1624 break;
1625 case MESA_SHADER_FRAGMENT:
1626 old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1627 break;
1628 case MESA_SHADER_COMPUTE:
1629 old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1630 break;
1631 default:
1632 unreachable("invalid shader stage");
1633 }
1634
1635 brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1636 }
1637
1638 static void
1639 iris_debug_recompile_elk(struct iris_screen *screen,
1640 struct util_debug_callback *dbg,
1641 struct iris_uncompiled_shader *ish,
1642 const struct elk_base_prog_key *key)
1643 {
1644 if (!ish || list_is_empty(&ish->variants)
1645 || list_is_singular(&ish->variants))
1646 return;
1647
1648 const struct elk_compiler *c = screen->elk;
1649 const struct shader_info *info = &ish->nir->info;
1650
1651 elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1652 _mesa_shader_stage_to_string(info->stage),
1653 info->name ? info->name : "(no identifier)",
1654 info->label ? info->label : "");
1655
1656 struct iris_compiled_shader *shader =
1657 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1658 const void *old_iris_key = &shader->key;
1659
1660 union elk_any_prog_key old_key;
1661
1662 switch (info->stage) {
1663 case MESA_SHADER_VERTEX:
1664 old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1665 break;
1666 case MESA_SHADER_TESS_CTRL:
1667 old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1668 break;
1669 case MESA_SHADER_TESS_EVAL:
1670 old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1671 break;
1672 case MESA_SHADER_GEOMETRY:
1673 old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1674 break;
1675 case MESA_SHADER_FRAGMENT:
1676 old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1677 break;
1678 case MESA_SHADER_COMPUTE:
1679 old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1680 break;
1681 default:
1682 unreachable("invalid shader stage");
1683 }
1684
1685 elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1686 }
1687
1689 static void
1690 check_urb_size(struct iris_context *ice,
1691 unsigned needed_size,
1692 gl_shader_stage stage)
1693 {
1694 unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1695
1696 /* If the last URB allocation wasn't large enough for our needs,
1697 * flag it as needing to be reconfigured. Otherwise, we can use
1698 * the existing config. However, if the URB is constrained, and
1699 * we can shrink our size for this stage, we may be able to gain
1700 * extra concurrency by reconfiguring it to be smaller. Do so.
1701 */
1702 if (last_allocated_size < needed_size ||
1703 (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1704 ice->state.dirty |= IRIS_DIRTY_URB;
1705 }
1706 }
1707
1708 /**
1709 * Get the shader for the last enabled geometry stage.
1710 *
1711 * This stage is the one which will feed stream output and the rasterizer.
1712 */
1713 static gl_shader_stage
1714 last_vue_stage(struct iris_context *ice)
1715 {
1716 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1717 return MESA_SHADER_GEOMETRY;
1718
1719 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1720 return MESA_SHADER_TESS_EVAL;
1721
1722 return MESA_SHADER_VERTEX;
1723 }
1724
1725 /**
1726 * \param added Set to \c true if the variant was added to the list (i.e., a
1727 * variant matching \c key was not found). Set to \c false
1728 * otherwise.
1729 */
1730 static inline struct iris_compiled_shader *
1731 find_or_add_variant(const struct iris_screen *screen,
1732 struct iris_uncompiled_shader *ish,
1733 enum iris_program_cache_id cache_id,
1734 const void *key, unsigned key_size,
1735 bool *added)
1736 {
1737 struct list_head *start = ish->variants.next;
1738
1739 *added = false;
1740
1741 if (screen->precompile) {
1742 /* Check the first list entry. There will always be at least one
1743 * variant in the list (most likely the precompile variant), and
1744 * other contexts only append new variants, so we can safely check
1745 * it without locking, saving that cost in the common case.
1746 */
1747 struct iris_compiled_shader *first =
1748 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1749
1750 if (memcmp(&first->key, key, key_size) == 0) {
1751 util_queue_fence_wait(&first->ready);
1752 return first;
1753 }
1754
1755 /* Skip this one in the loop below */
1756 start = first->link.next;
1757 }
1758
1759 struct iris_compiled_shader *variant = NULL;
1760
1761 /* If it doesn't match, we have to walk the list; other contexts may be
1762 * concurrently appending shaders to it, so we need to lock here.
1763 */
1764 simple_mtx_lock(&ish->lock);
1765
1766 list_for_each_entry_from(struct iris_compiled_shader, v, start,
1767 &ish->variants, link) {
1768 if (memcmp(&v->key, key, key_size) == 0) {
1769 variant = v;
1770 break;
1771 }
1772 }
1773
1774 gl_shader_stage stage = ish->nir->info.stage;
1775
1776 if (variant == NULL) {
1777 variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1778 key_size, key);
1779
1780 /* Append our new variant to the shader's variant list. */
1781 list_addtail(&variant->link, &ish->variants);
1782 *added = true;
1783
1784 simple_mtx_unlock(&ish->lock);
1785 } else {
1786 simple_mtx_unlock(&ish->lock);
1787
1788 util_queue_fence_wait(&variant->ready);
1789 }
1790
1791 assert(stage == variant->stage);
1792 return variant;
1793 }
1794
1795 static void
1796 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1797 UNUSED int thread_index)
1798 {
1799 free(_job);
1800 }
1801
1802 static void
1803 iris_schedule_compile(struct iris_screen *screen,
1804 struct util_queue_fence *ready_fence,
1805 struct util_debug_callback *dbg,
1806 struct iris_threaded_compile_job *job,
1807 util_queue_execute_func execute)
1809 {
1810 struct util_async_debug_callback async_debug;
1811
1812 if (dbg) {
1813 u_async_debug_init(&async_debug);
1814 job->dbg = &async_debug.base;
1815 }
1816
1817 util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1818 iris_threaded_compile_job_delete, 0);
1819
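/* If synchronous compiles were requested, or the caller supplied a debug
* callback whose messages must be drained before returning, wait now.
*/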
1820 if (screen->driconf.sync_compile || dbg)
1821 util_queue_fence_wait(ready_fence);
1822
1823 if (dbg) {
1824 u_async_debug_drain(&async_debug, dbg);
1825 u_async_debug_cleanup(&async_debug);
1826 }
1827 }
1828
1829 /**
1830 * Compile a vertex shader, and upload the assembly.
1831 */
1832 static void
1833 iris_compile_vs(struct iris_screen *screen,
1834 struct u_upload_mgr *uploader,
1835 struct util_debug_callback *dbg,
1836 struct iris_uncompiled_shader *ish,
1837 struct iris_compiled_shader *shader)
1838 {
1839 const struct intel_device_info *devinfo = screen->devinfo;
1840 void *mem_ctx = ralloc_context(NULL);
1841 uint32_t *system_values;
1842 unsigned num_system_values;
1843 unsigned num_cbufs;
1844
1845 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1846 const struct iris_vs_prog_key *const key = &shader->key.vs;
1847
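/* If user clip planes are enabled, lower them to clip distance writes
* before compiling.
*/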
1848 if (key->vue.nr_userclip_plane_consts) {
1849 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1850 /* Only run the follow-up cleanup passes if clip lowering made progress. */
1851 if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1852 true, false, NULL)) {
1853 nir_lower_io_to_temporaries(nir, impl, true, false);
1854 nir_lower_global_vars_to_local(nir);
1855 nir_lower_vars_to_ssa(nir);
1856 nir_shader_gather_info(nir, impl);
1857 }
1858 }
1859
1860 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1861 &num_system_values, &num_cbufs);
1862
1863 struct iris_binding_table bt;
1864 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1865 num_system_values, num_cbufs, false);
1866
1867 const char *error;
1868 const unsigned *program;
1869 if (screen->brw) {
1870 struct brw_vs_prog_data *brw_prog_data =
1871 rzalloc(mem_ctx, struct brw_vs_prog_data);
1872
1873 brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1874
1875 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1876
1877 brw_compute_vue_map(devinfo,
1878 &brw_prog_data->base.vue_map, nir->info.outputs_written,
1879 nir->info.separate_shader, /* pos_slots */ 1);
1880
1881 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1882
1883 struct brw_compile_vs_params params = {
1884 .base = {
1885 .mem_ctx = mem_ctx,
1886 .nir = nir,
1887 .log_data = dbg,
1888 .source_hash = ish->source_hash,
1889 },
1890 .key = &brw_key,
1891 .prog_data = brw_prog_data,
1892 };
1893
1894 program = brw_compile_vs(screen->brw, &params);
1895 error = params.base.error_str;
1896 if (program) {
1897 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1898 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1899 }
1900 } else {
1901 struct elk_vs_prog_data *elk_prog_data =
1902 rzalloc(mem_ctx, struct elk_vs_prog_data);
1903
1904 elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1905
1906 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1907
1908 elk_compute_vue_map(devinfo,
1909 &elk_prog_data->base.vue_map, nir->info.outputs_written,
1910 nir->info.separate_shader, /* pos_slots */ 1);
1911
1912 struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1913
1914 struct elk_compile_vs_params params = {
1915 .base = {
1916 .mem_ctx = mem_ctx,
1917 .nir = nir,
1918 .log_data = dbg,
1919 .source_hash = ish->source_hash,
1920 },
1921 .key = &elk_key,
1922 .prog_data = elk_prog_data,
1923 };
1924
1925 program = elk_compile_vs(screen->elk, &params);
1926 error = params.base.error_str;
1927 if (program) {
1928 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1929 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1930 }
1931 }
1932
1933 if (program == NULL) {
1934 dbg_printf("Failed to compile vertex shader: %s\n", error);
1935 ralloc_free(mem_ctx);
1936
1937 shader->compilation_failed = true;
1938 util_queue_fence_signal(&shader->ready);
1939
1940 return;
1941 }
1942
1943 shader->compilation_failed = false;
1944
1945 uint32_t *so_decls =
1946 screen->vtbl.create_so_decl_list(&ish->stream_output,
1947 &iris_vue_data(shader)->vue_map);
1948
1949 iris_finalize_program(shader, so_decls,
1950 system_values, num_system_values, 0, num_cbufs, &bt);
1951
1952 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1953 sizeof(*key), key, program);
1954
1955 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1956
1957 ralloc_free(mem_ctx);
1958 }
1959
1960 /**
1961 * Update the current vertex shader variant.
1962 *
1963 * Fill out the key, look in the cache, compile and bind if needed.
1964 */
1965 static void
1966 iris_update_compiled_vs(struct iris_context *ice)
1967 {
1968 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1969 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1970 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1971 struct iris_uncompiled_shader *ish =
1972 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1973
1974 struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1975 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1976
1977 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1978 bool added;
1979 struct iris_compiled_shader *shader =
1980 find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1981
1982 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1983 &key, sizeof(key))) {
1984 iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1985 }
1986
1987 if (shader->compilation_failed)
1988 shader = NULL;
1989
1990 if (old != shader) {
1991 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1992 shader);
1993 ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1994 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1995 IRIS_STAGE_DIRTY_BINDINGS_VS |
1996 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1997 shs->sysvals_need_upload = true;
1998
1999 unsigned urb_entry_size = shader ?
2000 iris_vue_data(shader)->urb_entry_size : 0;
2001 check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
2002 }
2003 }
2004
2005 /**
2006 * Get the shader_info for a given stage, or NULL if the stage is disabled.
2007 */
2008 const struct shader_info *
2009 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2010 {
2011 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2012
2013 if (!ish)
2014 return NULL;
2015
2016 const nir_shader *nir = ish->nir;
2017 return &nir->info;
2018 }
2019
2020 /**
2021 * Get the union of TCS output and TES input slots.
2022 *
2023 * TCS and TES need to agree on a common URB entry layout. In particular,
2024 * the data for all patch vertices is stored in a single URB entry (unlike
2025 * GS which has one entry per input vertex). This means that per-vertex
2026 * array indexing needs a stride.
2027 *
2028 * SSO requires locations to match, but doesn't require the number of
2029 * outputs/inputs to match (in fact, the TCS often has extra outputs).
2030 * So, we need to take the extra step of unifying these on the fly.
2031 */
2032 static void
2033 get_unified_tess_slots(const struct iris_context *ice,
2034 uint64_t *per_vertex_slots,
2035 uint32_t *per_patch_slots)
2036 {
2037 const struct shader_info *tcs =
2038 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2039 const struct shader_info *tes =
2040 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2041
2042 *per_vertex_slots = tes->inputs_read;
2043 *per_patch_slots = tes->patch_inputs_read;
2044
2045 if (tcs) {
2046 *per_vertex_slots |= tcs->outputs_written;
2047 *per_patch_slots |= tcs->patch_outputs_written;
2048 }
2049 }
2050
2051 /**
2052 * Compile a tessellation control shader, and upload the assembly.
2053 */
2054 static void
2055 iris_compile_tcs(struct iris_screen *screen,
2056 struct hash_table *passthrough_ht,
2057 struct u_upload_mgr *uploader,
2058 struct util_debug_callback *dbg,
2059 struct iris_uncompiled_shader *ish,
2060 struct iris_compiled_shader *shader)
2061 {
2062 void *mem_ctx = ralloc_context(NULL);
2063 const struct intel_device_info *devinfo = screen->devinfo;
2064 uint32_t *system_values = NULL;
2065 unsigned num_system_values = 0;
2066 unsigned num_cbufs = 0;
2067
2068 nir_shader *nir;
2069
2070 struct iris_binding_table bt;
2071
2072 const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2073 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2074 struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2075 uint32_t source_hash;
2076
2077 if (ish) {
2078 nir = nir_shader_clone(mem_ctx, ish->nir);
2079 source_hash = ish->source_hash;
2080 } else {
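/* No uncompiled TCS is bound; build a passthrough TCS that simply forwards
* the per-vertex data to the tessellation evaluation shader.
*/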
2081 if (screen->brw) {
2082 nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2083 } else {
2084 assert(screen->elk);
2085 nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2086 }
2087 source_hash = *(uint32_t*)nir->info.source_blake3;
2088 }
2089
2090 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2091 &num_system_values, &num_cbufs);
2092 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2093 num_system_values, num_cbufs, false);
2094
2095 const char *error = NULL;
2096 const unsigned *program;
2097 if (screen->brw) {
2098 struct brw_tcs_prog_data *brw_prog_data =
2099 rzalloc(mem_ctx, struct brw_tcs_prog_data);
2100 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2101
2102 struct brw_compile_tcs_params params = {
2103 .base = {
2104 .mem_ctx = mem_ctx,
2105 .nir = nir,
2106 .log_data = dbg,
2107 .source_hash = source_hash,
2108 },
2109 .key = &brw_key,
2110 .prog_data = brw_prog_data,
2111 };
2112
2113 program = brw_compile_tcs(screen->brw, &params);
2114 error = params.base.error_str;
2115
2116 if (program) {
2117 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2118 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2119 }
2120 } else {
2121 assert(screen->elk);
2122 struct elk_tcs_prog_data *elk_prog_data =
2123 rzalloc(mem_ctx, struct elk_tcs_prog_data);
2124 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2125
2126 struct elk_compile_tcs_params params = {
2127 .base = {
2128 .mem_ctx = mem_ctx,
2129 .nir = nir,
2130 .log_data = dbg,
2131 .source_hash = source_hash,
2132 },
2133 .key = &elk_key,
2134 .prog_data = elk_prog_data,
2135 };
2136
2137 program = elk_compile_tcs(screen->elk, &params);
2138 error = params.base.error_str;
2139
2140 if (program) {
2141 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2142 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2143 }
2144 }
2145
2146 if (program == NULL) {
2147 dbg_printf("Failed to compile control shader: %s\n", error);
2148 ralloc_free(mem_ctx);
2149
2150 shader->compilation_failed = true;
2151 util_queue_fence_signal(&shader->ready);
2152
2153 return;
2154 }
2155
2156 shader->compilation_failed = false;
2157
2158 iris_finalize_program(shader, NULL, system_values,
2159 num_system_values, 0, num_cbufs, &bt);
2160
2161 iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2162 IRIS_CACHE_TCS, sizeof(*key), key, program);
2163
2164 if (ish)
2165 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2166
2167 ralloc_free(mem_ctx);
2168 }
2169
2170 /**
2171 * Update the current tessellation control shader variant.
2172 *
2173 * Fill out the key, look in the cache, compile and bind if needed.
2174 */
2175 static void
2176 iris_update_compiled_tcs(struct iris_context *ice)
2177 {
2178 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2179 struct iris_uncompiled_shader *tcs =
2180 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2181 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2182 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2183 const struct intel_device_info *devinfo = screen->devinfo;
2184
2185 const struct shader_info *tes_info =
2186 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2187 struct iris_tcs_prog_key key = {
2188 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2189 ._tes_primitive_mode = tes_info->tess._primitive_mode,
2190 .input_vertices =
2191 !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2192 .quads_workaround = devinfo->ver < 9 &&
2193 tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2194 tes_info->tess.spacing == TESS_SPACING_EQUAL,
2195 };
2196 get_unified_tess_slots(ice, &key.outputs_written,
2197 &key.patch_outputs_written);
2198 screen->vtbl.populate_tcs_key(ice, &key);
2199
2200 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2201 struct iris_compiled_shader *shader;
2202 bool added = false;
2203
2204 if (tcs != NULL) {
2205 shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2206 sizeof(key), &added);
2207 } else {
2208 /* Look for and possibly create a passthrough TCS */
2209 shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2210
2212 if (shader == NULL) {
2213 shader = iris_create_shader_variant(screen, ice->shaders.cache,
2214 MESA_SHADER_TESS_CTRL,
2215 IRIS_CACHE_TCS, sizeof(key), &key);
2216 added = true;
2217 }
2218
2219 }
2220
2221 /* If the variant was newly added (not found in either cache above),
2222 * compile it now, unless we have an uncompiled shader (ish) and can
2223 * retrieve the binary from the disk cache instead.
2224 */
2225 if (added &&
2226 (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2227 &key, sizeof(key)))) {
2228 iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2229 shader);
2230 }
2231
2232 if (shader->compilation_failed)
2233 shader = NULL;
2234
2235 if (old != shader) {
2236 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2237 shader);
2238 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2239 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2240 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2241 shs->sysvals_need_upload = true;
2242
2243 unsigned urb_entry_size = shader ?
2244 iris_vue_data(shader)->urb_entry_size : 0;
2245 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2246 }
2247 }
2248
2249 /**
2250 * Compile a tessellation evaluation shader, and upload the assembly.
2251 */
2252 static void
2253 iris_compile_tes(struct iris_screen *screen,
2254 struct u_upload_mgr *uploader,
2255 struct util_debug_callback *dbg,
2256 struct iris_uncompiled_shader *ish,
2257 struct iris_compiled_shader *shader)
2258 {
2259 void *mem_ctx = ralloc_context(NULL);
2260 uint32_t *system_values;
2261 const struct intel_device_info *devinfo = screen->devinfo;
2262 unsigned num_system_values;
2263 unsigned num_cbufs;
2264
2265 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2266 const struct iris_tes_prog_key *const key = &shader->key.tes;
2267
2268 if (key->vue.nr_userclip_plane_consts) {
2269 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2270 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2271 true, false, NULL);
2272 nir_lower_io_to_temporaries(nir, impl, true, false);
2273 nir_lower_global_vars_to_local(nir);
2274 nir_lower_vars_to_ssa(nir);
2275 nir_shader_gather_info(nir, impl);
2276 }
2277
2278 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2279 &num_system_values, &num_cbufs);
2280
2281 struct iris_binding_table bt;
2282 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2283 num_system_values, num_cbufs, false);
2284
2285 const char *error;
2286 const unsigned *program;
2287
2288 if (screen->brw) {
2289 struct brw_tes_prog_data *brw_prog_data =
2290 rzalloc(mem_ctx, struct brw_tes_prog_data);
2291
2292 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2293
2294 struct intel_vue_map input_vue_map;
2295 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2296 key->patch_inputs_read);
2297
2298 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2299
2300 struct brw_compile_tes_params params = {
2301 .base = {
2302 .mem_ctx = mem_ctx,
2303 .nir = nir,
2304 .log_data = dbg,
2305 .source_hash = ish->source_hash,
2306 },
2307 .key = &brw_key,
2308 .prog_data = brw_prog_data,
2309 .input_vue_map = &input_vue_map,
2310 };
2311
2312 program = brw_compile_tes(screen->brw, &params);
2313 error = params.base.error_str;
2314
2315 if (program) {
2316 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2317 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2318 }
2319 } else {
2320 struct elk_tes_prog_data *elk_prog_data =
2321 rzalloc(mem_ctx, struct elk_tes_prog_data);
2322
2323 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2324
2325 struct intel_vue_map input_vue_map;
2326 elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2327 key->patch_inputs_read);
2328
2329 struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2330
2331 struct elk_compile_tes_params params = {
2332 .base = {
2333 .mem_ctx = mem_ctx,
2334 .nir = nir,
2335 .log_data = dbg,
2336 .source_hash = ish->source_hash,
2337 },
2338 .key = &elk_key,
2339 .prog_data = elk_prog_data,
2340 .input_vue_map = &input_vue_map,
2341 };
2342
2343 program = elk_compile_tes(screen->elk, &params);
2344 error = params.base.error_str;
2345
2346 if (program) {
2347 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2348 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2349 }
2350 }
2351
2352 if (program == NULL) {
2353 dbg_printf("Failed to compile evaluation shader: %s\n", error);
2354 ralloc_free(mem_ctx);
2355
2356 shader->compilation_failed = true;
2357 util_queue_fence_signal(&shader->ready);
2358
2359 return;
2360 }
2361
2362 shader->compilation_failed = false;
2363
2364 uint32_t *so_decls =
2365 screen->vtbl.create_so_decl_list(&ish->stream_output,
2366 &iris_vue_data(shader)->vue_map);
2367
2368 iris_finalize_program(shader, so_decls, system_values,
2369 num_system_values, 0, num_cbufs, &bt);
2370
2371 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2372 sizeof(*key), key, program);
2373
2374 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2375
2376 ralloc_free(mem_ctx);
2377 }
2378
2379 /**
2380 * Update the current tessellation evaluation shader variant.
2381 *
2382 * Fill out the key, look in the cache, compile and bind if needed.
2383 */
2384 static void
2385 iris_update_compiled_tes(struct iris_context *ice)
2386 {
2387 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2388 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2389 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2390 struct iris_uncompiled_shader *ish =
2391 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2392
2393 struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2394 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2395 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2396
2397 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2398 bool added;
2399 struct iris_compiled_shader *shader =
2400 find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2401
2402 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2403 &key, sizeof(key))) {
2404 iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2405 }
2406
2407 if (shader->compilation_failed)
2408 shader = NULL;
2409
2410 if (old != shader) {
2411 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2412 shader);
2413 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2414 IRIS_STAGE_DIRTY_BINDINGS_TES |
2415 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2416 shs->sysvals_need_upload = true;
2417
2418 unsigned urb_entry_size = shader ?
2419 iris_vue_data(shader)->urb_entry_size : 0;
2420 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2421 }
2422
2423 /* TODO: Could compare and avoid flagging this. */
2424 const struct shader_info *tes_info = &ish->nir->info;
2425 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2426 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2427 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2428 }
2429 }
2430
2431 /**
2432 * Compile a geometry shader, and upload the assembly.
2433 */
2434 static void
2435 iris_compile_gs(struct iris_screen *screen,
2436 struct u_upload_mgr *uploader,
2437 struct util_debug_callback *dbg,
2438 struct iris_uncompiled_shader *ish,
2439 struct iris_compiled_shader *shader)
2440 {
2441 const struct intel_device_info *devinfo = screen->devinfo;
2442 void *mem_ctx = ralloc_context(NULL);
2443 uint32_t *system_values;
2444 unsigned num_system_values;
2445 unsigned num_cbufs;
2446
2447 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2448 const struct iris_gs_prog_key *const key = &shader->key.gs;
2449
2450 if (key->vue.nr_userclip_plane_consts) {
2451 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2452 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2453 false, NULL);
2454 nir_lower_io_to_temporaries(nir, impl, true, false);
2455 nir_lower_global_vars_to_local(nir);
2456 nir_lower_vars_to_ssa(nir);
2457 nir_shader_gather_info(nir, impl);
2458 }
2459
2460 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2461 &num_system_values, &num_cbufs);
2462
2463 struct iris_binding_table bt;
2464 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2465 num_system_values, num_cbufs, false);
2466
2467 const char *error;
2468 const unsigned *program;
2469 if (screen->brw) {
2470 struct brw_gs_prog_data *brw_prog_data =
2471 rzalloc(mem_ctx, struct brw_gs_prog_data);
2472
2473 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2474
2475 brw_compute_vue_map(devinfo,
2476 &brw_prog_data->base.vue_map, nir->info.outputs_written,
2477 nir->info.separate_shader, /* pos_slots */ 1);
2478
2479 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2480
2481 struct brw_compile_gs_params params = {
2482 .base = {
2483 .mem_ctx = mem_ctx,
2484 .nir = nir,
2485 .log_data = dbg,
2486 .source_hash = ish->source_hash,
2487 },
2488 .key = &brw_key,
2489 .prog_data = brw_prog_data,
2490 };
2491
2492 program = brw_compile_gs(screen->brw, &params);
2493 error = params.base.error_str;
2494 if (program) {
2495 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2496 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2497 }
2498 } else {
2499 struct elk_gs_prog_data *elk_prog_data =
2500 rzalloc(mem_ctx, struct elk_gs_prog_data);
2501
2502 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2503
2504 elk_compute_vue_map(devinfo,
2505 &elk_prog_data->base.vue_map, nir->info.outputs_written,
2506 nir->info.separate_shader, /* pos_slots */ 1);
2507
2508 struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2509
2510 struct elk_compile_gs_params params = {
2511 .base = {
2512 .mem_ctx = mem_ctx,
2513 .nir = nir,
2514 .log_data = dbg,
2515 .source_hash = ish->source_hash,
2516 },
2517 .key = &elk_key,
2518 .prog_data = elk_prog_data,
2519 };
2520
2521 program = elk_compile_gs(screen->elk, &params);
2522 error = params.base.error_str;
2523 if (program) {
2524 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2525 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2526 }
2527 }
2528
2529 if (program == NULL) {
2530 dbg_printf("Failed to compile geometry shader: %s\n", error);
2531 ralloc_free(mem_ctx);
2532
2533 shader->compilation_failed = true;
2534 util_queue_fence_signal(&shader->ready);
2535
2536 return;
2537 }
2538
2539 shader->compilation_failed = false;
2540
2541 uint32_t *so_decls =
2542 screen->vtbl.create_so_decl_list(&ish->stream_output,
2543 &iris_vue_data(shader)->vue_map);
2544
2545 iris_finalize_program(shader, so_decls, system_values,
2546 num_system_values, 0, num_cbufs, &bt);
2547
2548 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2549 sizeof(*key), key, program);
2550
2551 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2552
2553 ralloc_free(mem_ctx);
2554 }
2555
2556 /**
2557 * Update the current geometry shader variant.
2558 *
2559 * Fill out the key, look in the cache, compile and bind if needed.
2560 */
2561 static void
2562 iris_update_compiled_gs(struct iris_context *ice)
2563 {
2564 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2565 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2566 struct iris_uncompiled_shader *ish =
2567 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2568 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2569 struct iris_compiled_shader *shader = NULL;
2570 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2571
2572 if (ish) {
2573 struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2574 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2575
2576 bool added;
2577
2578 shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2579 sizeof(key), &added);
2580
2581 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2582 &key, sizeof(key))) {
2583 iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2584 }
2585
2586 if (shader->compilation_failed)
2587 shader = NULL;
2588 }
2589
2590 if (old != shader) {
2591 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2592 shader);
2593 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2594 IRIS_STAGE_DIRTY_BINDINGS_GS |
2595 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2596 shs->sysvals_need_upload = true;
2597
2598 unsigned urb_entry_size = shader ?
2599 iris_vue_data(shader)->urb_entry_size : 0;
2600 check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2601 }
2602 }
2603
2604 /**
2605 * Compile a fragment (pixel) shader, and upload the assembly.
2606 */
2607 static void
2608 iris_compile_fs(struct iris_screen *screen,
2609 struct u_upload_mgr *uploader,
2610 struct util_debug_callback *dbg,
2611 struct iris_uncompiled_shader *ish,
2612 struct iris_compiled_shader *shader,
2613 struct intel_vue_map *vue_map)
2614 {
2615 void *mem_ctx = ralloc_context(NULL);
2616 uint32_t *system_values;
2617 const struct intel_device_info *devinfo = screen->devinfo;
2618 unsigned num_system_values;
2619 unsigned num_cbufs;
2620
2621 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2622 const struct iris_fs_prog_key *const key = &shader->key.fs;
2623
2624 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2625 &num_system_values, &num_cbufs);
2626
2627 /* Lower output variables to load_output intrinsics before setting up
2628 * binding tables, so iris_setup_binding_table can map any load_output
2629 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2630 * non-coherent framebuffer fetches.
2631 */
2632 brw_nir_lower_fs_outputs(nir);
2633
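/* The compiler may need a null render target slot (e.g. when there are no
* real color attachments to write); if so, reserve one in the binding table.
*/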
2634 int null_rts = brw_nir_fs_needs_null_rt(devinfo, nir,
2635 key->multisample_fbo,
2636 key->alpha_to_coverage) ? 1 : 0;
2637
2638 struct iris_binding_table bt;
2639 iris_setup_binding_table(devinfo, nir, &bt,
2640 MAX2(key->nr_color_regions, null_rts),
2641 num_system_values, num_cbufs, null_rts != 0);
2642
2643 const char *error;
2644 const unsigned *program;
2645
2646 if (screen->brw) {
2647 struct brw_wm_prog_data *brw_prog_data =
2648 rzalloc(mem_ctx, struct brw_wm_prog_data);
2649
2650 brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2651
2652 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2653
2654 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2655
2656 struct brw_compile_fs_params params = {
2657 .base = {
2658 .mem_ctx = mem_ctx,
2659 .nir = nir,
2660 .log_data = dbg,
2661 .source_hash = ish->source_hash,
2662 },
2663 .key = &brw_key,
2664 .prog_data = brw_prog_data,
2665
2666 .allow_spilling = true,
2667 .max_polygons = UCHAR_MAX,
2668 .vue_map = vue_map,
2669 };
2670
2671 program = brw_compile_fs(screen->brw, &params);
2672 error = params.base.error_str;
2673 if (program) {
2674 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2675 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2676 }
2677 } else {
2678 struct elk_wm_prog_data *elk_prog_data =
2679 rzalloc(mem_ctx, struct elk_wm_prog_data);
2680
2681 elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2682
2683 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2684
2685 struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2686
2687 struct elk_compile_fs_params params = {
2688 .base = {
2689 .mem_ctx = mem_ctx,
2690 .nir = nir,
2691 .log_data = dbg,
2692 .source_hash = ish->source_hash,
2693 },
2694 .key = &elk_key,
2695 .prog_data = elk_prog_data,
2696
2697 .allow_spilling = true,
2698 .max_polygons = UCHAR_MAX,
2699 .vue_map = vue_map,
2700 };
2701
2702 program = elk_compile_fs(screen->elk, &params);
2703 error = params.base.error_str;
2704 if (program) {
2705 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2706 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2707 }
2708 }
2709
2710 if (program == NULL) {
2711 dbg_printf("Failed to compile fragment shader: %s\n", error);
2712 ralloc_free(mem_ctx);
2713
2714 shader->compilation_failed = true;
2715 util_queue_fence_signal(&shader->ready);
2716
2717 return;
2718 }
2719
2720 shader->compilation_failed = false;
2721
2722 iris_finalize_program(shader, NULL, system_values,
2723 num_system_values, 0, num_cbufs, &bt);
2724
2725 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2726 sizeof(*key), key, program);
2727
2728 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2729
2730 ralloc_free(mem_ctx);
2731 }
2732
2733 /**
2734 * Update the current fragment shader variant.
2735 *
2736 * Fill out the key, look in the cache, compile and bind if needed.
2737 */
2738 static void
2739 iris_update_compiled_fs(struct iris_context *ice)
2740 {
2741 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2742 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2743 struct iris_uncompiled_shader *ish =
2744 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2745 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2746 struct iris_fs_prog_key key = { KEY_INIT(base) };
2747 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2748
2749 struct intel_vue_map *last_vue_map =
2750 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2751
2752 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2753 key.input_slots_valid = last_vue_map->slots_valid;
2754
2755 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2756 bool added;
2757 struct iris_compiled_shader *shader =
2758 find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2759 sizeof(key), &added);
2760
2761 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2762 &key, sizeof(key))) {
2763 iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2764 }
2765
2766 if (shader->compilation_failed)
2767 shader = NULL;
2768
2769 if (old != shader) {
2770 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2771 // toggles. Might be able to avoid flagging SBE too.
2772 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2773 shader);
2774 ice->state.dirty |= IRIS_DIRTY_WM |
2775 IRIS_DIRTY_CLIP |
2776 IRIS_DIRTY_SBE;
2777 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2778 IRIS_STAGE_DIRTY_BINDINGS_FS |
2779 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2780 shs->sysvals_need_upload = true;
2781 }
2782 }
2783
2784 /**
2785 * Update the last enabled stage's VUE map.
2786 *
2787 * When the shader feeding the rasterizer's output interface changes, we
2788 * need to re-emit various packets.
2789 */
2790 static void
2791 update_last_vue_map(struct iris_context *ice,
2792 struct iris_compiled_shader *shader)
2793 {
2794 const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2795 const struct intel_vue_map *old_map =
2796 !ice->shaders.last_vue_shader ? NULL :
2797 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2798 const uint64_t changed_slots =
2799 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2800
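/* If the last VUE stage started or stopped writing gl_ViewportIndex, the
* number of viewports changes, so viewport, scissor, and clip state must be
* re-emitted.
*/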
2801 if (changed_slots & VARYING_BIT_VIEWPORT) {
2802 ice->state.num_viewports =
2803 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2804 ice->state.dirty |= IRIS_DIRTY_CLIP |
2805 IRIS_DIRTY_SF_CL_VIEWPORT |
2806 IRIS_DIRTY_CC_VIEWPORT |
2807 IRIS_DIRTY_SCISSOR_RECT;
2808 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2809 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2810 }
2811
2812 if (changed_slots & VARYING_BIT_LAYER) {
2813 ice->state.dirty |= IRIS_DIRTY_CLIP;
2814 }
2815
2816 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2817 ice->state.dirty |= IRIS_DIRTY_SBE;
2818 }
2819
2820 iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2821 }
2822
2823 static void
2824 iris_update_pull_constant_descriptors(struct iris_context *ice,
2825 gl_shader_stage stage)
2826 {
2827 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2828
2829 if (!shader || !shader->has_ubo_pull)
2830 return;
2831
2832 struct iris_shader_state *shs = &ice->state.shaders[stage];
2833 bool any_new_descriptors =
2834 shader->num_system_values > 0 && shs->sysvals_need_upload;
2835
2836 unsigned bound_cbufs = shs->bound_cbufs;
2837
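/* Upload surface states for any bound constant buffers that don't have one
* yet; pull-constant (UBO) loads access them through the binding table.
*/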
2838 while (bound_cbufs) {
2839 const int i = u_bit_scan(&bound_cbufs);
2840 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2841 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2842 if (!surf_state->res && cbuf->buffer) {
2843 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2844 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2845 any_new_descriptors = true;
2846 }
2847 }
2848
2849 if (any_new_descriptors)
2850 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2851 }
2852
2853 /**
2854 * Update the current shader variants for the given state.
2855 *
2856 * This should be called on every draw call to ensure that the correct
2857 * shaders are bound. It will also flag any dirty state triggered by
2858 * swapping out those shaders.
2859 */
2860 void
2861 iris_update_compiled_shaders(struct iris_context *ice)
2862 {
2863 const uint64_t stage_dirty = ice->state.stage_dirty;
2864
2865 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2866 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2867 struct iris_uncompiled_shader *tes =
2868 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2869 if (tes) {
2870 iris_update_compiled_tcs(ice);
2871 iris_update_compiled_tes(ice);
2872 } else {
2873 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2874 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2875 ice->state.stage_dirty |=
2876 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2877 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2878 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2879
2880 if (ice->shaders.urb.constrained)
2881 ice->state.dirty |= IRIS_DIRTY_URB;
2882 }
2883 }
2884
2885 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2886 iris_update_compiled_vs(ice);
2887 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2888 iris_update_compiled_gs(ice);
2889
2890 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2891 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2892 const struct iris_compiled_shader *gs =
2893 ice->shaders.prog[MESA_SHADER_GEOMETRY];
2894 const struct iris_compiled_shader *tes =
2895 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2896
2897 bool points_or_lines = false;
2898
2899 if (gs) {
2900 const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2901 points_or_lines =
2902 gs_data->output_topology == _3DPRIM_POINTLIST ||
2903 gs_data->output_topology == _3DPRIM_LINESTRIP;
2904 } else if (tes) {
2905 const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2906 points_or_lines =
2907 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2908 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2909 }
2910
2911 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2912 /* Outbound to XY Clip enables */
2913 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2914 ice->state.dirty |= IRIS_DIRTY_CLIP;
2915 }
2916 }
2917
2918 gl_shader_stage last_stage = last_vue_stage(ice);
2919 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2920 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2921 update_last_vue_map(ice, shader);
2922 if (ice->state.streamout != shader->streamout) {
2923 ice->state.streamout = shader->streamout;
2924 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2925 }
2926
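/* Update the stream output target strides to match the last enabled VUE
* stage's stream output layout (converting dwords to bytes).
*/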
2927 if (ice->state.streamout_active) {
2928 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2929 struct iris_stream_output_target *so =
2930 (void *) ice->state.so_target[i];
2931 if (so)
2932 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2933 }
2934 }
2935
2936 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2937 iris_update_compiled_fs(ice);
2938
2939 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2940 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2941 iris_update_pull_constant_descriptors(ice, i);
2942 }
2943 }
2944
2945 static void
2946 iris_compile_cs(struct iris_screen *screen,
2947 struct u_upload_mgr *uploader,
2948 struct util_debug_callback *dbg,
2949 struct iris_uncompiled_shader *ish,
2950 struct iris_compiled_shader *shader)
2951 {
2952 void *mem_ctx = ralloc_context(NULL);
2953 uint32_t *system_values;
2954 const struct intel_device_info *devinfo = screen->devinfo;
2955 unsigned num_system_values;
2956 unsigned num_cbufs;
2957
2958 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2959 const struct iris_cs_prog_key *const key = &shader->key.cs;
2960
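/* Lower compute shader system value intrinsics (local invocation IDs and
* related values) into the form the backend compiler expects.
*/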
2961 if (screen->brw)
2962 NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
2963 else
2964 NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
2965
2966 iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
2967 &system_values, &num_system_values, &num_cbufs);
2968
2969 struct iris_binding_table bt;
2970 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2971 num_system_values, num_cbufs, false);
2972
2973 const char *error;
2974 const unsigned *program;
2975
2976 if (screen->brw) {
2977 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2978
2979 struct brw_cs_prog_data *brw_prog_data =
2980 rzalloc(mem_ctx, struct brw_cs_prog_data);
2981
2982 struct brw_compile_cs_params params = {
2983 .base = {
2984 .mem_ctx = mem_ctx,
2985 .nir = nir,
2986 .log_data = dbg,
2987 .source_hash = ish->source_hash,
2988 },
2989 .key = &brw_key,
2990 .prog_data = brw_prog_data,
2991 };
2992
2993 program = brw_compile_cs(screen->brw, &params);
2994 error = params.base.error_str;
2995 if (program) {
2996 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2997 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2998 }
2999 } else {
3000 struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
3001
3002 struct elk_cs_prog_data *elk_prog_data =
3003 rzalloc(mem_ctx, struct elk_cs_prog_data);
3004
3005 struct elk_compile_cs_params params = {
3006 .base = {
3007 .mem_ctx = mem_ctx,
3008 .nir = nir,
3009 .log_data = dbg,
3010 .source_hash = ish->source_hash,
3011 },
3012 .key = &elk_key,
3013 .prog_data = elk_prog_data,
3014 };
3015
3016 program = elk_compile_cs(screen->elk, &params);
3017 error = params.base.error_str;
3018 if (program) {
3019 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3020 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3021 }
3022 }
3023
3024 if (program == NULL) {
3025 dbg_printf("Failed to compile compute shader: %s\n", error);
3026
3027 shader->compilation_failed = true;
3028 util_queue_fence_signal(&shader->ready);
3029
3030 return;
3031 }
3032
3033 shader->compilation_failed = false;
3034
3035 iris_finalize_program(shader, NULL, system_values,
3036 num_system_values, ish->kernel_input_size, num_cbufs,
3037 &bt);
3038
3039 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3040 sizeof(*key), key, program);
3041
3042 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3043
3044 ralloc_free(mem_ctx);
3045 }
3046
3047 static void
3048 iris_update_compiled_cs(struct iris_context *ice)
3049 {
3050 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3051 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3052 struct iris_uncompiled_shader *ish =
3053 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3054 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3055 struct iris_cs_prog_key key = { KEY_INIT(base) };
3056 screen->vtbl.populate_cs_key(ice, &key);
3057
3058 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3059 bool added;
3060 struct iris_compiled_shader *shader =
3061 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3062 sizeof(key), &added);
3063
3064 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3065 &key, sizeof(key))) {
3066 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3067 }
3068
3069 if (shader->compilation_failed)
3070 shader = NULL;
3071
3072 if (old != shader) {
3073 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3074 shader);
3075 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3076 IRIS_STAGE_DIRTY_BINDINGS_CS |
3077 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3078 shs->sysvals_need_upload = true;
3079 }
3080 }
3081
3082 void
3083 iris_update_compiled_compute_shader(struct iris_context *ice)
3084 {
3085 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3086 iris_update_compiled_cs(ice);
3087
3088 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3089 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3090 }
3091
3092 void
3093 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3094 struct iris_compiled_shader *shader,
3095 unsigned threads,
3096 uint32_t *dst)
3097 {
3098 struct iris_cs_data *cs_data = iris_cs_data(shader);
3099 assert(iris_cs_push_const_total_size(shader, threads) > 0);
3100 assert(cs_data->push.cross_thread.size == 0);
3101 assert(cs_data->push.per_thread.dwords == 1);
3102 assert(cs_data->first_param_is_builtin_subgroup_id);
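/* With no cross-thread constants and a single per-thread dword (the built-in
* subgroup ID), each thread gets an 8-dword block whose first dword is simply
* its thread index.
*/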
3103 for (unsigned t = 0; t < threads; t++)
3104 dst[8 * t] = t;
3105 }
3106
3107 /**
3108 * Allocate scratch BOs as needed for the given per-thread size and stage.
3109 */
3110 struct iris_bo *
3111 iris_get_scratch_space(struct iris_context *ice,
3112 unsigned per_thread_scratch,
3113 gl_shader_stage stage)
3114 {
3115 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3116 struct iris_bufmgr *bufmgr = screen->bufmgr;
3117 const struct intel_device_info *devinfo = screen->devinfo;
3118
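/* Per-thread scratch sizes are powers of two with a 1KB minimum; encode the
* size as log2(size) - 10 for use as an index into the scratch BO arrays.
*/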
3119 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3120 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3121 assert(per_thread_scratch == 1 << (encoded_size + 10));
3122
3123 /* On GFX version 12.5, scratch access changed to a surface-based model.
3124 * Instead of each shader type having its own layout based on IDs passed
3125 * from the relevant fixed-function unit, all scratch access is based on
3126 * thread IDs like it always has been for compute.
3127 */
3128 if (devinfo->verx10 >= 125)
3129 stage = MESA_SHADER_COMPUTE;
3130
3131 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3132
3133 if (!*bop) {
3134 assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3135 uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3136 *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3137 IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3138 }
3139
3140 return *bop;
3141 }
3142
3143 const struct iris_state_ref *
3144 iris_get_scratch_surf(struct iris_context *ice,
3145 unsigned per_thread_scratch)
3146 {
3147 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3148 ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3149
3150 assert(devinfo->verx10 >= 125);
3151
3152 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3153 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3154 assert(per_thread_scratch == 1 << (encoded_size + 10));
3155
3156 struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3157
3158 if (ref->res)
3159 return ref;
3160
3161 struct iris_bo *scratch_bo =
3162 iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3163
3164 void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3165 screen->isl_dev.ss.size, 64);
3166
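/* Describe the scratch BO as a RAW buffer surface whose stride is the
 * per-thread scratch size, so the hardware can derive each thread's
 * offset from its thread ID.
 */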
3167 isl_buffer_fill_state(&screen->isl_dev, map,
3168 .address = scratch_bo->address,
3169 .size_B = scratch_bo->size,
3170 .format = ISL_FORMAT_RAW,
3171 .swizzle = ISL_SWIZZLE_IDENTITY,
3172 .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3173 .stride_B = per_thread_scratch,
3174 .is_scratch = true);
3175
3176 return ref;
3177 }
3178
3179 /* ------------------------------------------------------------------- */
3180
3181 /**
3182 * The pipe->create_[stage]_state() driver hooks.
3183 *
3184 * Performs basic NIR preprocessing, records any state dependencies, and
3185 * returns an iris_uncompiled_shader as the Gallium CSO.
3186 *
3187 * Actual shader compilation to assembly happens later, at first use.
3188 */
3189 static void *
3190 iris_create_uncompiled_shader(struct iris_screen *screen,
3191 nir_shader *nir,
3192 const struct pipe_stream_output_info *so_info)
3193 {
3194 struct iris_uncompiled_shader *ish =
3195 calloc(1, sizeof(struct iris_uncompiled_shader));
3196 if (!ish)
3197 return NULL;
3198
3199 pipe_reference_init(&ish->ref, 1);
3200 list_inithead(&ish->variants);
3201 simple_mtx_init(&ish->lock, mtx_plain);
3202 util_queue_fence_init(&ish->ready);
3203
3204 ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3205
3206 ish->program_id = get_new_program_id(screen);
3207 ish->nir = nir;
3208 if (so_info) {
3209 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3210 update_so_info(&ish->stream_output, nir->info.outputs_written);
3211 }
3212
3213 /* Use lowest dword of source shader blake3 for shader hash. */
3214 ish->source_hash = *(uint32_t*)nir->info.source_blake3;
3215
3216 if (screen->disk_cache) {
3217 /* Serialize the NIR to a binary blob that we can hash for the disk
3218 * cache. Drop unnecessary information (like variable names)
3219 * so the serialized NIR is smaller, and also to let us detect more
3220 * isomorphic shaders when hashing, increasing cache hits.
3221 */
3222 struct blob blob;
3223 blob_init(&blob);
3224 nir_serialize(&blob, nir, true);
3225 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3226 blob_finish(&blob);
3227 }
3228
3229 return ish;
3230 }
3231
3232 static void *
3233 iris_create_compute_state(struct pipe_context *ctx,
3234 const struct pipe_compute_state *state)
3235 {
3236 struct iris_context *ice = (void *) ctx;
3237 struct iris_screen *screen = (void *) ctx->screen;
3238 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3239
3240 nir_shader *nir;
3241 switch (state->ir_type) {
3242 case PIPE_SHADER_IR_NIR:
3243 nir = (void *)state->prog;
3244 break;
3245
3246 default:
3247 unreachable("Unsupported IR");
3248 }
3249
3250 /* Most of iris doesn't really care about the difference between compute
3251 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so
3252 * it's way easier if we just normalize to COMPUTE here.
3253 */
3254 assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3255 nir->info.stage == MESA_SHADER_KERNEL);
3256 nir->info.stage = MESA_SHADER_COMPUTE;
3257
3258 struct iris_uncompiled_shader *ish =
3259 iris_create_uncompiled_shader(screen, nir, NULL);
3260 ish->kernel_input_size = state->req_input_mem;
3261 ish->kernel_shared_size = state->static_shared_mem;
3262
3263 // XXX: disallow more than 64KB of shared variables
3264
3265 if (screen->precompile) {
3266 struct iris_cs_prog_key key = { KEY_INIT(base) };
3267
3268 struct iris_compiled_shader *shader =
3269 iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3270 IRIS_CACHE_CS, sizeof(key), &key);
3271
3272 /* Append our new variant to the shader's variant list. */
3273 list_addtail(&shader->link, &ish->variants);
3274
3275 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3276 &key, sizeof(key))) {
3277 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3278 }
3279 }
3280
3281 return ish;
3282 }
3283
3284 static void
3285 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3286 struct pipe_compute_state_object_info *info)
3287 {
3288 struct iris_screen *screen = (void *) ctx->screen;
3289 struct iris_uncompiled_shader *ish = state;
3290
3291 info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3292 info->private_memory = 0;
3293 info->preferred_simd_size = 32;
3294 info->simd_sizes = 8 | 16 | 32;
3295
3296 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3297 &ish->variants, link) {
3298 info->private_memory = MAX2(info->private_memory,
3299 shader->total_scratch);
3300 }
3301 }
3302
3303 static uint32_t
3304 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3305 const uint32_t block[3])
3306 {
3307 struct iris_context *ice = (void *) ctx;
3308 struct iris_screen *screen = (void *) ctx->screen;
3309 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3310 struct iris_uncompiled_shader *ish = state;
3311
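/* Find or compile the variant for the current compute state, then report
 * the SIMD width its dispatch would use for the given block size.
 */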
3312 struct iris_cs_prog_key key = { KEY_INIT(base) };
3313 screen->vtbl.populate_cs_key(ice, &key);
3314
3315 bool added;
3316 struct iris_compiled_shader *shader =
3317 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3318 sizeof(key), &added);
3319
3320 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3321 &key, sizeof(key))) {
3322 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3323 }
3324
3325 return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3326 }
3327
3328 static void
3329 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3330 {
3331 const struct iris_threaded_compile_job *job =
3332 (struct iris_threaded_compile_job *) _job;
3333
3334 struct iris_screen *screen = job->screen;
3335 struct u_upload_mgr *uploader = job->uploader;
3336 struct util_debug_callback *dbg = job->dbg;
3337 struct iris_uncompiled_shader *ish = job->ish;
3338 struct iris_compiled_shader *shader = job->shader;
3339
3340 switch (ish->nir->info.stage) {
3341 case MESA_SHADER_VERTEX:
3342 iris_compile_vs(screen, uploader, dbg, ish, shader);
3343 break;
3344 case MESA_SHADER_TESS_CTRL:
3345 iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3346 break;
3347 case MESA_SHADER_TESS_EVAL:
3348 iris_compile_tes(screen, uploader, dbg, ish, shader);
3349 break;
3350 case MESA_SHADER_GEOMETRY:
3351 iris_compile_gs(screen, uploader, dbg, ish, shader);
3352 break;
3353 case MESA_SHADER_FRAGMENT:
3354 iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3355 break;
3356
3357 default:
3358 unreachable("Invalid shader stage.");
3359 }
3360 }
3361
3362 static void *
3363 iris_create_shader_state(struct pipe_context *ctx,
3364 const struct pipe_shader_state *state)
3365 {
3366 struct iris_context *ice = (void *) ctx;
3367 struct iris_screen *screen = (void *) ctx->screen;
3368 struct nir_shader *nir;
3369
3370 if (state->type == PIPE_SHADER_IR_TGSI)
3371 nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3372 else
3373 nir = state->ir.nir;
3374
3375 const struct shader_info *const info = &nir->info;
3376 struct iris_uncompiled_shader *ish =
3377 iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3378
3379 union iris_any_prog_key key;
3380 unsigned key_size = 0;
3381
3382 memset(&key, 0, sizeof(key));
3383
3384 switch (info->stage) {
3385 case MESA_SHADER_VERTEX:
3386 /* User clip planes */
3387 if (info->clip_distance_array_size == 0)
3388 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3389
3390 key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3391 key_size = sizeof(key.vs);
3392 break;
3393
3394 case MESA_SHADER_TESS_CTRL: {
3395 key.tcs = (struct iris_tcs_prog_key) {
3396 KEY_INIT(vue.base),
3397 // XXX: make sure the linker fills this out from the TES...
3398 ._tes_primitive_mode =
3399 info->tess._primitive_mode ? info->tess._primitive_mode
3400 : TESS_PRIMITIVE_TRIANGLES,
3401 .outputs_written = info->outputs_written,
3402 .patch_outputs_written = info->patch_outputs_written,
3403 };
3404
3405 /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3406 * We don't have that information, so we guess that the input
3407 * and output patches are the same size.  This guess is often wrong, but we
3408 * can't do much better.
3409 */
3410 if (iris_use_tcs_multi_patch(screen))
3411 key.tcs.input_vertices = info->tess.tcs_vertices_out;
3412
3413 key_size = sizeof(key.tcs);
3414 break;
3415 }
3416
3417 case MESA_SHADER_TESS_EVAL:
3418 /* User clip planes */
3419 if (info->clip_distance_array_size == 0)
3420 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3421
3422 key.tes = (struct iris_tes_prog_key) {
3423 KEY_INIT(vue.base),
3424 // XXX: not ideal, need TCS output/TES input unification
3425 .inputs_read = info->inputs_read,
3426 .patch_inputs_read = info->patch_inputs_read,
3427 };
3428
3429 key_size = sizeof(key.tes);
3430 break;
3431
3432 case MESA_SHADER_GEOMETRY:
3433 /* User clip planes */
3434 if (info->clip_distance_array_size == 0)
3435 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3436
3437 key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3438 key_size = sizeof(key.gs);
3439 break;
3440
3441 case MESA_SHADER_FRAGMENT:
3442 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3443 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3444 (1ull << IRIS_NOS_RASTERIZER) |
3445 (1ull << IRIS_NOS_BLEND);
3446
3447 STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3448
3449 /* The program key needs the VUE map if there are > 16 inputs */
3450 if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3451 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3452 }
3453
3454 const uint64_t color_outputs = info->outputs_written &
3455 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3456 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3457 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3458
3459 bool can_rearrange_varyings =
3460 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3461
3462 const struct intel_device_info *devinfo = screen->devinfo;
3463
3464 key.fs = (struct iris_fs_prog_key) {
3465 KEY_INIT(base),
3466 .nr_color_regions = util_bitcount(color_outputs),
3467 .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
3468 .input_slots_valid =
3469 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3470 };
3471
3472 key_size = sizeof(key.fs);
3473 break;
3474
3475 default:
3476 unreachable("Invalid shader stage.");
3477 }
3478
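/* If precompile is enabled, build a default variant now on the shader
 * compiler queue so the first draw with this shader is less likely to
 * stall on compilation.
 */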
3479 if (screen->precompile) {
3480 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3481
3482 struct iris_compiled_shader *shader =
3483 iris_create_shader_variant(screen, NULL, info->stage,
3484 (enum iris_program_cache_id) info->stage,
3485 key_size, &key);
3486
3487 /* Append our new variant to the shader's variant list. */
3488 list_addtail(&shader->link, &ish->variants);
3489
3490 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3491 &key, key_size)) {
3492 assert(!util_queue_fence_is_signalled(&shader->ready));
3493
3494 struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3495
3496 job->screen = screen;
3497 job->uploader = uploader;
3498 job->ish = ish;
3499 job->shader = shader;
3500
3501 iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3502 iris_compile_shader);
3503 }
3504 }
3505
3506 return ish;
3507 }
3508
3509 /**
3510 * Called when the refcount on the iris_uncompiled_shader reaches 0.
3511 *
3512 * Frees the iris_uncompiled_shader.
3513 *
3514 * \sa iris_delete_shader_state
3515 */
3516 void
3517 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3518 {
3519 struct iris_uncompiled_shader *ish = state;
3520
3521 /* No need to take ish->lock; we hold the last reference to ish */
3522 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3523 &ish->variants, link) {
3524 list_del(&shader->link);
3525
3526 iris_shader_variant_reference(&shader, NULL);
3527 }
3528
3529 simple_mtx_destroy(&ish->lock);
3530 util_queue_fence_destroy(&ish->ready);
3531
3532 ralloc_free(ish->nir);
3533 free(ish);
3534 }
3535
3536 /**
3537 * The pipe->delete_[stage]_state() driver hooks.
3538 *
3539 * \sa iris_destroy_shader_state
3540 */
3541 static void
3542 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3543 {
3544 struct iris_uncompiled_shader *ish = state;
3545 struct iris_context *ice = (void *) ctx;
3546
3547 const gl_shader_stage stage = ish->nir->info.stage;
3548
3549 if (ice->shaders.uncompiled[stage] == ish) {
3550 ice->shaders.uncompiled[stage] = NULL;
3551 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3552 }
3553
3554 if (pipe_reference(&ish->ref, NULL))
3555 iris_destroy_shader_state(ctx, state);
3556 }
3557
3558 /**
3559 * The pipe->bind_[stage]_state() driver hook.
3560 *
3561 * Binds an uncompiled shader as the current one for a particular stage.
3562 * Updates dirty tracking to account for the shader's NOS.
3563 */
3564 static void
3565 bind_shader_state(struct iris_context *ice,
3566 struct iris_uncompiled_shader *ish,
3567 gl_shader_stage stage)
3568 {
3569 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3570 const uint64_t nos = ish ? ish->nos : 0;
3571
3572 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3573 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3574
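/* The sampler state table is sized by the highest used sampler, so a
 * change in that count requires re-emitting sampler states for this stage.
 */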
3575 if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3576 (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3577 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3578 }
3579
3580 ice->shaders.uncompiled[stage] = ish;
3581 ice->state.stage_dirty |= stage_dirty_bit;
3582
3583 /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they change
3584 * (or that they no longer need to do so).
3585 */
3586 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3587 if (nos & (1 << i))
3588 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3589 else
3590 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3591 }
3592 }
3593
3594 static void
3595 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3596 {
3597 struct iris_context *ice = (struct iris_context *)ctx;
3598 struct iris_uncompiled_shader *ish = state;
3599
3600 if (ish) {
3601 const struct shader_info *info = &ish->nir->info;
3602 if (ice->state.window_space_position != info->vs.window_space_position) {
3603 ice->state.window_space_position = info->vs.window_space_position;
3604
3605 ice->state.dirty |= IRIS_DIRTY_CLIP |
3606 IRIS_DIRTY_RASTER |
3607 IRIS_DIRTY_CC_VIEWPORT;
3608 }
3609
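/* gl_VertexID, gl_InstanceID, and draw parameters are delivered through
 * extra vertex elements, so changes here require re-emitting the vertex
 * buffer and vertex element state.
 */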
3610 const bool uses_draw_params =
3611 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3612 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3613 const bool uses_derived_draw_params =
3614 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3615 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3616 const bool needs_sgvs_element = uses_draw_params ||
3617 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3618 BITSET_TEST(info->system_values_read,
3619 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3620
3621 if (ice->state.vs_uses_draw_params != uses_draw_params ||
3622 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3623 ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3624 ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3625 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3626 IRIS_DIRTY_VERTEX_ELEMENTS;
3627 }
3628
3629 ice->state.vs_uses_draw_params = uses_draw_params;
3630 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3631 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3632 ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3633 }
3634
3635 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3636 }
3637
3638 static void
3639 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3640 {
3641 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3642 }
3643
3644 static void
3645 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3646 {
3647 struct iris_context *ice = (struct iris_context *)ctx;
3648 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3649 const struct intel_device_info *devinfo = screen->devinfo;
3650
3651 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3652 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3653 ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3654 IRIS_DIRTY_VFG : 0);
3655
3656 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3657 }
3658
3659 static void
3660 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3661 {
3662 struct iris_context *ice = (struct iris_context *)ctx;
3663
3664 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3665 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3666 ice->state.dirty |= IRIS_DIRTY_URB;
3667
3668 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3669 }
3670
3671 static void
3672 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3673 {
3674 struct iris_context *ice = (struct iris_context *) ctx;
3675 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3676 const struct intel_device_info *devinfo = screen->devinfo;
3677 struct iris_uncompiled_shader *old_ish =
3678 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3679 struct iris_uncompiled_shader *new_ish = state;
3680
3681 const unsigned color_bits =
3682 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3683 BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3684
3685 /* Fragment shader outputs influence HasWriteableRT */
3686 if (!old_ish || !new_ish ||
3687 (old_ish->nir->info.outputs_written & color_bits) !=
3688 (new_ish->nir->info.outputs_written & color_bits))
3689 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3690
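/* The Gfx8 PMA stall workaround depends on fragment shader properties,
 * so re-evaluate it whenever the FS changes.
 */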
3691 if (devinfo->ver == 8)
3692 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3693
3694 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3695 }
3696
3697 static void
3698 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3699 {
3700 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3701 }
3702
3703 static char *
3704 iris_finalize_nir(struct pipe_screen *_screen, struct nir_shader *nir)
3705 {
3706 struct iris_screen *screen = (struct iris_screen *)_screen;
3707 const struct intel_device_info *devinfo = screen->devinfo;
3708
3709 NIR_PASS_V(nir, iris_fix_edge_flags);
3710
3711 if (screen->brw) {
3712 struct brw_nir_compiler_opts opts = {};
3713 brw_preprocess_nir(screen->brw, nir, &opts);
3714
3715 NIR_PASS_V(nir, brw_nir_lower_storage_image,
3716 &(struct brw_nir_lower_storage_image_opts) {
3717 .devinfo = devinfo,
3718 .lower_loads = true,
3719 .lower_stores = true,
3720 });
3721 } else {
3722 assert(screen->elk);
3723
3724 struct elk_nir_compiler_opts opts = {};
3725 elk_preprocess_nir(screen->elk, nir, &opts);
3726
3727 NIR_PASS_V(nir, elk_nir_lower_storage_image,
3728 &(struct elk_nir_lower_storage_image_opts) {
3729 .devinfo = devinfo,
3730 .lower_loads = true,
3731 .lower_stores = true,
3732
3733 /* Iris only uploads the image params used by
3734 * get_size lowering on Gfx8.
3735 */
3736 .lower_get_size = true,
3737 });
3738 }
3739
3740 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3741
3742 nir_sweep(nir);
3743
3744 return NULL;
3745 }
3746
3747 static void
3748 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3749 unsigned max_threads)
3750 {
3751 struct iris_screen *screen = (struct iris_screen *) pscreen;
3752 util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3753 false);
3754 }
3755
3756 static bool
3757 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3758 void *v_shader,
3759 enum pipe_shader_type p_stage)
3760 {
3761 struct iris_screen *screen = (struct iris_screen *) pscreen;
3762
3763 /* Threaded compilation is only used for the precompile. If precompile is
3764 * disabled, threaded compilation is "done."
3765 */
3766 if (!screen->precompile)
3767 return true;
3768
3769 struct iris_uncompiled_shader *ish = v_shader;
3770
3771 /* When precompile is enabled, the first entry is the precompile variant.
3772 * Check the ready fence of the precompile variant.
3773 */
3774 struct iris_compiled_shader *first =
3775 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3776
3777 return util_queue_fence_is_signalled(&first->ready);
3778 }
3779
3780 void
3781 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3782 {
3783 pscreen->is_parallel_shader_compilation_finished =
3784 iris_is_parallel_shader_compilation_finished;
3785 pscreen->set_max_shader_compiler_threads =
3786 iris_set_max_shader_compiler_threads;
3787 pscreen->finalize_nir = iris_finalize_nir;
3788 }
3789
3790 void
3791 iris_init_program_functions(struct pipe_context *ctx)
3792 {
3793 ctx->create_vs_state = iris_create_shader_state;
3794 ctx->create_tcs_state = iris_create_shader_state;
3795 ctx->create_tes_state = iris_create_shader_state;
3796 ctx->create_gs_state = iris_create_shader_state;
3797 ctx->create_fs_state = iris_create_shader_state;
3798 ctx->create_compute_state = iris_create_compute_state;
3799
3800 ctx->delete_vs_state = iris_delete_shader_state;
3801 ctx->delete_tcs_state = iris_delete_shader_state;
3802 ctx->delete_tes_state = iris_delete_shader_state;
3803 ctx->delete_gs_state = iris_delete_shader_state;
3804 ctx->delete_fs_state = iris_delete_shader_state;
3805 ctx->delete_compute_state = iris_delete_shader_state;
3806
3807 ctx->bind_vs_state = iris_bind_vs_state;
3808 ctx->bind_tcs_state = iris_bind_tcs_state;
3809 ctx->bind_tes_state = iris_bind_tes_state;
3810 ctx->bind_gs_state = iris_bind_gs_state;
3811 ctx->bind_fs_state = iris_bind_fs_state;
3812 ctx->bind_compute_state = iris_bind_cs_state;
3813
3814 ctx->get_compute_state_info = iris_get_compute_state_info;
3815 ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3816 }
3817
3818 struct intel_cs_dispatch_info
3819 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3820 const struct iris_compiled_shader *shader,
3821 const uint32_t block[3])
3822 {
3823 if (shader->brw_prog_data) {
3824 return brw_cs_get_dispatch_info(devinfo,
3825 brw_cs_prog_data(shader->brw_prog_data),
3826 block);
3827 } else {
3828 assert(shader->elk_prog_data);
3829 return elk_cs_get_dispatch_info(devinfo,
3830 elk_cs_prog_data(shader->elk_prog_data),
3831 block);
3832 }
3833 }
3834
3835 unsigned
3836 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3837 unsigned threads)
3838 {
3839 if (shader->brw_prog_data) {
3840 return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3841 threads);
3842 } else {
3843 assert(shader->elk_prog_data);
3844 return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3845 threads);
3846 }
3847 }
3848
3849 uint32_t
3850 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3851 enum intel_msaa_flags pushed_msaa_flags)
3852 {
3853 if (shader->brw_prog_data) {
3854 return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3855 pushed_msaa_flags);
3856 } else {
3857 assert(shader->elk_prog_data);
3858 return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3859 pushed_msaa_flags);
3860 }
3861 }
3862
3863 bool
3864 iris_use_tcs_multi_patch(struct iris_screen *screen)
3865 {
3866 return screen->brw && screen->brw->use_tcs_multi_patch;
3867 }
3868
3869 bool
3870 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3871 {
3872 if (screen->brw) {
3873 return screen->brw->indirect_ubos_use_sampler;
3874 } else {
3875 assert(screen->elk);
3876 return screen->elk->indirect_ubos_use_sampler;
3877 }
3878 }
3879
3880 static void
3881 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3882 {
3883 struct util_debug_callback *dbg = data;
3884 va_list args;
3885
3886 if (!dbg->debug_message)
3887 return;
3888
3889 va_start(args, fmt);
3890 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3891 va_end(args);
3892 }
3893
3894 static void
3895 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3896 {
3897 struct util_debug_callback *dbg = data;
3898 va_list args;
3899 va_start(args, fmt);
3900
3901 if (INTEL_DEBUG(DEBUG_PERF)) {
3902 va_list args_copy;
3903 va_copy(args_copy, args);
3904 vfprintf(stderr, fmt, args_copy);
3905 va_end(args_copy);
3906 }
3907
3908 if (dbg->debug_message) {
3909 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3910 }
3911
3912 va_end(args);
3913 }
3914
3915 const void *
3916 iris_get_compiler_options(struct pipe_screen *pscreen,
3917 enum pipe_shader_ir ir,
3918 enum pipe_shader_type pstage)
3919 {
3920 struct iris_screen *screen = (struct iris_screen *) pscreen;
3921 gl_shader_stage stage = stage_from_pipe(pstage);
3922 assert(ir == PIPE_SHADER_IR_NIR);
3923
3924 return screen->brw ? screen->brw->nir_options[stage]
3925 : screen->elk->nir_options[stage];
3926 }
3927
3928 void
3929 iris_compiler_init(struct iris_screen *screen)
3930 {
3931 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
3932 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
3933 STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
3934
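/* Gfx9 and newer use the brw compiler; older platforms fall back to the
 * legacy elk compiler.
 */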
3935 if (screen->devinfo->ver >= 9) {
3936 screen->brw = brw_compiler_create(screen, screen->devinfo);
3937 screen->brw->shader_debug_log = iris_shader_debug_log;
3938 screen->brw->shader_perf_log = iris_shader_perf_log;
3939 } else {
3940 screen->elk = elk_compiler_create(screen, screen->devinfo);
3941 screen->elk->shader_debug_log = iris_shader_debug_log;
3942 screen->elk->shader_perf_log = iris_shader_perf_log;
3943 screen->elk->supports_shader_constants = true;
3944 }
3945 }
3946