1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_program.c
25 *
26 * This file contains the driver interface for compiling shaders.
27 *
28 * See iris_program_cache.c for the in-memory program cache where the
29 * compiled shaders are stored.
30 */
31
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #include "intel/compiler/elk/elk_compiler.h"
50 #include "intel/compiler/elk/elk_nir.h"
51 #include "intel/compiler/elk/elk_prim.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "nir/tgsi_to_nir.h"
55
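/* Helpers for initializing the fields common to all shader program keys:
 * the program string ID and the limit_trig_input_range workaround flag.
 */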
56 #define KEY_INIT(prefix) \
57 .prefix.program_string_id = ish->program_id, \
58 .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
59 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
60 .base.program_string_id = prog_id, \
61 .base.limit_trig_input_range = limit_trig_input
62 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input) \
63 .base.program_string_id = prog_id, \
64 .base.limit_trig_input_range = limit_trig_input
65
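/* Payload describing a single deferred shader compile, handed off to the
 * asynchronous shader compile queue.
 */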
66 struct iris_threaded_compile_job {
67 struct iris_screen *screen;
68 struct u_upload_mgr *uploader;
69 struct util_debug_callback *dbg;
70 struct iris_uncompiled_shader *ish;
71 struct iris_compiled_shader *shader;
72 };
73
74 static unsigned
get_new_program_id(struct iris_screen *screen)
76 {
77 return p_atomic_inc_return(&screen->program_id);
78 }
79
80 static void
iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_wm_prog_data *brw)
83 {
84 assert(shader->stage == MESA_SHADER_FRAGMENT);
85 struct iris_fs_data *iris = &shader->fs;
86
87 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
88 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
89 memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
90 memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
91 iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
92
93 iris->num_varying_inputs = brw->num_varying_inputs;
94 iris->msaa_flags_param = brw->msaa_flags_param;
95 iris->flat_inputs = brw->flat_inputs;
96 iris->inputs = brw->inputs;
97 iris->computed_depth_mode = brw->computed_depth_mode;
98 iris->max_polygons = brw->max_polygons;
99 iris->dispatch_multi = brw->dispatch_multi;
100 iris->computed_stencil = brw->computed_stencil;
101 iris->early_fragment_tests = brw->early_fragment_tests;
102 iris->post_depth_coverage = brw->post_depth_coverage;
103 iris->inner_coverage = brw->inner_coverage;
104 iris->dispatch_8 = brw->dispatch_8;
105 iris->dispatch_16 = brw->dispatch_16;
106 iris->dispatch_32 = brw->dispatch_32;
107 iris->dual_src_blend = brw->dual_src_blend;
108 iris->uses_pos_offset = brw->uses_pos_offset;
109 iris->uses_omask = brw->uses_omask;
110 iris->uses_kill = brw->uses_kill;
111 iris->uses_src_depth = brw->uses_src_depth;
112 iris->uses_src_w = brw->uses_src_w;
113 iris->uses_sample_mask = brw->uses_sample_mask;
114 iris->uses_vmask = brw->uses_vmask;
115 iris->has_side_effects = brw->has_side_effects;
116 iris->pulls_bary = brw->pulls_bary;
117
118 iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
119
120 iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
121 }
122
123 static void
iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_cs_prog_data *brw)
126 {
127 assert(shader->stage == MESA_SHADER_COMPUTE);
128 struct iris_cs_data *iris = &shader->cs;
129
130 iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
131 iris->push.cross_thread.regs = brw->push.cross_thread.regs;
132 iris->push.cross_thread.size = brw->push.cross_thread.size;
133
134 iris->push.per_thread.dwords = brw->push.per_thread.dwords;
135 iris->push.per_thread.regs = brw->push.per_thread.regs;
136 iris->push.per_thread.size = brw->push.per_thread.size;
137
138 iris->local_size[0] = brw->local_size[0];
139 iris->local_size[1] = brw->local_size[1];
140 iris->local_size[2] = brw->local_size[2];
141 iris->prog_offset[0] = brw->prog_offset[0];
142 iris->prog_offset[1] = brw->prog_offset[1];
143 iris->prog_offset[2] = brw->prog_offset[2];
144
145 iris->generate_local_id = brw->generate_local_id;
146 iris->walk_order = brw->walk_order;
147 iris->uses_barrier = brw->uses_barrier;
148 iris->prog_mask = brw->prog_mask;
149
150 iris->first_param_is_builtin_subgroup_id =
151 brw->base.nr_params > 0 &&
152 brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
153 }
154
155 static void
iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
                             struct iris_vue_data *iris)
158 {
159 memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
160
161 iris->urb_read_length = brw->urb_read_length;
162 iris->cull_distance_mask = brw->cull_distance_mask;
163 iris->urb_entry_size = brw->urb_entry_size;
164 iris->dispatch_mode = brw->dispatch_mode;
165 iris->include_vue_handles = brw->include_vue_handles;
166 }
167
168 static void
iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_vs_prog_data *brw)
171 {
172 assert(shader->stage == MESA_SHADER_VERTEX);
173 struct iris_vs_data *iris = &shader->vs;
174
175 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
176
177 iris->uses_vertexid = brw->uses_vertexid;
178 iris->uses_instanceid = brw->uses_instanceid;
179 iris->uses_firstvertex = brw->uses_firstvertex;
180 iris->uses_baseinstance = brw->uses_baseinstance;
181 iris->uses_drawid = brw->uses_drawid;
182 }
183
184 static void
iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
                             const struct brw_tcs_prog_data *brw)
187 {
188 assert(shader->stage == MESA_SHADER_TESS_CTRL);
189 struct iris_tcs_data *iris = &shader->tcs;
190
191 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
192
193 iris->instances = brw->instances;
194 iris->patch_count_threshold = brw->patch_count_threshold;
195 iris->include_primitive_id = brw->include_primitive_id;
196 }
197
198 static void
iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
                             const struct brw_tes_prog_data *brw)
201 {
202 assert(shader->stage == MESA_SHADER_TESS_EVAL);
203 struct iris_tes_data *iris = &shader->tes;
204
205 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
206
207 iris->partitioning = brw->partitioning;
208 iris->output_topology = brw->output_topology;
209 iris->domain = brw->domain;
210 iris->include_primitive_id = brw->include_primitive_id;
211 }
212
213 static void
iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
                            const struct brw_gs_prog_data *brw)
216 {
217 assert(shader->stage == MESA_SHADER_GEOMETRY);
218 struct iris_gs_data *iris = &shader->gs;
219
220 iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
221
222 iris->vertices_in = brw->vertices_in;
223 iris->output_vertex_size_hwords = brw->output_vertex_size_hwords;
224 iris->output_topology = brw->output_topology;
225 iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
226 iris->control_data_format = brw->control_data_format;
227 iris->static_vertex_count = brw->static_vertex_count;
228 iris->invocations = brw->invocations;
229 iris->include_primitive_id = brw->include_primitive_id;
230 }
231
232 void
iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
                         struct brw_stage_prog_data *brw)
235 {
236 STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
237 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
238 shader->ubo_ranges[i].block = brw->ubo_ranges[i].block;
239 shader->ubo_ranges[i].start = brw->ubo_ranges[i].start;
240 shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
241 }
242
243 shader->nr_params = brw->nr_params;
244 shader->total_scratch = brw->total_scratch;
245 shader->total_shared = brw->total_shared;
246 shader->program_size = brw->program_size;
247 shader->const_data_offset = brw->const_data_offset;
248 shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
249 shader->has_ubo_pull = brw->has_ubo_pull;
250 shader->use_alt_mode = brw->use_alt_mode;
251
252 switch (shader->stage) {
253 case MESA_SHADER_FRAGMENT:
254 iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
255 break;
256 case MESA_SHADER_COMPUTE:
257 iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
258 break;
259 case MESA_SHADER_VERTEX:
260 iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
261 break;
262 case MESA_SHADER_TESS_CTRL:
263 iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
264 break;
265 case MESA_SHADER_TESS_EVAL:
266 iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
267 break;
268 case MESA_SHADER_GEOMETRY:
269 iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
270 break;
271 default:
272 unreachable("invalid shader stage");
273 }
274
275 shader->brw_prog_data = brw;
276
277 ralloc_steal(shader, shader->brw_prog_data);
278 ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
279 ralloc_steal(shader->brw_prog_data, brw->param);
280 }
281
282 static void
iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_wm_prog_data *elk)
285 {
286 assert(shader->stage == MESA_SHADER_FRAGMENT);
287 struct iris_fs_data *iris = &shader->fs;
288
289 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
290 STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
291 memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
292 memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
293 iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
294
295 iris->num_varying_inputs = elk->num_varying_inputs;
296 iris->msaa_flags_param = elk->msaa_flags_param;
297 iris->flat_inputs = elk->flat_inputs;
298 iris->inputs = elk->inputs;
299 iris->computed_depth_mode = elk->computed_depth_mode;
300 iris->max_polygons = elk->max_polygons;
301 iris->dispatch_multi = elk->dispatch_multi;
302 iris->computed_stencil = elk->computed_stencil;
303 iris->early_fragment_tests = elk->early_fragment_tests;
304 iris->post_depth_coverage = elk->post_depth_coverage;
305 iris->inner_coverage = elk->inner_coverage;
306 iris->dispatch_8 = elk->dispatch_8;
307 iris->dispatch_16 = elk->dispatch_16;
308 iris->dispatch_32 = elk->dispatch_32;
309 iris->dual_src_blend = elk->dual_src_blend;
310 iris->uses_pos_offset = elk->uses_pos_offset;
311 iris->uses_omask = elk->uses_omask;
312 iris->uses_kill = elk->uses_kill;
313 iris->uses_src_depth = elk->uses_src_depth;
314 iris->uses_src_w = elk->uses_src_w;
315 iris->uses_sample_mask = elk->uses_sample_mask;
316 iris->uses_vmask = elk->uses_vmask;
317 iris->pulls_bary = elk->pulls_bary;
318 iris->has_side_effects = elk->has_side_effects;
319
320 iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
321
322 iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
323 }
324
325 static void
iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_cs_prog_data *elk)
328 {
329 assert(shader->stage == MESA_SHADER_COMPUTE);
330 struct iris_cs_data *iris = &shader->cs;
331
332 iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
333 iris->push.cross_thread.regs = elk->push.cross_thread.regs;
334 iris->push.cross_thread.size = elk->push.cross_thread.size;
335
336 iris->push.per_thread.dwords = elk->push.per_thread.dwords;
337 iris->push.per_thread.regs = elk->push.per_thread.regs;
338 iris->push.per_thread.size = elk->push.per_thread.size;
339
340 iris->local_size[0] = elk->local_size[0];
341 iris->local_size[1] = elk->local_size[1];
342 iris->local_size[2] = elk->local_size[2];
343 iris->prog_offset[0] = elk->prog_offset[0];
344 iris->prog_offset[1] = elk->prog_offset[1];
345 iris->prog_offset[2] = elk->prog_offset[2];
346
347 iris->generate_local_id = elk->generate_local_id;
348 iris->walk_order = elk->walk_order;
349 iris->uses_barrier = elk->uses_barrier;
350 iris->prog_mask = elk->prog_mask;
351
352 iris->first_param_is_builtin_subgroup_id =
353 elk->base.nr_params > 0 &&
354 elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
355 }
356
357 static void
iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
                             struct iris_vue_data *iris)
360 {
361 memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
362
363 iris->urb_read_length = elk->urb_read_length;
364 iris->cull_distance_mask = elk->cull_distance_mask;
365 iris->urb_entry_size = elk->urb_entry_size;
366 iris->dispatch_mode = elk->dispatch_mode;
367 iris->include_vue_handles = elk->include_vue_handles;
368 }
369
370 static void
iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_vs_prog_data *elk)
373 {
374 assert(shader->stage == MESA_SHADER_VERTEX);
375 struct iris_vs_data *iris = &shader->vs;
376
377 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
378
379 iris->uses_vertexid = elk->uses_vertexid;
380 iris->uses_instanceid = elk->uses_instanceid;
381 iris->uses_firstvertex = elk->uses_firstvertex;
382 iris->uses_baseinstance = elk->uses_baseinstance;
383 iris->uses_drawid = elk->uses_drawid;
384 }
385
386 static void
iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
                             const struct elk_tcs_prog_data *elk)
389 {
390 assert(shader->stage == MESA_SHADER_TESS_CTRL);
391 struct iris_tcs_data *iris = &shader->tcs;
392
393 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
394
395 iris->instances = elk->instances;
396 iris->patch_count_threshold = elk->patch_count_threshold;
397 iris->include_primitive_id = elk->include_primitive_id;
398 }
399
400 static void
iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
                             const struct elk_tes_prog_data *elk)
403 {
404 assert(shader->stage == MESA_SHADER_TESS_EVAL);
405 struct iris_tes_data *iris = &shader->tes;
406
407 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
408
409 iris->partitioning = elk->partitioning;
410 iris->output_topology = elk->output_topology;
411 iris->domain = elk->domain;
412 iris->include_primitive_id = elk->include_primitive_id;
413 }
414
415 static void
iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
                            const struct elk_gs_prog_data *elk)
418 {
419 assert(shader->stage == MESA_SHADER_GEOMETRY);
420 struct iris_gs_data *iris = &shader->gs;
421
422 iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
423
424 iris->vertices_in = elk->vertices_in;
425 iris->output_vertex_size_hwords = elk->output_vertex_size_hwords;
426 iris->output_topology = elk->output_topology;
427 iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
428 iris->control_data_format = elk->control_data_format;
429 iris->static_vertex_count = elk->static_vertex_count;
430 iris->invocations = elk->invocations;
431 iris->include_primitive_id = elk->include_primitive_id;
432 }
433
434 void
iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
                         struct elk_stage_prog_data *elk)
437 {
438 STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
439 for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
440 shader->ubo_ranges[i].block = elk->ubo_ranges[i].block;
441 shader->ubo_ranges[i].start = elk->ubo_ranges[i].start;
442 shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
443 }
444
445 shader->nr_params = elk->nr_params;
446 shader->total_scratch = elk->total_scratch;
447 shader->total_shared = elk->total_shared;
448 shader->program_size = elk->program_size;
449 shader->const_data_offset = elk->const_data_offset;
450 shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
451 shader->has_ubo_pull = elk->has_ubo_pull;
452 shader->use_alt_mode = elk->use_alt_mode;
453
454 switch (shader->stage) {
455 case MESA_SHADER_FRAGMENT:
456 iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
457 break;
458 case MESA_SHADER_COMPUTE:
459 iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
460 break;
461 case MESA_SHADER_VERTEX:
462 iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
463 break;
464 case MESA_SHADER_TESS_CTRL:
465 iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
466 break;
467 case MESA_SHADER_TESS_EVAL:
468 iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
469 break;
470 case MESA_SHADER_GEOMETRY:
471 iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
472 break;
473 default:
474 unreachable("invalid shader stage");
475 }
476
477 shader->elk_prog_data = elk;
478
479 ralloc_steal(shader, shader->elk_prog_data);
480 ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
481 ralloc_steal(shader->elk_prog_data, elk->param);
482 }
483
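/* Attach the remaining variant data (streamout state, system values, and the
 * binding table) to a compiled shader, taking ownership of the allocations
 * via ralloc.
 */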
484 void
iris_finalize_program(struct iris_compiled_shader *shader,
                      uint32_t *streamout,
                      uint32_t *system_values,
                      unsigned num_system_values,
                      unsigned kernel_input_size,
                      unsigned num_cbufs,
                      const struct iris_binding_table *bt)
492 {
493 /* There can be only one. */
494 assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
495
496 shader->streamout = streamout;
497 shader->system_values = system_values;
498 shader->num_system_values = num_system_values;
499 shader->kernel_input_size = kernel_input_size;
500 shader->num_cbufs = num_cbufs;
501 shader->bt = *bt;
502
503 ralloc_steal(shader, shader->streamout);
504 ralloc_steal(shader, shader->system_values);
505 }
506
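/* The iris_to_*_key helpers below translate Iris's API-level program keys
 * into the key structures expected by the brw and elk backend compilers.
 */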
507 static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct iris_screen *screen,
                   const struct iris_vs_prog_key *key)
510 {
511 return (struct brw_vs_prog_key) {
512 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
513 key->vue.base.limit_trig_input_range),
514 };
515 }
516
517 static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct iris_screen *screen,
                    const struct iris_tcs_prog_key *key)
520 {
521 return (struct brw_tcs_prog_key) {
522 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
523 key->vue.base.limit_trig_input_range),
524 ._tes_primitive_mode = key->_tes_primitive_mode,
525 .input_vertices = key->input_vertices,
526 .patch_outputs_written = key->patch_outputs_written,
527 .outputs_written = key->outputs_written,
528 };
529 }
530
531 static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct iris_screen *screen,
                    const struct iris_tes_prog_key *key)
534 {
535 return (struct brw_tes_prog_key) {
536 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
537 key->vue.base.limit_trig_input_range),
538 .patch_inputs_read = key->patch_inputs_read,
539 .inputs_read = key->inputs_read,
540 };
541 }
542
543 static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct iris_screen *screen,
                   const struct iris_gs_prog_key *key)
546 {
547 return (struct brw_gs_prog_key) {
548 BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
549 key->vue.base.limit_trig_input_range),
550 };
551 }
552
553 static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct iris_screen *screen,
                   const struct iris_fs_prog_key *key)
556 {
557 return (struct brw_wm_prog_key) {
558 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
559 key->base.limit_trig_input_range),
560 .nr_color_regions = key->nr_color_regions,
561 .flat_shade = key->flat_shade,
562 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
563 .alpha_to_coverage = key->alpha_to_coverage ? BRW_ALWAYS : BRW_NEVER,
564 .clamp_fragment_color = key->clamp_fragment_color,
565 .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
566 .multisample_fbo = key->multisample_fbo ? BRW_ALWAYS : BRW_NEVER,
567 .force_dual_color_blend = key->force_dual_color_blend,
568 .coherent_fb_fetch = key->coherent_fb_fetch,
569 .color_outputs_valid = key->color_outputs_valid,
570 .input_slots_valid = key->input_slots_valid,
571 .ignore_sample_mask_out = !key->multisample_fbo,
572 };
573 }
574
575 static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct iris_screen *screen,
                   const struct iris_cs_prog_key *key)
578 {
579 return (struct brw_cs_prog_key) {
580 BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
581 key->base.limit_trig_input_range),
582 };
583 }
584
585 static struct elk_vs_prog_key
iris_to_elk_vs_key(const struct iris_screen *screen,
                   const struct iris_vs_prog_key *key)
588 {
589 return (struct elk_vs_prog_key) {
590 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
591 key->vue.base.limit_trig_input_range),
592
      /* Don't tell the backend about our clip plane constants; we've
       * already lowered them in NIR and don't want it doing it again.
       */
596 .nr_userclip_plane_consts = 0,
597 };
598 }
599
600 static struct elk_tcs_prog_key
iris_to_elk_tcs_key(const struct iris_screen *screen,
                    const struct iris_tcs_prog_key *key)
603 {
604 return (struct elk_tcs_prog_key) {
605 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
606 key->vue.base.limit_trig_input_range),
607 ._tes_primitive_mode = key->_tes_primitive_mode,
608 .input_vertices = key->input_vertices,
609 .patch_outputs_written = key->patch_outputs_written,
610 .outputs_written = key->outputs_written,
611 .quads_workaround = key->quads_workaround,
612 };
613 }
614
615 static struct elk_tes_prog_key
iris_to_elk_tes_key(const struct iris_screen *screen,
                    const struct iris_tes_prog_key *key)
618 {
619 return (struct elk_tes_prog_key) {
620 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
621 key->vue.base.limit_trig_input_range),
622 .patch_inputs_read = key->patch_inputs_read,
623 .inputs_read = key->inputs_read,
624 };
625 }
626
627 static struct elk_gs_prog_key
iris_to_elk_gs_key(const struct iris_screen *screen,
                   const struct iris_gs_prog_key *key)
630 {
631 return (struct elk_gs_prog_key) {
632 ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
633 key->vue.base.limit_trig_input_range),
634 };
635 }
636
637 static struct elk_wm_prog_key
iris_to_elk_fs_key(const struct iris_screen *screen,
                   const struct iris_fs_prog_key *key)
640 {
641 return (struct elk_wm_prog_key) {
642 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
643 key->base.limit_trig_input_range),
644 .nr_color_regions = key->nr_color_regions,
645 .flat_shade = key->flat_shade,
646 .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
647 .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
648 .clamp_fragment_color = key->clamp_fragment_color,
649 .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
650 .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
651 .force_dual_color_blend = key->force_dual_color_blend,
652 .coherent_fb_fetch = key->coherent_fb_fetch,
653 .color_outputs_valid = key->color_outputs_valid,
654 .input_slots_valid = key->input_slots_valid,
655 .ignore_sample_mask_out = !key->multisample_fbo,
656 };
657 }
658
659 static struct elk_cs_prog_key
iris_to_elk_cs_key(const struct iris_screen *screen,
                   const struct iris_cs_prog_key *key)
662 {
663 return (struct elk_cs_prog_key) {
664 ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
665 key->base.limit_trig_input_range),
666 };
667 }
668
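/* Suballocate `size` bytes of GPU-visible memory from the uploader, storing
 * the resulting buffer and offset in `ref` and returning a CPU mapping.
 */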
669 static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
674 {
675 void *p = NULL;
676 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
677 return p;
678 }
679
680 void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                isl_surf_usage_flags_t usage)
685 {
686 struct pipe_context *ctx = &ice->ctx;
687 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
688 bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
689
690 void *map =
691 upload_state(ice->state.surface_uploader, surf_state,
692 screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
694 surf_state->res = NULL;
695 return;
696 }
697
698 struct iris_resource *res = (void *) buf->buffer;
699 struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
700 surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
701
702 const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
703
704 isl_buffer_fill_state(&screen->isl_dev, map,
705 .address = res->bo->address + res->offset +
706 buf->buffer_offset,
707 .size_B = buf->buffer_size - res->offset,
708 .format = dataport ? ISL_FORMAT_RAW
709 : ISL_FORMAT_R32G32B32A32_FLOAT,
710 .swizzle = ISL_SWIZZLE_IDENTITY,
711 .stride_B = 1,
712 .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
713 }
714
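/* Compute a flattened element offset for an array-of-arrays deref chain,
 * scaled by `elem_size` and clamped to stay within the array bounds.
 */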
715 static nir_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
719 {
720 unsigned array_size = elem_size;
721 nir_def *offset = nir_imm_int(b, 0);
722
723 while (deref->deref_type != nir_deref_type_var) {
724 assert(deref->deref_type == nir_deref_type_array);
725
726 /* This level's element size is the previous level's array size */
727 nir_def *index = deref->arr.index.ssa;
728 assert(deref->arr.index.ssa);
729 offset = nir_iadd(b, offset,
730 nir_imul_imm(b, index, array_size));
731
732 deref = nir_deref_instr_parent(deref);
733 assert(glsl_type_is_array(deref->type));
734 array_size *= glsl_get_length(deref->type);
735 }
736
737 /* Accessing an invalid surface index with the dataport can result in a
738 * hang. According to the spec "if the index used to select an individual
739 * element is negative or greater than or equal to the size of the array,
740 * the results of the operation are undefined but may not lead to
741 * termination" -- which is one of the possible outcomes of the hang.
742 * Clamp the index to prevent access outside of the array bounds.
743 */
744 return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
745 }
746
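/* Rewrite storage image deref intrinsics into their index-based forms, using
 * the variable's driver_location plus the array-of-arrays offset as the
 * image index.
 */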
747 static void
iris_lower_storage_image_derefs(nir_shader *nir)
749 {
750 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
751
752 nir_builder b = nir_builder_create(impl);
753
754 nir_foreach_block(block, impl) {
755 nir_foreach_instr_safe(instr, block) {
756 if (instr->type != nir_instr_type_intrinsic)
757 continue;
758
759 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
760 switch (intrin->intrinsic) {
761 case nir_intrinsic_image_deref_load:
762 case nir_intrinsic_image_deref_store:
763 case nir_intrinsic_image_deref_atomic:
764 case nir_intrinsic_image_deref_atomic_swap:
765 case nir_intrinsic_image_deref_size:
766 case nir_intrinsic_image_deref_samples:
767 case nir_intrinsic_image_deref_load_raw_intel:
768 case nir_intrinsic_image_deref_store_raw_intel: {
769 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
770 nir_variable *var = nir_deref_instr_get_variable(deref);
771
772 b.cursor = nir_before_instr(&intrin->instr);
773 nir_def *index =
774 nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
775 var->data.driver_location);
776 nir_rewrite_image_intrinsic(intrin, index, false);
777 break;
778 }
779
780 default:
781 break;
782 }
783 }
784 }
785 }
786
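/* Check whether the shader uses any (non-deref) image atomic operations. */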
787 static bool
iris_uses_image_atomic(const nir_shader *shader)
789 {
790 nir_foreach_function_impl(impl, shader) {
791 nir_foreach_block(block, impl) {
792 nir_foreach_instr(instr, block) {
793 if (instr->type != nir_instr_type_intrinsic)
794 continue;
795
796 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
797 switch (intrin->intrinsic) {
798 case nir_intrinsic_image_deref_atomic:
799 case nir_intrinsic_image_deref_atomic_swap:
800 unreachable("Should have been lowered in "
801 "iris_lower_storage_image_derefs");
802
803 case nir_intrinsic_image_atomic:
804 case nir_intrinsic_image_atomic_swap:
805 return true;
806
807 default:
808 break;
809 }
810 }
811 }
812 }
813
814 return false;
815 }
816
817 /**
818 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
819 */
820 static bool
iris_fix_edge_flags(nir_shader *nir)
822 {
823 if (nir->info.stage != MESA_SHADER_VERTEX) {
824 nir_shader_preserve_all_metadata(nir);
825 return false;
826 }
827
828 nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
829 VARYING_SLOT_EDGE);
830 if (!var) {
831 nir_shader_preserve_all_metadata(nir);
832 return false;
833 }
834
835 var->data.mode = nir_var_shader_temp;
836 nir->info.outputs_written &= ~VARYING_BIT_EDGE;
837 nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
838 nir_fixup_deref_modes(nir);
839
840 nir_foreach_function_impl(impl, nir) {
841 nir_metadata_preserve(impl, nir_metadata_block_index |
842 nir_metadata_dominance |
843 nir_metadata_live_defs |
844 nir_metadata_loop_analysis);
845 }
846
847 return true;
848 }
849
850 /**
851 * Fix an uncompiled shader's stream output info.
852 *
853 * Core Gallium stores output->register_index as a "slot" number, where
854 * slots are assigned consecutively to all outputs in info->outputs_written.
855 * This naive packing of outputs doesn't work for us - we too have slots,
856 * but the layout is defined by the VUE map, which we won't have until we
857 * compile a specific shader variant. So, we remap these and simply store
858 * VARYING_SLOT_* in our copy's output->register_index fields.
859 *
860 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
861 * components of our VUE header. See brw_vue_map.c for the layout.
862 */
863 static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
866 {
867 uint8_t reverse_map[64] = {};
868 unsigned slot = 0;
869 while (outputs_written) {
870 reverse_map[slot++] = u_bit_scan64(&outputs_written);
871 }
872
873 for (unsigned i = 0; i < so_info->num_outputs; i++) {
874 struct pipe_stream_output *output = &so_info->output[i];
875
876 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
877 output->register_index = reverse_map[output->register_index];
878
879 /* The VUE header contains three scalar fields packed together:
880 * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
881 * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
882 * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
883 */
884 switch (output->register_index) {
885 case VARYING_SLOT_LAYER:
886 assert(output->num_components == 1);
887 output->register_index = VARYING_SLOT_PSIZ;
888 output->start_component = 1;
889 break;
890 case VARYING_SLOT_VIEWPORT:
891 assert(output->num_components == 1);
892 output->register_index = VARYING_SLOT_PSIZ;
893 output->start_component = 2;
894 break;
895 case VARYING_SLOT_PSIZ:
896 assert(output->num_components == 1);
897 output->start_component = 3;
898 break;
899 }
900
901 //info->outputs_written |= 1ull << output->register_index;
902 }
903 }
904
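/* Fill one vec4 worth of image-param system values: the first `n` components
 * reference the given image param dwords, and the rest are zero.
 */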
905 static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
908 {
909 assert(offset % sizeof(uint32_t) == 0);
910
911 for (unsigned i = 0; i < n; ++i)
912 sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
913
914 for (unsigned i = n; i < 4; ++i)
915 sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
916 }
917
918 /**
919 * Associate NIR uniform variables with the prog_data->param[] mechanism
920 * used by the backend. Also, decide which UBOs we'd like to push in an
921 * ideal situation (though the backend can reduce this).
922 */
923 static void
iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
                    void *mem_ctx,
                    nir_shader *nir,
                    unsigned kernel_input_size,
                    uint32_t **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
931 {
932 unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
933
934 const unsigned IRIS_MAX_SYSTEM_VALUES =
935 PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
936 unsigned *system_values =
937 rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
938 unsigned num_system_values = 0;
939
940 unsigned patch_vert_idx = -1;
941 unsigned tess_outer_default_idx = -1;
942 unsigned tess_inner_default_idx = -1;
943 unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
944 unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
945 unsigned variable_group_size_idx = -1;
946 unsigned work_dim_idx = -1;
947 memset(ucp_idx, -1, sizeof(ucp_idx));
948 memset(img_idx, -1, sizeof(img_idx));
949
950 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
951
952 nir_builder b = nir_builder_at(nir_before_impl(impl));
953
954 nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
955
956 /* Turn system value intrinsics into uniforms */
957 nir_foreach_block(block, impl) {
958 nir_foreach_instr_safe(instr, block) {
959 if (instr->type != nir_instr_type_intrinsic)
960 continue;
961
962 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
963 nir_def *offset;
964
965 switch (intrin->intrinsic) {
966 case nir_intrinsic_load_base_workgroup_id: {
967 /* GL doesn't have a concept of base workgroup */
968 b.cursor = nir_instr_remove(&intrin->instr);
969 nir_def_rewrite_uses(&intrin->def,
970 nir_imm_zero(&b, 3, 32));
971 continue;
972 }
973 case nir_intrinsic_load_constant: {
974 unsigned load_size = intrin->def.num_components *
975 intrin->def.bit_size / 8;
976 unsigned load_align = intrin->def.bit_size / 8;
977
978 /* This one is special because it reads from the shader constant
979 * data and not cbuf0 which gallium uploads for us.
980 */
981 b.cursor = nir_instr_remove(&intrin->instr);
982
983 nir_def *offset =
984 nir_iadd_imm(&b, intrin->src[0].ssa,
985 nir_intrinsic_base(intrin));
986
987 assert(load_size < b.shader->constant_data_size);
988 unsigned max_offset = b.shader->constant_data_size - load_size;
989 offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
990
991 /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
992 * and cannot cross that 4GB boundary, so we can do the address
993 * calculation with 32-bit adds. Also, we can ignore the high
994 * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
995 */
996 assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
997
998 nir_def *const_data_addr =
999 nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1000
1001 nir_def *data =
1002 nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1003 load_align,
1004 intrin->def.num_components,
1005 intrin->def.bit_size);
1006
1007 nir_def_rewrite_uses(&intrin->def,
1008 data);
1009 continue;
1010 }
1011 case nir_intrinsic_load_user_clip_plane: {
1012 unsigned ucp = nir_intrinsic_ucp_id(intrin);
1013
1014 if (ucp_idx[ucp] == -1) {
1015 ucp_idx[ucp] = num_system_values;
1016 num_system_values += 4;
1017 }
1018
1019 for (int i = 0; i < 4; i++) {
1020 system_values[ucp_idx[ucp] + i] =
1021 BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1022 }
1023
1024 b.cursor = nir_before_instr(instr);
1025 offset = nir_imm_int(&b, system_values_start +
1026 ucp_idx[ucp] * sizeof(uint32_t));
1027 break;
1028 }
1029 case nir_intrinsic_load_patch_vertices_in:
1030 if (patch_vert_idx == -1)
1031 patch_vert_idx = num_system_values++;
1032
1033 system_values[patch_vert_idx] =
1034 BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1035
1036 b.cursor = nir_before_instr(instr);
1037 offset = nir_imm_int(&b, system_values_start +
1038 patch_vert_idx * sizeof(uint32_t));
1039 break;
1040 case nir_intrinsic_load_tess_level_outer_default:
1041 if (tess_outer_default_idx == -1) {
1042 tess_outer_default_idx = num_system_values;
1043 num_system_values += 4;
1044 }
1045
1046 for (int i = 0; i < 4; i++) {
1047 system_values[tess_outer_default_idx + i] =
1048 BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1049 }
1050
1051 b.cursor = nir_before_instr(instr);
1052 offset = nir_imm_int(&b, system_values_start +
1053 tess_outer_default_idx * sizeof(uint32_t));
1054 break;
1055 case nir_intrinsic_load_tess_level_inner_default:
1056 if (tess_inner_default_idx == -1) {
1057 tess_inner_default_idx = num_system_values;
1058 num_system_values += 2;
1059 }
1060
1061 for (int i = 0; i < 2; i++) {
1062 system_values[tess_inner_default_idx + i] =
1063 BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1064 }
1065
1066 b.cursor = nir_before_instr(instr);
1067 offset = nir_imm_int(&b, system_values_start +
1068 tess_inner_default_idx * sizeof(uint32_t));
1069 break;
1070 case nir_intrinsic_image_deref_load_param_intel: {
1071 assert(devinfo->ver < 9);
1072 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1073 nir_variable *var = nir_deref_instr_get_variable(deref);
1074
1075 if (img_idx[var->data.binding] == -1) {
1076 /* GL only allows arrays of arrays of images. */
1077 assert(glsl_type_is_image(glsl_without_array(var->type)));
1078 unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1079
1080 for (int i = 0; i < num_images; i++) {
1081 const unsigned img = var->data.binding + i;
1082
1083 img_idx[img] = num_system_values;
1084 num_system_values += ISL_IMAGE_PARAM_SIZE;
1085
1086 uint32_t *img_sv = &system_values[img_idx[img]];
1087
1088 setup_vec4_image_sysval(
1089 img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1090 offsetof(struct isl_image_param, offset), 2);
1091 setup_vec4_image_sysval(
1092 img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1093 offsetof(struct isl_image_param, size), 3);
1094 setup_vec4_image_sysval(
1095 img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1096 offsetof(struct isl_image_param, stride), 4);
1097 setup_vec4_image_sysval(
1098 img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1099 offsetof(struct isl_image_param, tiling), 3);
1100 setup_vec4_image_sysval(
1101 img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1102 offsetof(struct isl_image_param, swizzling), 2);
1103 }
1104 }
1105
1106 b.cursor = nir_before_instr(instr);
1107 offset = nir_iadd_imm(&b,
1108 get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1109 system_values_start +
1110 img_idx[var->data.binding] * 4 +
1111 nir_intrinsic_base(intrin) * 16);
1112 break;
1113 }
1114 case nir_intrinsic_load_workgroup_size: {
1115 assert(nir->info.workgroup_size_variable);
1116 if (variable_group_size_idx == -1) {
1117 variable_group_size_idx = num_system_values;
1118 num_system_values += 3;
1119 for (int i = 0; i < 3; i++) {
1120 system_values[variable_group_size_idx + i] =
1121 BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1122 }
1123 }
1124
1125 b.cursor = nir_before_instr(instr);
1126 offset = nir_imm_int(&b, system_values_start +
1127 variable_group_size_idx * sizeof(uint32_t));
1128 break;
1129 }
1130 case nir_intrinsic_load_work_dim: {
1131 if (work_dim_idx == -1) {
1132 work_dim_idx = num_system_values++;
1133 system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1134 }
1135 b.cursor = nir_before_instr(instr);
1136 offset = nir_imm_int(&b, system_values_start +
1137 work_dim_idx * sizeof(uint32_t));
1138 break;
1139 }
1140 case nir_intrinsic_load_kernel_input: {
1141 assert(nir_intrinsic_base(intrin) +
1142 nir_intrinsic_range(intrin) <= kernel_input_size);
1143 b.cursor = nir_before_instr(instr);
1144 offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1145 nir_intrinsic_base(intrin));
1146 break;
1147 }
1148 default:
1149 continue;
1150 }
1151
1152 nir_def *load =
1153 nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1154 temp_ubo_name, offset,
1155 .align_mul = 4,
1156 .align_offset = 0,
1157 .range_base = 0,
1158 .range = ~0);
1159
1160 nir_def_rewrite_uses(&intrin->def,
1161 load);
1162 nir_instr_remove(instr);
1163 }
1164 }
1165
1166 nir_validate_shader(nir, "before remapping");
1167
   /* Uniforms are stored in constant buffer 0; the user-facing UBOs start
    * at index one.  So if any constant buffer is needed, constant buffer 0
    * will be needed as well, so account for it.
    */
1172 unsigned num_cbufs = nir->info.num_ubos;
1173 if (num_cbufs || nir->num_uniforms)
1174 num_cbufs++;
1175
1176 /* Place the new params in a new cbuf. */
1177 if (num_system_values > 0 || kernel_input_size > 0) {
1178 unsigned sysval_cbuf_index = num_cbufs;
1179 num_cbufs++;
1180
1181 system_values = reralloc(mem_ctx, system_values, unsigned,
1182 num_system_values);
1183
1184 nir_foreach_block(block, impl) {
1185 nir_foreach_instr_safe(instr, block) {
1186 if (instr->type != nir_instr_type_intrinsic)
1187 continue;
1188
1189 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1190
1191 if (load->intrinsic != nir_intrinsic_load_ubo)
1192 continue;
1193
1194 b.cursor = nir_before_instr(instr);
1195
1196 if (load->src[0].ssa == temp_ubo_name) {
1197 nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1198 nir_src_rewrite(&load->src[0], imm);
1199 }
1200 }
1201 }
1202
1203 /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1204 nir_opt_constant_folding(nir);
1205 } else {
1206 ralloc_free(system_values);
1207 system_values = NULL;
1208 }
1209
1210 assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1211 nir_validate_shader(nir, "after remap");
1212
1213 /* We don't use params[] but gallium leaves num_uniforms set. We use this
1214 * to detect when cbuf0 exists but we don't need it anymore when we get
1215 * here. Instead, zero it out so that the back-end doesn't get confused
1216 * when nr_params * 4 != num_uniforms != nr_params * 4.
1217 */
1218 nir->num_uniforms = 0;
1219
1220 *out_system_values = system_values;
1221 *out_num_system_values = num_system_values;
1222 *out_num_cbufs = num_cbufs;
1223 }
1224
1225 static const char *surface_group_names[] = {
1226 [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
1227 [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1228 [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
1229 [IRIS_SURFACE_GROUP_TEXTURE_LOW64] = "texture",
1230 [IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = "texture",
1231 [IRIS_SURFACE_GROUP_UBO] = "ubo",
1232 [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
1233 [IRIS_SURFACE_GROUP_IMAGE] = "image",
1234 };
1235
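/* Print a human-readable summary of a binding table for debug output. */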
1236 static void
iris_print_binding_table(FILE *fp, const char *name,
                         const struct iris_binding_table *bt)
1239 {
1240 STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1241
1242 uint32_t total = 0;
1243 uint32_t compacted = 0;
1244
1245 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1246 uint32_t size = bt->sizes[i];
1247 total += size;
1248 if (size)
1249 compacted += util_bitcount64(bt->used_mask[i]);
1250 }
1251
1252 if (total == 0) {
1253 fprintf(fp, "Binding table for %s is empty\n\n", name);
1254 return;
1255 }
1256
1257 if (total != compacted) {
1258 fprintf(fp, "Binding table for %s "
1259 "(compacted to %u entries from %u entries)\n",
1260 name, compacted, total);
1261 } else {
1262 fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1263 }
1264
1265 uint32_t entry = 0;
1266 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1267 uint64_t mask = bt->used_mask[i];
1268 while (mask) {
1269 int index = u_bit_scan64(&mask);
1270 fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
1271 }
1272 }
1273 fprintf(fp, "\n");
1274 }
1275
1276 enum {
1277 /* Max elements in a surface group. */
1278 SURFACE_GROUP_MAX_ELEMENTS = 64,
1279 };
1280
1281 /**
1282 * Map a <group, index> pair to a binding table index.
1283 *
1284 * For example: <UBO, 5> => binding table index 12
1285 */
1286 uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
1289 {
1290 assert(index < bt->sizes[group]);
1291 uint64_t mask = bt->used_mask[group];
1292 uint64_t bit = 1ull << index;
1293 if (bit & mask) {
1294 return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1295 } else {
1296 return IRIS_SURFACE_NOT_USED;
1297 }
1298 }
1299
1300 /**
1301 * Map a binding table index back to a <group, index> pair.
1302 *
1303 * For example: binding table index 12 => <UBO, 5>
1304 */
1305 uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
1308 {
1309 uint64_t used_mask = bt->used_mask[group];
1310 assert(bti >= bt->offsets[group]);
1311
1312 uint32_t c = bti - bt->offsets[group];
1313 while (used_mask) {
1314 int i = u_bit_scan64(&used_mask);
1315 if (c == 0)
1316 return i;
1317 c--;
1318 }
1319
1320 return IRIS_SURFACE_NOT_USED;
1321 }
1322
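/* Rewrite a surface index source into a final binding table index.  Constant
 * indices are translated directly; indirect indices only get the group's
 * base offset added, since indirect use marks the whole group as used.
 */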
1323 static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
1327 {
1328 assert(bt->sizes[group] > 0);
1329
1330 b->cursor = nir_before_instr(instr);
1331 nir_def *bti;
1332 if (nir_src_is_const(*src)) {
1333 uint32_t index = nir_src_as_uint(*src);
1334 bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1335 src->ssa->bit_size);
1336 } else {
      /* Indirect usage makes all the surfaces of the group available,
       * so we can just add the base.
       */
1340 assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1341 bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1342 }
1343 nir_src_rewrite(src, bti);
1344 }
1345
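/* Record which surfaces a source may refer to: a single entry for a constant
 * index, or the entire group for an indirect one.
 */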
1346 static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
1349 {
1350 assert(bt->sizes[group] > 0);
1351
1352 if (nir_src_is_const(*src)) {
1353 uint64_t index = nir_src_as_uint(*src);
1354 assert(index < bt->sizes[group]);
1355 bt->used_mask[group] |= 1ull << index;
1356 } else {
1357 /* There's an indirect usage, we need all the surfaces. */
1358 bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1359 }
1360 }
1361
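/* Allow binding table compaction to be disabled for debugging via the
 * INTEL_DISABLE_COMPACT_BINDING_TABLE environment variable.
 */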
1362 static bool
skip_compacting_binding_tables(void)
1364 {
1365 static int skip = -1;
1366 if (skip < 0)
1367 skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1368 return skip;
1369 }
1370
1371 /**
1372 * Set up the binding table indices and apply to the shader.
1373 */
1374 static void
iris_setup_binding_table(const struct intel_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
1381 {
1382 const struct shader_info *info = &nir->info;
1383
1384 memset(bt, 0, sizeof(*bt));
1385
1386 /* Set the sizes for each surface group. For some groups, we already know
1387 * upfront how many will be used, so mark them.
1388 */
1389 if (info->stage == MESA_SHADER_FRAGMENT) {
1390 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1391 /* All render targets used. */
1392 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1393 BITFIELD64_MASK(num_render_targets);
1394
      /* Set up the render target read surface group in order to support
       * non-coherent framebuffer fetch on Gfx8.
       */
1398 if (devinfo->ver == 8 && info->outputs_read) {
1399 bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1400 bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1401 BITFIELD64_MASK(num_render_targets);
1402 }
1403 } else if (info->stage == MESA_SHADER_COMPUTE) {
1404 bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1405 }
1406
1407 assert(ARRAY_SIZE(info->textures_used) >= 4);
1408 int max_tex = BITSET_LAST_BIT(info->textures_used);
1409 assert(max_tex <= 128);
1410 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1411 bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1412 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1413 info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1414 bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1415 info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1416 bt->samplers_used_mask = info->samplers_used[0];
1417
1418 bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1419
1420 /* Allocate an extra slot in the UBO section for NIR constants.
1421 * Binding table compaction will remove it if unnecessary.
1422 *
1423 * We don't include them in iris_compiled_shader::num_cbufs because
1424 * they are uploaded separately from shs->constbuf[], but from a shader
1425 * point of view, they're another UBO (at the end of the section).
1426 */
1427 bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1428
1429 bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1430
1431 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1432 assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1433
   /* Mark surfaces used for the cases where we don't have the information
    * available upfront.
    */
1437 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1438 nir_foreach_block (block, impl) {
1439 nir_foreach_instr (instr, block) {
1440 if (instr->type != nir_instr_type_intrinsic)
1441 continue;
1442
1443 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1444 switch (intrin->intrinsic) {
1445 case nir_intrinsic_load_num_workgroups:
1446 bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1447 break;
1448
1449 case nir_intrinsic_load_output:
1450 if (devinfo->ver == 8) {
1451 mark_used_with_src(bt, &intrin->src[0],
1452 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1453 }
1454 break;
1455
1456 case nir_intrinsic_image_size:
1457 case nir_intrinsic_image_load:
1458 case nir_intrinsic_image_store:
1459 case nir_intrinsic_image_atomic:
1460 case nir_intrinsic_image_atomic_swap:
1461 case nir_intrinsic_image_load_raw_intel:
1462 case nir_intrinsic_image_store_raw_intel:
1463 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1464 break;
1465
1466 case nir_intrinsic_load_ubo:
1467 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1468 break;
1469
1470 case nir_intrinsic_store_ssbo:
1471 mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1472 break;
1473
1474 case nir_intrinsic_get_ssbo_size:
1475 case nir_intrinsic_ssbo_atomic:
1476 case nir_intrinsic_ssbo_atomic_swap:
1477 case nir_intrinsic_load_ssbo:
1478 mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1479 break;
1480
1481 default:
1482 break;
1483 }
1484 }
1485 }
1486
   /* When compaction is disabled, we just mark everything as used. */
1488 if (unlikely(skip_compacting_binding_tables())) {
1489 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1490 bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1491 }
1492
1493 /* Calculate the offsets and the binding table size based on the used
1494 * surfaces. After this point, the functions to go between "group indices"
1495 * and binding table indices can be used.
1496 */
1497 uint32_t next = 0;
1498 for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1499 if (bt->used_mask[i] != 0) {
1500 bt->offsets[i] = next;
1501 next += util_bitcount64(bt->used_mask[i]);
1502 }
1503 }
1504 bt->size_bytes = next * 4;
1505
1506 if (INTEL_DEBUG(DEBUG_BT)) {
1507 iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1508 }
1509
1510 /* Apply the binding table indices. The backend compiler is not expected
1511 * to change those, as we haven't set any of the *_start entries in brw
1512 * binding_table.
1513 */
1514 nir_builder b = nir_builder_create(impl);
1515
1516 nir_foreach_block (block, impl) {
1517 nir_foreach_instr (instr, block) {
1518 if (instr->type == nir_instr_type_tex) {
1519 nir_tex_instr *tex = nir_instr_as_tex(instr);
1520 if (tex->texture_index < 64) {
1521 tex->texture_index =
1522 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1523 tex->texture_index);
1524 } else {
1525 tex->texture_index =
1526 iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1527 tex->texture_index - 64);
1528 }
1529 continue;
1530 }
1531
1532 if (instr->type != nir_instr_type_intrinsic)
1533 continue;
1534
1535 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1536 switch (intrin->intrinsic) {
1537 case nir_intrinsic_image_size:
1538 case nir_intrinsic_image_load:
1539 case nir_intrinsic_image_store:
1540 case nir_intrinsic_image_atomic:
1541 case nir_intrinsic_image_atomic_swap:
1542 case nir_intrinsic_image_load_raw_intel:
1543 case nir_intrinsic_image_store_raw_intel:
1544 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1545 IRIS_SURFACE_GROUP_IMAGE);
1546 break;
1547
1548 case nir_intrinsic_load_ubo:
1549 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1550 IRIS_SURFACE_GROUP_UBO);
1551 break;
1552
1553 case nir_intrinsic_store_ssbo:
1554 rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1555 IRIS_SURFACE_GROUP_SSBO);
1556 break;
1557
1558 case nir_intrinsic_load_output:
1559 if (devinfo->ver == 8) {
1560 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1561 IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1562 }
1563 break;
1564
1565 case nir_intrinsic_get_ssbo_size:
1566 case nir_intrinsic_ssbo_atomic:
1567 case nir_intrinsic_ssbo_atomic_swap:
1568 case nir_intrinsic_load_ssbo:
1569 rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1570 IRIS_SURFACE_GROUP_SSBO);
1571 break;
1572
1573 default:
1574 break;
1575 }
1576 }
1577 }
1578 }
1579
1580 static void
1581 iris_debug_recompile_brw(struct iris_screen *screen,
1582 struct util_debug_callback *dbg,
1583 struct iris_uncompiled_shader *ish,
1584 const struct brw_base_prog_key *key)
1585 {
1586 if (!ish || list_is_empty(&ish->variants)
1587 || list_is_singular(&ish->variants))
1588 return;
1589
1590 const struct brw_compiler *c = screen->brw;
1591 const struct shader_info *info = &ish->nir->info;
1592
1593 brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1594 _mesa_shader_stage_to_string(info->stage),
1595 info->name ? info->name : "(no identifier)",
1596 info->label ? info->label : "");
1597
1598 struct iris_compiled_shader *shader =
1599 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1600 const void *old_iris_key = &shader->key;
1601
1602 union brw_any_prog_key old_key;
1603
1604 switch (info->stage) {
1605 case MESA_SHADER_VERTEX:
1606 old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1607 break;
1608 case MESA_SHADER_TESS_CTRL:
1609 old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1610 break;
1611 case MESA_SHADER_TESS_EVAL:
1612 old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1613 break;
1614 case MESA_SHADER_GEOMETRY:
1615 old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1616 break;
1617 case MESA_SHADER_FRAGMENT:
1618 old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1619 break;
1620 case MESA_SHADER_COMPUTE:
1621 old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1622 break;
1623 default:
1624 unreachable("invalid shader stage");
1625 }
1626
1627 brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1628 }
1629
1630 static void
1631 iris_debug_recompile_elk(struct iris_screen *screen,
1632 struct util_debug_callback *dbg,
1633 struct iris_uncompiled_shader *ish,
1634 const struct elk_base_prog_key *key)
1635 {
1636 if (!ish || list_is_empty(&ish->variants)
1637 || list_is_singular(&ish->variants))
1638 return;
1639
1640 const struct elk_compiler *c = screen->elk;
1641 const struct shader_info *info = &ish->nir->info;
1642
1643 elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1644 _mesa_shader_stage_to_string(info->stage),
1645 info->name ? info->name : "(no identifier)",
1646 info->label ? info->label : "");
1647
1648 struct iris_compiled_shader *shader =
1649 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1650 const void *old_iris_key = &shader->key;
1651
1652 union elk_any_prog_key old_key;
1653
1654 switch (info->stage) {
1655 case MESA_SHADER_VERTEX:
1656 old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1657 break;
1658 case MESA_SHADER_TESS_CTRL:
1659 old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1660 break;
1661 case MESA_SHADER_TESS_EVAL:
1662 old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1663 break;
1664 case MESA_SHADER_GEOMETRY:
1665 old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1666 break;
1667 case MESA_SHADER_FRAGMENT:
1668 old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1669 break;
1670 case MESA_SHADER_COMPUTE:
1671 old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1672 break;
1673 default:
1674 unreachable("invalid shader stage");
1675 }
1676
1677 elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1678 }
1679
1680
1681 static void
1682 check_urb_size(struct iris_context *ice,
1683 unsigned needed_size,
1684 gl_shader_stage stage)
1685 {
1686 unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1687
1688 /* If the last URB allocation wasn't large enough for our needs,
1689 * flag it as needing to be reconfigured. Otherwise, we can use
1690 * the existing config. However, if the URB is constrained, and
1691 * we can shrink our size for this stage, we may be able to gain
1692 * extra concurrency by reconfiguring it to be smaller. Do so.
1693 */
1694 if (last_allocated_size < needed_size ||
1695 (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1696 ice->state.dirty |= IRIS_DIRTY_URB;
1697 }
1698 }
1699
1700 /**
1701 * Get the shader for the last enabled geometry stage.
1702 *
1703 * This stage is the one which will feed stream output and the rasterizer.
1704 */
1705 static gl_shader_stage
1706 last_vue_stage(struct iris_context *ice)
1707 {
1708 if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1709 return MESA_SHADER_GEOMETRY;
1710
1711 if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1712 return MESA_SHADER_TESS_EVAL;
1713
1714 return MESA_SHADER_VERTEX;
1715 }
1716
1717 /**
1718 * \param added Set to \c true if the variant was added to the list (i.e., a
1719 * variant matching \c key was not found). Set to \c false
1720 * otherwise.
1721 */
1722 static inline struct iris_compiled_shader *
1723 find_or_add_variant(const struct iris_screen *screen,
1724 struct iris_uncompiled_shader *ish,
1725 enum iris_program_cache_id cache_id,
1726 const void *key, unsigned key_size,
1727 bool *added)
1728 {
1729 struct list_head *start = ish->variants.next;
1730
1731 *added = false;
1732
1733 if (screen->precompile) {
1734 /* Check the first list entry. There will always be at least one
1735 * variant in the list (most likely the precompile variant), and
1736 * other contexts only append new variants, so we can safely check
1737 * it without locking, saving that cost in the common case.
1738 */
1739 struct iris_compiled_shader *first =
1740 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1741
1742 if (memcmp(&first->key, key, key_size) == 0) {
1743 util_queue_fence_wait(&first->ready);
1744 return first;
1745 }
1746
1747 /* Skip this one in the loop below */
1748 start = first->link.next;
1749 }
1750
1751 struct iris_compiled_shader *variant = NULL;
1752
1753 /* If it doesn't match, we have to walk the list; other contexts may be
1754 * concurrently appending shaders to it, so we need to lock here.
1755 */
1756 simple_mtx_lock(&ish->lock);
1757
1758 list_for_each_entry_from(struct iris_compiled_shader, v, start,
1759 &ish->variants, link) {
1760 if (memcmp(&v->key, key, key_size) == 0) {
1761 variant = v;
1762 break;
1763 }
1764 }
1765
1766 gl_shader_stage stage = ish->nir->info.stage;
1767
1768 if (variant == NULL) {
1769 variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1770 key_size, key);
1771
1772 /* Append our new variant to the shader's variant list. */
1773 list_addtail(&variant->link, &ish->variants);
1774 *added = true;
1775
1776 simple_mtx_unlock(&ish->lock);
1777 } else {
1778 simple_mtx_unlock(&ish->lock);
1779
1780 util_queue_fence_wait(&variant->ready);
1781 }
1782
1783 assert(stage == variant->stage);
1784 return variant;
1785 }
1786
1787 static void
1788 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1789 UNUSED int thread_index)
1790 {
1791 free(_job);
1792 }
1793
1794 static void
1795 iris_schedule_compile(struct iris_screen *screen,
1796 struct util_queue_fence *ready_fence,
1797 struct util_debug_callback *dbg,
1798 struct iris_threaded_compile_job *job,
1799 util_queue_execute_func execute)
1800
1801 {
1802 struct util_async_debug_callback async_debug;
1803
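/* If the caller provided a debug callback, route compiler messages through
 * an async buffer and drain them back on this thread after waiting for the
 * job, since the callback may not be safe to invoke from a compiler thread.
 */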
1804 if (dbg) {
1805 u_async_debug_init(&async_debug);
1806 job->dbg = &async_debug.base;
1807 }
1808
1809 util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1810 iris_threaded_compile_job_delete, 0);
1811
1812 if (screen->driconf.sync_compile || dbg)
1813 util_queue_fence_wait(ready_fence);
1814
1815 if (dbg) {
1816 u_async_debug_drain(&async_debug, dbg);
1817 u_async_debug_cleanup(&async_debug);
1818 }
1819 }
1820
1821 /**
1822 * Compile a vertex shader, and upload the assembly.
1823 */
1824 static void
1825 iris_compile_vs(struct iris_screen *screen,
1826 struct u_upload_mgr *uploader,
1827 struct util_debug_callback *dbg,
1828 struct iris_uncompiled_shader *ish,
1829 struct iris_compiled_shader *shader)
1830 {
1831 const struct intel_device_info *devinfo = screen->devinfo;
1832 void *mem_ctx = ralloc_context(NULL);
1833 uint32_t *system_values;
1834 unsigned num_system_values;
1835 unsigned num_cbufs;
1836
1837 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1838 const struct iris_vs_prog_key *const key = &shader->key.vs;
1839
1840 if (key->vue.nr_userclip_plane_consts) {
1841 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1842 /* Re-run the cleanup passes only if the lowering made progress. */
1843 if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1844 true, false, NULL)) {
1845 nir_lower_io_to_temporaries(nir, impl, true, false);
1846 nir_lower_global_vars_to_local(nir);
1847 nir_lower_vars_to_ssa(nir);
1848 nir_shader_gather_info(nir, impl);
1849 }
1850 }
1851
1852 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1853 &num_system_values, &num_cbufs);
1854
1855 struct iris_binding_table bt;
1856 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1857 num_system_values, num_cbufs);
1858
1859 const char *error;
1860 const unsigned *program;
1861 if (screen->brw) {
1862 struct brw_vs_prog_data *brw_prog_data =
1863 rzalloc(mem_ctx, struct brw_vs_prog_data);
1864
1865 brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1866
1867 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1868
1869 brw_compute_vue_map(devinfo,
1870 &brw_prog_data->base.vue_map, nir->info.outputs_written,
1871 nir->info.separate_shader, /* pos_slots */ 1);
1872
1873 struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1874
1875 struct brw_compile_vs_params params = {
1876 .base = {
1877 .mem_ctx = mem_ctx,
1878 .nir = nir,
1879 .log_data = dbg,
1880 .source_hash = ish->source_hash,
1881 },
1882 .key = &brw_key,
1883 .prog_data = brw_prog_data,
1884 };
1885
1886 program = brw_compile_vs(screen->brw, &params);
1887 error = params.base.error_str;
1888 if (program) {
1889 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1890 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1891 }
1892 } else {
1893 struct elk_vs_prog_data *elk_prog_data =
1894 rzalloc(mem_ctx, struct elk_vs_prog_data);
1895
1896 elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1897
1898 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1899
1900 elk_compute_vue_map(devinfo,
1901 &elk_prog_data->base.vue_map, nir->info.outputs_written,
1902 nir->info.separate_shader, /* pos_slots */ 1);
1903
1904 struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1905
1906 struct elk_compile_vs_params params = {
1907 .base = {
1908 .mem_ctx = mem_ctx,
1909 .nir = nir,
1910 .log_data = dbg,
1911 .source_hash = ish->source_hash,
1912 },
1913 .key = &elk_key,
1914 .prog_data = elk_prog_data,
1915 };
1916
1917 program = elk_compile_vs(screen->elk, &params);
1918 error = params.base.error_str;
1919 if (program) {
1920 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1921 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1922 }
1923 }
1924
1925 if (program == NULL) {
1926 dbg_printf("Failed to compile vertex shader: %s\n", error);
1927 ralloc_free(mem_ctx);
1928
1929 shader->compilation_failed = true;
1930 util_queue_fence_signal(&shader->ready);
1931
1932 return;
1933 }
1934
1935 shader->compilation_failed = false;
1936
1937 uint32_t *so_decls =
1938 screen->vtbl.create_so_decl_list(&ish->stream_output,
1939 &iris_vue_data(shader)->vue_map);
1940
1941 iris_finalize_program(shader, so_decls,
1942 system_values, num_system_values, 0, num_cbufs, &bt);
1943
1944 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1945 sizeof(*key), key, program);
1946
1947 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1948
1949 ralloc_free(mem_ctx);
1950 }
1951
1952 /**
1953 * Update the current vertex shader variant.
1954 *
1955 * Fill out the key, look in the cache, compile and bind if needed.
1956 */
1957 static void
1958 iris_update_compiled_vs(struct iris_context *ice)
1959 {
1960 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1961 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1962 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1963 struct iris_uncompiled_shader *ish =
1964 ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1965
1966 struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1967 screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1968
1969 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1970 bool added;
1971 struct iris_compiled_shader *shader =
1972 find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1973
1974 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1975 &key, sizeof(key))) {
1976 iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1977 }
1978
1979 if (shader->compilation_failed)
1980 shader = NULL;
1981
1982 if (old != shader) {
1983 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1984 shader);
1985 ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1986 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1987 IRIS_STAGE_DIRTY_BINDINGS_VS |
1988 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1989 shs->sysvals_need_upload = true;
1990
1991 unsigned urb_entry_size = shader ?
1992 iris_vue_data(shader)->urb_entry_size : 0;
1993 check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1994 }
1995 }
1996
1997 /**
1998 * Get the shader_info for a given stage, or NULL if the stage is disabled.
1999 */
2000 const struct shader_info *
2001 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2002 {
2003 const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2004
2005 if (!ish)
2006 return NULL;
2007
2008 const nir_shader *nir = ish->nir;
2009 return &nir->info;
2010 }
2011
2012 /**
2013 * Get the union of TCS output and TES input slots.
2014 *
2015 * TCS and TES need to agree on a common URB entry layout. In particular,
2016 * the data for all patch vertices is stored in a single URB entry (unlike
2017 * GS which has one entry per input vertex). This means that per-vertex
2018 * array indexing needs a stride.
2019 *
2020 * SSO requires locations to match, but doesn't require the number of
2021 * outputs/inputs to match (in fact, the TCS often has extra outputs).
2022 * So, we need to take the extra step of unifying these on the fly.
2023 */
2024 static void
2025 get_unified_tess_slots(const struct iris_context *ice,
2026 uint64_t *per_vertex_slots,
2027 uint32_t *per_patch_slots)
2028 {
2029 const struct shader_info *tcs =
2030 iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2031 const struct shader_info *tes =
2032 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2033
2034 *per_vertex_slots = tes->inputs_read;
2035 *per_patch_slots = tes->patch_inputs_read;
2036
2037 if (tcs) {
2038 *per_vertex_slots |= tcs->outputs_written;
2039 *per_patch_slots |= tcs->patch_outputs_written;
2040 }
2041 }
2042
2043 /**
2044 * Compile a tessellation control shader, and upload the assembly.
2045 */
2046 static void
2047 iris_compile_tcs(struct iris_screen *screen,
2048 struct hash_table *passthrough_ht,
2049 struct u_upload_mgr *uploader,
2050 struct util_debug_callback *dbg,
2051 struct iris_uncompiled_shader *ish,
2052 struct iris_compiled_shader *shader)
2053 {
2054 void *mem_ctx = ralloc_context(NULL);
2055 const struct intel_device_info *devinfo = screen->devinfo;
2056 uint32_t *system_values = NULL;
2057 unsigned num_system_values = 0;
2058 unsigned num_cbufs = 0;
2059
2060 nir_shader *nir;
2061
2062 struct iris_binding_table bt;
2063
2064 const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2065 struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2066 struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2067 uint32_t source_hash;
2068
2069 if (ish) {
2070 nir = nir_shader_clone(mem_ctx, ish->nir);
2071 source_hash = ish->source_hash;
2072 } else {
2073 if (screen->brw) {
2074 nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2075 } else {
2076 assert(screen->elk);
2077 nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2078 }
2079 source_hash = *(uint32_t*)nir->info.source_sha1;
2080 }
2081
2082 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2083 &num_system_values, &num_cbufs);
2084 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2085 num_system_values, num_cbufs);
2086
2087 const char *error = NULL;
2088 const unsigned *program;
2089 if (screen->brw) {
2090 struct brw_tcs_prog_data *brw_prog_data =
2091 rzalloc(mem_ctx, struct brw_tcs_prog_data);
2092 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2093
2094 struct brw_compile_tcs_params params = {
2095 .base = {
2096 .mem_ctx = mem_ctx,
2097 .nir = nir,
2098 .log_data = dbg,
2099 .source_hash = source_hash,
2100 },
2101 .key = &brw_key,
2102 .prog_data = brw_prog_data,
2103 };
2104
2105 program = brw_compile_tcs(screen->brw, &params);
2106 error = params.base.error_str;
2107
2108 if (program) {
2109 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2110 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2111 }
2112 } else {
2113 assert(screen->elk);
2114 struct elk_tcs_prog_data *elk_prog_data =
2115 rzalloc(mem_ctx, struct elk_tcs_prog_data);
2116 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2117
2118 struct elk_compile_tcs_params params = {
2119 .base = {
2120 .mem_ctx = mem_ctx,
2121 .nir = nir,
2122 .log_data = dbg,
2123 .source_hash = source_hash,
2124 },
2125 .key = &elk_key,
2126 .prog_data = elk_prog_data,
2127 };
2128
2129 program = elk_compile_tcs(screen->elk, &params);
2130 error = params.base.error_str;
2131
2132 if (program) {
2133 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2134 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2135 }
2136 }
2137
2138 if (program == NULL) {
2139 dbg_printf("Failed to compile control shader: %s\n", error);
2140 ralloc_free(mem_ctx);
2141
2142 shader->compilation_failed = true;
2143 util_queue_fence_signal(&shader->ready);
2144
2145 return;
2146 }
2147
2148 shader->compilation_failed = false;
2149
2150 iris_finalize_program(shader, NULL, system_values,
2151 num_system_values, 0, num_cbufs, &bt);
2152
2153 iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2154 IRIS_CACHE_TCS, sizeof(*key), key, program);
2155
2156 if (ish)
2157 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2158
2159 ralloc_free(mem_ctx);
2160 }
2161
2162 /**
2163 * Update the current tessellation control shader variant.
2164 *
2165 * Fill out the key, look in the cache, compile and bind if needed.
2166 */
2167 static void
2168 iris_update_compiled_tcs(struct iris_context *ice)
2169 {
2170 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2171 struct iris_uncompiled_shader *tcs =
2172 ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2173 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2174 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2175 const struct intel_device_info *devinfo = screen->devinfo;
2176
2177 const struct shader_info *tes_info =
2178 iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2179 struct iris_tcs_prog_key key = {
2180 .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2181 ._tes_primitive_mode = tes_info->tess._primitive_mode,
2182 .input_vertices =
2183 !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2184 .quads_workaround = devinfo->ver < 9 &&
2185 tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2186 tes_info->tess.spacing == TESS_SPACING_EQUAL,
2187 };
2188 get_unified_tess_slots(ice, &key.outputs_written,
2189 &key.patch_outputs_written);
2190 screen->vtbl.populate_tcs_key(ice, &key);
2191
2192 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2193 struct iris_compiled_shader *shader;
2194 bool added = false;
2195
2196 if (tcs != NULL) {
2197 shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2198 sizeof(key), &added);
2199 } else {
2200 /* Look for and possibly create a passthrough TCS */
2201 shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2202
2204 if (shader == NULL) {
2205 shader = iris_create_shader_variant(screen, ice->shaders.cache,
2206 MESA_SHADER_TESS_CTRL,
2207 IRIS_CACHE_TCS, sizeof(key), &key);
2208 added = true;
2209 }
2210
2211 }
2212
2213 /* If a new variant was added to whichever cache we used, compile it now,
2214 * unless this is an application-provided TCS whose binary was retrieved
2215 * from the disk cache.
2216 */
2217 if (added &&
2218 (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2219 &key, sizeof(key)))) {
2220 iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2221 shader);
2222 }
2223
2224 if (shader->compilation_failed)
2225 shader = NULL;
2226
2227 if (old != shader) {
2228 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2229 shader);
2230 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2231 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2232 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2233 shs->sysvals_need_upload = true;
2234
2235 unsigned urb_entry_size = shader ?
2236 iris_vue_data(shader)->urb_entry_size : 0;
2237 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2238 }
2239 }
2240
2241 /**
2242 * Compile a tessellation evaluation shader, and upload the assembly.
2243 */
2244 static void
2245 iris_compile_tes(struct iris_screen *screen,
2246 struct u_upload_mgr *uploader,
2247 struct util_debug_callback *dbg,
2248 struct iris_uncompiled_shader *ish,
2249 struct iris_compiled_shader *shader)
2250 {
2251 void *mem_ctx = ralloc_context(NULL);
2252 uint32_t *system_values;
2253 const struct intel_device_info *devinfo = screen->devinfo;
2254 unsigned num_system_values;
2255 unsigned num_cbufs;
2256
2257 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2258 const struct iris_tes_prog_key *const key = &shader->key.tes;
2259
2260 if (key->vue.nr_userclip_plane_consts) {
2261 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2262 nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2263 true, false, NULL);
2264 nir_lower_io_to_temporaries(nir, impl, true, false);
2265 nir_lower_global_vars_to_local(nir);
2266 nir_lower_vars_to_ssa(nir);
2267 nir_shader_gather_info(nir, impl);
2268 }
2269
2270 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2271 &num_system_values, &num_cbufs);
2272
2273 struct iris_binding_table bt;
2274 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2275 num_system_values, num_cbufs);
2276
2277 const char *error;
2278 const unsigned *program;
2279
2280 if (screen->brw) {
2281 struct brw_tes_prog_data *brw_prog_data =
2282 rzalloc(mem_ctx, struct brw_tes_prog_data);
2283
2284 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2285
2286 struct intel_vue_map input_vue_map;
2287 brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2288 key->patch_inputs_read);
2289
2290 struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2291
2292 struct brw_compile_tes_params params = {
2293 .base = {
2294 .mem_ctx = mem_ctx,
2295 .nir = nir,
2296 .log_data = dbg,
2297 .source_hash = ish->source_hash,
2298 },
2299 .key = &brw_key,
2300 .prog_data = brw_prog_data,
2301 .input_vue_map = &input_vue_map,
2302 };
2303
2304 program = brw_compile_tes(screen->brw, &params);
2305 error = params.base.error_str;
2306
2307 if (program) {
2308 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2309 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2310 }
2311 } else {
2312 struct elk_tes_prog_data *elk_prog_data =
2313 rzalloc(mem_ctx, struct elk_tes_prog_data);
2314
2315 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2316
2317 struct intel_vue_map input_vue_map;
2318 elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2319 key->patch_inputs_read);
2320
2321 struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2322
2323 struct elk_compile_tes_params params = {
2324 .base = {
2325 .mem_ctx = mem_ctx,
2326 .nir = nir,
2327 .log_data = dbg,
2328 .source_hash = ish->source_hash,
2329 },
2330 .key = &elk_key,
2331 .prog_data = elk_prog_data,
2332 .input_vue_map = &input_vue_map,
2333 };
2334
2335 program = elk_compile_tes(screen->elk, &params);
2336 error = params.base.error_str;
2337
2338 if (program) {
2339 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2340 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2341 }
2342 }
2343
2344 if (program == NULL) {
2345 dbg_printf("Failed to compile evaluation shader: %s\n", error);
2346 ralloc_free(mem_ctx);
2347
2348 shader->compilation_failed = true;
2349 util_queue_fence_signal(&shader->ready);
2350
2351 return;
2352 }
2353
2354 shader->compilation_failed = false;
2355
2356 uint32_t *so_decls =
2357 screen->vtbl.create_so_decl_list(&ish->stream_output,
2358 &iris_vue_data(shader)->vue_map);
2359
2360 iris_finalize_program(shader, so_decls, system_values,
2361 num_system_values, 0, num_cbufs, &bt);
2362
2363 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2364 sizeof(*key), key, program);
2365
2366 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2367
2368 ralloc_free(mem_ctx);
2369 }
2370
2371 /**
2372 * Update the current tessellation evaluation shader variant.
2373 *
2374 * Fill out the key, look in the cache, compile and bind if needed.
2375 */
2376 static void
2377 iris_update_compiled_tes(struct iris_context *ice)
2378 {
2379 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2380 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2381 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2382 struct iris_uncompiled_shader *ish =
2383 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2384
2385 struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2386 get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2387 screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2388
2389 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2390 bool added;
2391 struct iris_compiled_shader *shader =
2392 find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2393
2394 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2395 &key, sizeof(key))) {
2396 iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2397 }
2398
2399 if (shader->compilation_failed)
2400 shader = NULL;
2401
2402 if (old != shader) {
2403 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2404 shader);
2405 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2406 IRIS_STAGE_DIRTY_BINDINGS_TES |
2407 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2408 shs->sysvals_need_upload = true;
2409
2410 unsigned urb_entry_size = shader ?
2411 iris_vue_data(shader)->urb_entry_size : 0;
2412 check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2413 }
2414
2415 /* TODO: Could compare and avoid flagging this. */
2416 const struct shader_info *tes_info = &ish->nir->info;
2417 if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2418 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2419 ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2420 }
2421 }
2422
2423 /**
2424 * Compile a geometry shader, and upload the assembly.
2425 */
2426 static void
2427 iris_compile_gs(struct iris_screen *screen,
2428 struct u_upload_mgr *uploader,
2429 struct util_debug_callback *dbg,
2430 struct iris_uncompiled_shader *ish,
2431 struct iris_compiled_shader *shader)
2432 {
2433 const struct intel_device_info *devinfo = screen->devinfo;
2434 void *mem_ctx = ralloc_context(NULL);
2435 uint32_t *system_values;
2436 unsigned num_system_values;
2437 unsigned num_cbufs;
2438
2439 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2440 const struct iris_gs_prog_key *const key = &shader->key.gs;
2441
2442 if (key->vue.nr_userclip_plane_consts) {
2443 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2444 nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2445 false, NULL);
2446 nir_lower_io_to_temporaries(nir, impl, true, false);
2447 nir_lower_global_vars_to_local(nir);
2448 nir_lower_vars_to_ssa(nir);
2449 nir_shader_gather_info(nir, impl);
2450 }
2451
2452 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2453 &num_system_values, &num_cbufs);
2454
2455 struct iris_binding_table bt;
2456 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2457 num_system_values, num_cbufs);
2458
2459 const char *error;
2460 const unsigned *program;
2461 if (screen->brw) {
2462 struct brw_gs_prog_data *brw_prog_data =
2463 rzalloc(mem_ctx, struct brw_gs_prog_data);
2464
2465 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2466
2467 brw_compute_vue_map(devinfo,
2468 &brw_prog_data->base.vue_map, nir->info.outputs_written,
2469 nir->info.separate_shader, /* pos_slots */ 1);
2470
2471 struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2472
2473 struct brw_compile_gs_params params = {
2474 .base = {
2475 .mem_ctx = mem_ctx,
2476 .nir = nir,
2477 .log_data = dbg,
2478 .source_hash = ish->source_hash,
2479 },
2480 .key = &brw_key,
2481 .prog_data = brw_prog_data,
2482 };
2483
2484 program = brw_compile_gs(screen->brw, &params);
2485 error = params.base.error_str;
2486 if (program) {
2487 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2488 iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2489 }
2490 } else {
2491 struct elk_gs_prog_data *elk_prog_data =
2492 rzalloc(mem_ctx, struct elk_gs_prog_data);
2493
2494 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2495
2496 elk_compute_vue_map(devinfo,
2497 &elk_prog_data->base.vue_map, nir->info.outputs_written,
2498 nir->info.separate_shader, /* pos_slots */ 1);
2499
2500 struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2501
2502 struct elk_compile_gs_params params = {
2503 .base = {
2504 .mem_ctx = mem_ctx,
2505 .nir = nir,
2506 .log_data = dbg,
2507 .source_hash = ish->source_hash,
2508 },
2509 .key = &elk_key,
2510 .prog_data = elk_prog_data,
2511 };
2512
2513 program = elk_compile_gs(screen->elk, &params);
2514 error = params.base.error_str;
2515 if (program) {
2516 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2517 iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2518 }
2519 }
2520
2521 if (program == NULL) {
2522 dbg_printf("Failed to compile geometry shader: %s\n", error);
2523 ralloc_free(mem_ctx);
2524
2525 shader->compilation_failed = true;
2526 util_queue_fence_signal(&shader->ready);
2527
2528 return;
2529 }
2530
2531 shader->compilation_failed = false;
2532
2533 uint32_t *so_decls =
2534 screen->vtbl.create_so_decl_list(&ish->stream_output,
2535 &iris_vue_data(shader)->vue_map);
2536
2537 iris_finalize_program(shader, so_decls, system_values,
2538 num_system_values, 0, num_cbufs, &bt);
2539
2540 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2541 sizeof(*key), key, program);
2542
2543 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2544
2545 ralloc_free(mem_ctx);
2546 }
2547
2548 /**
2549 * Update the current geometry shader variant.
2550 *
2551 * Fill out the key, look in the cache, compile and bind if needed.
2552 */
2553 static void
2554 iris_update_compiled_gs(struct iris_context *ice)
2555 {
2556 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2557 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2558 struct iris_uncompiled_shader *ish =
2559 ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2560 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2561 struct iris_compiled_shader *shader = NULL;
2562 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2563
2564 if (ish) {
2565 struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2566 screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2567
2568 bool added;
2569
2570 shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2571 sizeof(key), &added);
2572
2573 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2574 &key, sizeof(key))) {
2575 iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2576 }
2577
2578 if (shader->compilation_failed)
2579 shader = NULL;
2580 }
2581
2582 if (old != shader) {
2583 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2584 shader);
2585 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2586 IRIS_STAGE_DIRTY_BINDINGS_GS |
2587 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2588 shs->sysvals_need_upload = true;
2589
2590 unsigned urb_entry_size = shader ?
2591 iris_vue_data(shader)->urb_entry_size : 0;
2592 check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2593 }
2594 }
2595
2596 /**
2597 * Compile a fragment (pixel) shader, and upload the assembly.
2598 */
2599 static void
2600 iris_compile_fs(struct iris_screen *screen,
2601 struct u_upload_mgr *uploader,
2602 struct util_debug_callback *dbg,
2603 struct iris_uncompiled_shader *ish,
2604 struct iris_compiled_shader *shader,
2605 struct intel_vue_map *vue_map)
2606 {
2607 void *mem_ctx = ralloc_context(NULL);
2608 uint32_t *system_values;
2609 const struct intel_device_info *devinfo = screen->devinfo;
2610 unsigned num_system_values;
2611 unsigned num_cbufs;
2612
2613 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2614 const struct iris_fs_prog_key *const key = &shader->key.fs;
2615
2616 iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2617 &num_system_values, &num_cbufs);
2618
2619 /* Lower output variables to load_output intrinsics before setting up
2620 * binding tables, so iris_setup_binding_table can map any load_output
2621 * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2622 * non-coherent framebuffer fetches.
2623 */
2624 brw_nir_lower_fs_outputs(nir);
2625
2626 /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
2627 * and do not need a binding table entry with a null surface. Earlier
2628 * generations need an entry for a null surface.
2629 */
2630 int null_rts = devinfo->ver < 11 ? 1 : 0;
2631
2632 struct iris_binding_table bt;
2633 iris_setup_binding_table(devinfo, nir, &bt,
2634 MAX2(key->nr_color_regions, null_rts),
2635 num_system_values, num_cbufs);
2636
2637 const char *error;
2638 const unsigned *program;
2639
2640 if (screen->brw) {
2641 struct brw_wm_prog_data *brw_prog_data =
2642 rzalloc(mem_ctx, struct brw_wm_prog_data);
2643
2644 brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2645
2646 brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2647
2648 struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2649
2650 struct brw_compile_fs_params params = {
2651 .base = {
2652 .mem_ctx = mem_ctx,
2653 .nir = nir,
2654 .log_data = dbg,
2655 .source_hash = ish->source_hash,
2656 },
2657 .key = &brw_key,
2658 .prog_data = brw_prog_data,
2659
2660 .allow_spilling = true,
2661 .max_polygons = UCHAR_MAX,
2662 .vue_map = vue_map,
2663 };
2664
2665 program = brw_compile_fs(screen->brw, &params);
2666 error = params.base.error_str;
2667 if (program) {
2668 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2669 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2670 }
2671 } else {
2672 struct elk_wm_prog_data *elk_prog_data =
2673 rzalloc(mem_ctx, struct elk_wm_prog_data);
2674
2675 elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2676
2677 elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2678
2679 struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2680
2681 struct elk_compile_fs_params params = {
2682 .base = {
2683 .mem_ctx = mem_ctx,
2684 .nir = nir,
2685 .log_data = dbg,
2686 .source_hash = ish->source_hash,
2687 },
2688 .key = &elk_key,
2689 .prog_data = elk_prog_data,
2690
2691 .allow_spilling = true,
2692 .max_polygons = UCHAR_MAX,
2693 .vue_map = vue_map,
2694 };
2695
2696 program = elk_compile_fs(screen->elk, &params);
2697 error = params.base.error_str;
2698 if (program) {
2699 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2700 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2701 }
2702 }
2703
2704 if (program == NULL) {
2705 dbg_printf("Failed to compile fragment shader: %s\n", error);
2706 ralloc_free(mem_ctx);
2707
2708 shader->compilation_failed = true;
2709 util_queue_fence_signal(&shader->ready);
2710
2711 return;
2712 }
2713
2714 shader->compilation_failed = false;
2715
2716 iris_finalize_program(shader, NULL, system_values,
2717 num_system_values, 0, num_cbufs, &bt);
2718
2719 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2720 sizeof(*key), key, program);
2721
2722 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2723
2724 ralloc_free(mem_ctx);
2725 }
2726
2727 /**
2728 * Update the current fragment shader variant.
2729 *
2730 * Fill out the key, look in the cache, compile and bind if needed.
2731 */
2732 static void
2733 iris_update_compiled_fs(struct iris_context *ice)
2734 {
2735 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2736 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2737 struct iris_uncompiled_shader *ish =
2738 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2739 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2740 struct iris_fs_prog_key key = { KEY_INIT(base) };
2741 screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2742
2743 struct intel_vue_map *last_vue_map =
2744 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2745
2746 if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2747 key.input_slots_valid = last_vue_map->slots_valid;
2748
2749 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2750 bool added;
2751 struct iris_compiled_shader *shader =
2752 find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2753 sizeof(key), &added);
2754
2755 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2756 &key, sizeof(key))) {
2757 iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2758 }
2759
2760 if (shader->compilation_failed)
2761 shader = NULL;
2762
2763 if (old != shader) {
2764 // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2765 // toggles. might be able to avoid flagging SBE too.
2766 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2767 shader);
2768 ice->state.dirty |= IRIS_DIRTY_WM |
2769 IRIS_DIRTY_CLIP |
2770 IRIS_DIRTY_SBE;
2771 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2772 IRIS_STAGE_DIRTY_BINDINGS_FS |
2773 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2774 shs->sysvals_need_upload = true;
2775 }
2776 }
2777
2778 /**
2779 * Update the last enabled stage's VUE map.
2780 *
2781 * When the shader feeding the rasterizer's output interface changes, we
2782 * need to re-emit various packets.
2783 */
2784 static void
2785 update_last_vue_map(struct iris_context *ice,
2786 struct iris_compiled_shader *shader)
2787 {
2788 const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2789 const struct intel_vue_map *old_map =
2790 !ice->shaders.last_vue_shader ? NULL :
2791 &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2792 const uint64_t changed_slots =
2793 (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
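/* changed_slots has a bit set for every varying slot that was added or
 * removed relative to the previous last-VUE-stage shader (XOR of the old
 * and new slots_valid bitfields).
 */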
2794
2795 if (changed_slots & VARYING_BIT_VIEWPORT) {
2796 ice->state.num_viewports =
2797 (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2798 ice->state.dirty |= IRIS_DIRTY_CLIP |
2799 IRIS_DIRTY_SF_CL_VIEWPORT |
2800 IRIS_DIRTY_CC_VIEWPORT |
2801 IRIS_DIRTY_SCISSOR_RECT;
2802 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2803 ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2804 }
2805
2806 if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2807 ice->state.dirty |= IRIS_DIRTY_SBE;
2808 }
2809
2810 iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2811 }
2812
2813 static void
2814 iris_update_pull_constant_descriptors(struct iris_context *ice,
2815 gl_shader_stage stage)
2816 {
2817 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2818
2819 if (!shader || !shader->has_ubo_pull)
2820 return;
2821
2822 struct iris_shader_state *shs = &ice->state.shaders[stage];
2823 bool any_new_descriptors =
2824 shader->num_system_values > 0 && shs->sysvals_need_upload;
2825
2826 unsigned bound_cbufs = shs->bound_cbufs;
2827
2828 while (bound_cbufs) {
2829 const int i = u_bit_scan(&bound_cbufs);
2830 struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2831 struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2832 if (!surf_state->res && cbuf->buffer) {
2833 iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2834 ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2835 any_new_descriptors = true;
2836 }
2837 }
2838
2839 if (any_new_descriptors)
2840 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2841 }
2842
2843 /**
2844 * Update the current shader variants for the given state.
2845 *
2846 * This should be called on every draw call to ensure that the correct
2847 * shaders are bound. It will also flag any dirty state triggered by
2848 * swapping out those shaders.
2849 */
2850 void
2851 iris_update_compiled_shaders(struct iris_context *ice)
2852 {
2853 const uint64_t stage_dirty = ice->state.stage_dirty;
2854
2855 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2856 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2857 struct iris_uncompiled_shader *tes =
2858 ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2859 if (tes) {
2860 iris_update_compiled_tcs(ice);
2861 iris_update_compiled_tes(ice);
2862 } else {
2863 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2864 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2865 ice->state.stage_dirty |=
2866 IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2867 IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2868 IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2869
2870 if (ice->shaders.urb.constrained)
2871 ice->state.dirty |= IRIS_DIRTY_URB;
2872 }
2873 }
2874
2875 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2876 iris_update_compiled_vs(ice);
2877 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2878 iris_update_compiled_gs(ice);
2879
2880 if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2881 IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2882 const struct iris_compiled_shader *gs =
2883 ice->shaders.prog[MESA_SHADER_GEOMETRY];
2884 const struct iris_compiled_shader *tes =
2885 ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2886
2887 bool points_or_lines = false;
2888
2889 if (gs) {
2890 const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2891 points_or_lines =
2892 gs_data->output_topology == _3DPRIM_POINTLIST ||
2893 gs_data->output_topology == _3DPRIM_LINESTRIP;
2894 } else if (tes) {
2895 const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2896 points_or_lines =
2897 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2898 tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2899 }
2900
2901 if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2902 /* Outbound to XY Clip enables */
2903 ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2904 ice->state.dirty |= IRIS_DIRTY_CLIP;
2905 }
2906 }
2907
2908 gl_shader_stage last_stage = last_vue_stage(ice);
2909 struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2910 struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2911 update_last_vue_map(ice, shader);
2912 if (ice->state.streamout != shader->streamout) {
2913 ice->state.streamout = shader->streamout;
2914 ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2915 }
2916
2917 if (ice->state.streamout_active) {
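/* Gallium expresses stream output strides in dwords; convert them to the
 * byte strides used when programming the SO buffers.
 */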
2918 for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2919 struct iris_stream_output_target *so =
2920 (void *) ice->state.so_target[i];
2921 if (so)
2922 so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2923 }
2924 }
2925
2926 if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2927 iris_update_compiled_fs(ice);
2928
2929 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2930 if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2931 iris_update_pull_constant_descriptors(ice, i);
2932 }
2933 }
2934
2935 static void
2936 iris_compile_cs(struct iris_screen *screen,
2937 struct u_upload_mgr *uploader,
2938 struct util_debug_callback *dbg,
2939 struct iris_uncompiled_shader *ish,
2940 struct iris_compiled_shader *shader)
2941 {
2942 void *mem_ctx = ralloc_context(NULL);
2943 uint32_t *system_values;
2944 const struct intel_device_info *devinfo = screen->devinfo;
2945 unsigned num_system_values;
2946 unsigned num_cbufs;
2947
2948 nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2949 const struct iris_cs_prog_key *const key = &shader->key.cs;
2950
2951 if (screen->brw)
2952 NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
2953 else
2954 NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
2955
2956 iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
2957 &system_values, &num_system_values, &num_cbufs);
2958
2959 struct iris_binding_table bt;
2960 iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2961 num_system_values, num_cbufs);
2962
2963 const char *error;
2964 const unsigned *program;
2965
2966 if (screen->brw) {
2967 struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2968
2969 struct brw_cs_prog_data *brw_prog_data =
2970 rzalloc(mem_ctx, struct brw_cs_prog_data);
2971
2972 struct brw_compile_cs_params params = {
2973 .base = {
2974 .mem_ctx = mem_ctx,
2975 .nir = nir,
2976 .log_data = dbg,
2977 .source_hash = ish->source_hash,
2978 },
2979 .key = &brw_key,
2980 .prog_data = brw_prog_data,
2981 };
2982
2983 program = brw_compile_cs(screen->brw, &params);
2984 error = params.base.error_str;
2985 if (program) {
2986 iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2987 iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2988 }
2989 } else {
2990 struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
2991
2992 struct elk_cs_prog_data *elk_prog_data =
2993 rzalloc(mem_ctx, struct elk_cs_prog_data);
2994
2995 struct elk_compile_cs_params params = {
2996 .base = {
2997 .mem_ctx = mem_ctx,
2998 .nir = nir,
2999 .log_data = dbg,
3000 .source_hash = ish->source_hash,
3001 },
3002 .key = &elk_key,
3003 .prog_data = elk_prog_data,
3004 };
3005
3006 program = elk_compile_cs(screen->elk, &params);
3007 error = params.base.error_str;
3008 if (program) {
3009 iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3010 iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3011 }
3012 }
3013
3014 if (program == NULL) {
3015 dbg_printf("Failed to compile compute shader: %s\n", error);
3016
3017 shader->compilation_failed = true;
3018 util_queue_fence_signal(&shader->ready);
3019
3020 return;
3021 }
3022
3023 shader->compilation_failed = false;
3024
3025 iris_finalize_program(shader, NULL, system_values,
3026 num_system_values, ish->kernel_input_size, num_cbufs,
3027 &bt);
3028
3029 iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3030 sizeof(*key), key, program);
3031
3032 iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3033
3034 ralloc_free(mem_ctx);
3035 }
3036
3037 static void
3038 iris_update_compiled_cs(struct iris_context *ice)
3039 {
3040 struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3041 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3042 struct iris_uncompiled_shader *ish =
3043 ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3044 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3045 struct iris_cs_prog_key key = { KEY_INIT(base) };
3046 screen->vtbl.populate_cs_key(ice, &key);
3047
3048 struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3049 bool added;
3050 struct iris_compiled_shader *shader =
3051 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3052 sizeof(key), &added);
3053
3054 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3055 &key, sizeof(key))) {
3056 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3057 }
3058
3059 if (shader->compilation_failed)
3060 shader = NULL;
3061
3062 if (old != shader) {
3063 iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3064 shader);
3065 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3066 IRIS_STAGE_DIRTY_BINDINGS_CS |
3067 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3068 shs->sysvals_need_upload = true;
3069 }
3070 }
3071
3072 void
3073 iris_update_compiled_compute_shader(struct iris_context *ice)
3074 {
3075 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3076 iris_update_compiled_cs(ice);
3077
3078 if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3079 iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3080 }
3081
3082 void
3083 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3084 struct iris_compiled_shader *shader,
3085 unsigned threads,
3086 uint32_t *dst)
3087 {
3088 struct iris_cs_data *cs_data = iris_cs_data(shader);
3089 assert(iris_cs_push_const_total_size(shader, threads) > 0);
3090 assert(cs_data->push.cross_thread.size == 0);
3091 assert(cs_data->push.per_thread.dwords == 1);
3092 assert(cs_data->first_param_is_builtin_subgroup_id);
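/* With no cross-thread data and a single per-thread dword, each thread's
 * push constant payload is one 8-dword (32-byte) register whose first
 * dword holds that thread's subgroup ID; the remaining dwords are unused.
 */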
3093 for (unsigned t = 0; t < threads; t++)
3094 dst[8 * t] = t;
3095 }
3096
3097 /**
3098 * Allocate scratch BOs as needed for the given per-thread size and stage.
3099 */
3100 struct iris_bo *
3101 iris_get_scratch_space(struct iris_context *ice,
3102 unsigned per_thread_scratch,
3103 gl_shader_stage stage)
3104 {
3105 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3106 struct iris_bufmgr *bufmgr = screen->bufmgr;
3107 const struct intel_device_info *devinfo = screen->devinfo;
3108
3109 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3110 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3111 assert(per_thread_scratch == 1 << (encoded_size + 10));
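/* Per-thread scratch sizes are powers of two with a 1KB minimum, so
 * encoded_size is log2(per_thread_scratch) - 10: 1KB -> 0, 2KB -> 1,
 * 4KB -> 2, and so on.
 */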
3112
3113 /* On GFX version 12.5, scratch access changed to a surface-based model.
3114 * Instead of each shader type having its own layout based on IDs passed
3115 * from the relevant fixed-function unit, all scratch access is based on
3116 * thread IDs like it always has been for compute.
3117 */
3118 if (devinfo->verx10 >= 125)
3119 stage = MESA_SHADER_COMPUTE;
3120
3121 struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3122
3123 if (!*bop) {
3124 assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3125 uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3126 *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3127 IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3128 }
3129
3130 return *bop;
3131 }
3132
3133 const struct iris_state_ref *
3134 iris_get_scratch_surf(struct iris_context *ice,
3135 unsigned per_thread_scratch)
3136 {
3137 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3138 ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3139
3140 assert(devinfo->verx10 >= 125);
3141
3142 unsigned encoded_size = ffs(per_thread_scratch) - 11;
3143 assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3144 assert(per_thread_scratch == 1 << (encoded_size + 10));
3145
3146 struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3147
3148 if (ref->res)
3149 return ref;
3150
3151 struct iris_bo *scratch_bo =
3152 iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3153
3154 void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3155 screen->isl_dev.ss.size, 64);
3156
3157 isl_buffer_fill_state(&screen->isl_dev, map,
3158 .address = scratch_bo->address,
3159 .size_B = scratch_bo->size,
3160 .format = ISL_FORMAT_RAW,
3161 .swizzle = ISL_SWIZZLE_IDENTITY,
3162 .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3163 .stride_B = per_thread_scratch,
3164 .is_scratch = true);
3165
3166 return ref;
3167 }
3168
3169 /* ------------------------------------------------------------------- */
3170
3171 /**
3172 * The pipe->create_[stage]_state() driver hooks.
3173 *
3174 * Performs basic NIR preprocessing, records any state dependencies, and
3175 * returns an iris_uncompiled_shader as the Gallium CSO.
3176 *
3177 * Actual shader compilation to assembly happens later, at first use.
3178 */
3179 static void *
3180 iris_create_uncompiled_shader(struct iris_screen *screen,
3181 nir_shader *nir,
3182 const struct pipe_stream_output_info *so_info)
3183 {
3184 struct iris_uncompiled_shader *ish =
3185 calloc(1, sizeof(struct iris_uncompiled_shader));
3186 if (!ish)
3187 return NULL;
3188
3189 pipe_reference_init(&ish->ref, 1);
3190 list_inithead(&ish->variants);
3191 simple_mtx_init(&ish->lock, mtx_plain);
3192 util_queue_fence_init(&ish->ready);
3193
3194 ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3195
3196 ish->program_id = get_new_program_id(screen);
3197 ish->nir = nir;
3198 if (so_info) {
3199 memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3200 update_so_info(&ish->stream_output, nir->info.outputs_written);
3201 }
3202
3203 /* Use lowest dword of source shader sha1 for shader hash. */
3204 ish->source_hash = *(uint32_t*)nir->info.source_sha1;
3205
3206 if (screen->disk_cache) {
3207 /* Serialize the NIR to a binary blob that we can hash for the disk
3208 * cache. Drop unnecessary information (like variable names)
3209 * so the serialized NIR is smaller, and also to let us detect more
3210 * isomorphic shaders when hashing, increasing cache hits.
3211 */
3212 struct blob blob;
3213 blob_init(&blob);
3214 nir_serialize(&blob, nir, true);
3215 _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3216 blob_finish(&blob);
3217 }
3218
3219 return ish;
3220 }
3221
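/**
 * The pipe->create_compute_state() driver hook.
 *
 * Accepts NIR or serialized NIR, normalizes kernels to MESA_SHADER_COMPUTE,
 * records the kernel input and shared memory sizes, and optionally
 * precompiles a default variant.
 */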
3222 static void *
3223 iris_create_compute_state(struct pipe_context *ctx,
3224 const struct pipe_compute_state *state)
3225 {
3226 struct iris_context *ice = (void *) ctx;
3227 struct iris_screen *screen = (void *) ctx->screen;
3228 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3229 const nir_shader_compiler_options *options =
3230 screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
3231 : screen->elk->nir_options[MESA_SHADER_COMPUTE];
3232
3233 nir_shader *nir;
3234 switch (state->ir_type) {
3235 case PIPE_SHADER_IR_NIR:
3236 nir = (void *)state->prog;
3237 break;
3238
3239 case PIPE_SHADER_IR_NIR_SERIALIZED: {
3240 struct blob_reader reader;
3241 const struct pipe_binary_program_header *hdr = state->prog;
3242 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
3243 nir = nir_deserialize(NULL, options, &reader);
3244 break;
3245 }
3246
3247 default:
3248 unreachable("Unsupported IR");
3249 }
3250
3251 /* Most of iris doesn't really care about the difference between compute
3252 * shaders and kernels. We also tend to hard-code COMPUTE everywhere so
3253 * it's way easier if we just normalize to COMPUTE here.
3254 */
3255 assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3256 nir->info.stage == MESA_SHADER_KERNEL);
3257 nir->info.stage = MESA_SHADER_COMPUTE;
3258
3259 struct iris_uncompiled_shader *ish =
3260 iris_create_uncompiled_shader(screen, nir, NULL);
3261 ish->kernel_input_size = state->req_input_mem;
3262 ish->kernel_shared_size = state->static_shared_mem;
3263
3264 // XXX: disallow more than 64KB of shared variables
3265
3266 if (screen->precompile) {
3267 struct iris_cs_prog_key key = { KEY_INIT(base) };
3268
3269 struct iris_compiled_shader *shader =
3270 iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3271 IRIS_CACHE_CS, sizeof(key), &key);
3272
3273 /* Append our new variant to the shader's variant list. */
3274 list_addtail(&shader->link, &ish->variants);
3275
3276 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3277 &key, sizeof(key))) {
3278 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3279 }
3280 }
3281
3282 return ish;
3283 }
3284
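/**
 * The pipe->get_compute_state_info() driver hook.
 *
 * Reports the thread limit, the supported SIMD widths (8/16/32), and the
 * largest scratch ("private memory") usage among the existing variants.
 */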
3285 static void
3286 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3287 struct pipe_compute_state_object_info *info)
3288 {
3289 struct iris_screen *screen = (void *) ctx->screen;
3290 struct iris_uncompiled_shader *ish = state;
3291
3292 info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3293 info->private_memory = 0;
3294 info->preferred_simd_size = 32;
3295 info->simd_sizes = 8 | 16 | 32;
3296
3297 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3298 &ish->variants, link) {
3299 info->private_memory = MAX2(info->private_memory,
3300 shader->total_scratch);
3301 }
3302 }
3303
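/**
 * The pipe->get_compute_state_subgroup_size() driver hook.
 *
 * Finds (compiling if necessary) the variant for the current key and returns
 * the SIMD size that would be used for the given workgroup dimensions.
 */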
3304 static uint32_t
3305 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3306 const uint32_t block[3])
3307 {
3308 struct iris_context *ice = (void *) ctx;
3309 struct iris_screen *screen = (void *) ctx->screen;
3310 struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3311 struct iris_uncompiled_shader *ish = state;
3312
3313 struct iris_cs_prog_key key = { KEY_INIT(base) };
3314 screen->vtbl.populate_cs_key(ice, &key);
3315
3316 bool added;
3317 struct iris_compiled_shader *shader =
3318 find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3319 sizeof(key), &added);
3320
3321 if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3322 &key, sizeof(key))) {
3323 iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3324 }
3325
3326 return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3327 }
3328
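/**
 * Threaded compile job entry point, run from the screen's shader compiler
 * queue for precompiles; dispatches to the per-stage iris_compile_*() call.
 */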
3329 static void
3330 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3331 {
3332 const struct iris_threaded_compile_job *job =
3333 (struct iris_threaded_compile_job *) _job;
3334
3335 struct iris_screen *screen = job->screen;
3336 struct u_upload_mgr *uploader = job->uploader;
3337 struct util_debug_callback *dbg = job->dbg;
3338 struct iris_uncompiled_shader *ish = job->ish;
3339 struct iris_compiled_shader *shader = job->shader;
3340
3341 switch (ish->nir->info.stage) {
3342 case MESA_SHADER_VERTEX:
3343 iris_compile_vs(screen, uploader, dbg, ish, shader);
3344 break;
3345 case MESA_SHADER_TESS_CTRL:
3346 iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3347 break;
3348 case MESA_SHADER_TESS_EVAL:
3349 iris_compile_tes(screen, uploader, dbg, ish, shader);
3350 break;
3351 case MESA_SHADER_GEOMETRY:
3352 iris_compile_gs(screen, uploader, dbg, ish, shader);
3353 break;
3354 case MESA_SHADER_FRAGMENT:
3355 iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3356 break;
3357
3358 default:
3359 unreachable("Invalid shader stage.");
3360 }
3361 }
3362
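/**
 * The common implementation of pipe->create_{vs,tcs,tes,gs,fs}_state().
 *
 * Converts TGSI to NIR when necessary, builds a stage-specific default key,
 * records which non-orthogonal state the shader depends on, and kicks off an
 * asynchronous precompile when enabled.
 */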
3363 static void *
3364 iris_create_shader_state(struct pipe_context *ctx,
3365 const struct pipe_shader_state *state)
3366 {
3367 struct iris_context *ice = (void *) ctx;
3368 struct iris_screen *screen = (void *) ctx->screen;
3369 struct nir_shader *nir;
3370
3371 if (state->type == PIPE_SHADER_IR_TGSI)
3372 nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3373 else
3374 nir = state->ir.nir;
3375
3376 const struct shader_info *const info = &nir->info;
3377 struct iris_uncompiled_shader *ish =
3378 iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3379
3380 union iris_any_prog_key key;
3381 unsigned key_size = 0;
3382
3383 memset(&key, 0, sizeof(key));
3384
3385 switch (info->stage) {
3386 case MESA_SHADER_VERTEX:
3387 /* User clip planes */
3388 if (info->clip_distance_array_size == 0)
3389 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3390
3391 key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3392 key_size = sizeof(key.vs);
3393 break;
3394
3395 case MESA_SHADER_TESS_CTRL: {
3396 key.tcs = (struct iris_tcs_prog_key) {
3397 KEY_INIT(vue.base),
3398 // XXX: make sure the linker fills this out from the TES...
3399 ._tes_primitive_mode =
3400 info->tess._primitive_mode ? info->tess._primitive_mode
3401 : TESS_PRIMITIVE_TRIANGLES,
3402 .outputs_written = info->outputs_written,
3403 .patch_outputs_written = info->patch_outputs_written,
3404 };
3405
3406 /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3407 * We don't have that information, so we randomly guess that the input
3408 * and output patches are the same size. This is a bad guess, but we
3409 * can't do much better.
3410 */
3411 if (iris_use_tcs_multi_patch(screen))
3412 key.tcs.input_vertices = info->tess.tcs_vertices_out;
3413
3414 key_size = sizeof(key.tcs);
3415 break;
3416 }
3417
3418 case MESA_SHADER_TESS_EVAL:
3419 /* User clip planes */
3420 if (info->clip_distance_array_size == 0)
3421 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3422
3423 key.tes = (struct iris_tes_prog_key) {
3424 KEY_INIT(vue.base),
3425 // XXX: not ideal, need TCS output/TES input unification
3426 .inputs_read = info->inputs_read,
3427 .patch_inputs_read = info->patch_inputs_read,
3428 };
3429
3430 key_size = sizeof(key.tes);
3431 break;
3432
3433 case MESA_SHADER_GEOMETRY:
3434 /* User clip planes */
3435 if (info->clip_distance_array_size == 0)
3436 ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3437
3438 key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3439 key_size = sizeof(key.gs);
3440 break;
3441
3442 case MESA_SHADER_FRAGMENT:
3443 ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3444 (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3445 (1ull << IRIS_NOS_RASTERIZER) |
3446 (1ull << IRIS_NOS_BLEND);
3447
3448 STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3449
3450 /* The program key needs the VUE map if there are > 16 inputs */
3451 if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3452 ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3453 }
3454
3455 const uint64_t color_outputs = info->outputs_written &
3456 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3457 BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3458 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3459
3460 bool can_rearrange_varyings =
3461 util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3462
3463 const struct intel_device_info *devinfo = screen->devinfo;
3464
3465 key.fs = (struct iris_fs_prog_key) {
3466 KEY_INIT(base),
3467 .nr_color_regions = util_bitcount(color_outputs),
3468 .coherent_fb_fetch = devinfo->ver >= 9,
3469 .input_slots_valid =
3470 can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3471 };
3472
3473 key_size = sizeof(key.fs);
3474 break;
3475
3476 default:
3477 unreachable("Invalid shader stage.");
3478 }
3479
3480 if (screen->precompile) {
3481 struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3482
3483 struct iris_compiled_shader *shader =
3484 iris_create_shader_variant(screen, NULL, info->stage,
3485 (enum iris_program_cache_id) info->stage,
3486 key_size, &key);
3487
3488 /* Append our new variant to the shader's variant list. */
3489 list_addtail(&shader->link, &ish->variants);
3490
3491 if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3492 &key, key_size)) {
3493 assert(!util_queue_fence_is_signalled(&shader->ready));
3494
3495 struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3496
3497 job->screen = screen;
3498 job->uploader = uploader;
3499 job->ish = ish;
3500 job->shader = shader;
3501
3502 iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3503 iris_compile_shader);
3504 }
3505 }
3506
3507 return ish;
3508 }
3509
3510 /**
3511 * Called when the refcount on the iris_uncompiled_shader reaches 0.
3512 *
3513 * Frees the iris_uncompiled_shader.
3514 *
3515 * \sa iris_delete_shader_state
3516 */
3517 void
3518 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3519 {
3520 struct iris_uncompiled_shader *ish = state;
3521
3522 /* No need to take ish->lock; we hold the last reference to ish */
3523 list_for_each_entry_safe(struct iris_compiled_shader, shader,
3524 &ish->variants, link) {
3525 list_del(&shader->link);
3526
3527 iris_shader_variant_reference(&shader, NULL);
3528 }
3529
3530 simple_mtx_destroy(&ish->lock);
3531 util_queue_fence_destroy(&ish->ready);
3532
3533 ralloc_free(ish->nir);
3534 free(ish);
3535 }
3536
3537 /**
3538 * The pipe->delete_[stage]_state() driver hooks.
3539 *
3540 * \sa iris_destroy_shader_state
3541 */
3542 static void
3543 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3544 {
3545 struct iris_uncompiled_shader *ish = state;
3546 struct iris_context *ice = (void *) ctx;
3547
3548 const gl_shader_stage stage = ish->nir->info.stage;
3549
3550 if (ice->shaders.uncompiled[stage] == ish) {
3551 ice->shaders.uncompiled[stage] = NULL;
3552 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3553 }
3554
3555 if (pipe_reference(&ish->ref, NULL))
3556 iris_destroy_shader_state(ctx, state);
3557 }
3558
3559 /**
3560 * The pipe->bind_[stage]_state() driver hook.
3561 *
3562 * Binds an uncompiled shader as the current one for a particular stage.
3563 * Updates dirty tracking to account for the shader's NOS.
3564 */
3565 static void
3566 bind_shader_state(struct iris_context *ice,
3567 struct iris_uncompiled_shader *ish,
3568 gl_shader_stage stage)
3569 {
3570 uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3571 const uint64_t nos = ish ? ish->nos : 0;
3572
3573 const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3574 const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3575
3576 if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3577 (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3578 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3579 }
3580
3581 ice->shaders.uncompiled[stage] = ish;
3582 ice->state.stage_dirty |= stage_dirty_bit;
3583
3584 /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they
3585 * change (or that they no longer need to do so).
3586 */
3587 for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3588 if (nos & (1 << i))
3589 ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3590 else
3591 ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3592 }
3593 }
3594
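/**
 * The pipe->bind_vs_state() driver hook.
 *
 * In addition to the common bind_shader_state() handling, flags clip, raster,
 * and viewport state dirty when window-space positioning changes, and vertex
 * buffers/elements dirty when the shader's use of draw parameters or SGV
 * elements changes.
 */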
3595 static void
3596 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3597 {
3598 struct iris_context *ice = (struct iris_context *)ctx;
3599 struct iris_uncompiled_shader *ish = state;
3600
3601 if (ish) {
3602 const struct shader_info *info = &ish->nir->info;
3603 if (ice->state.window_space_position != info->vs.window_space_position) {
3604 ice->state.window_space_position = info->vs.window_space_position;
3605
3606 ice->state.dirty |= IRIS_DIRTY_CLIP |
3607 IRIS_DIRTY_RASTER |
3608 IRIS_DIRTY_CC_VIEWPORT;
3609 }
3610
3611 const bool uses_draw_params =
3612 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3613 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3614 const bool uses_derived_draw_params =
3615 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3616 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3617 const bool needs_sgvs_element = uses_draw_params ||
3618 BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3619 BITSET_TEST(info->system_values_read,
3620 SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3621
3622 if (ice->state.vs_uses_draw_params != uses_draw_params ||
3623 ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3624 ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3625 ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3626 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3627 IRIS_DIRTY_VERTEX_ELEMENTS;
3628 }
3629
3630 ice->state.vs_uses_draw_params = uses_draw_params;
3631 ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3632 ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3633 ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3634 }
3635
3636 bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3637 }
3638
3639 static void
3640 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3641 {
3642 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3643 }
3644
3645 static void
3646 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3647 {
3648 struct iris_context *ice = (struct iris_context *)ctx;
3649 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3650 const struct intel_device_info *devinfo = screen->devinfo;
3651
3652 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3653 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3654 ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3655 IRIS_DIRTY_VFG : 0);
3656
3657 bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3658 }
3659
3660 static void
3661 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3662 {
3663 struct iris_context *ice = (struct iris_context *)ctx;
3664
3665 /* Enabling/disabling optional stages requires a URB reconfiguration. */
3666 if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3667 ice->state.dirty |= IRIS_DIRTY_URB;
3668
3669 bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3670 }
3671
3672 static void
3673 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3674 {
3675 struct iris_context *ice = (struct iris_context *) ctx;
3676 struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3677 const struct intel_device_info *devinfo = screen->devinfo;
3678 struct iris_uncompiled_shader *old_ish =
3679 ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3680 struct iris_uncompiled_shader *new_ish = state;
3681
3682 const unsigned color_bits =
3683 BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3684 BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3685
3686 /* Fragment shader outputs influence HasWriteableRT */
3687 if (!old_ish || !new_ish ||
3688 (old_ish->nir->info.outputs_written & color_bits) !=
3689 (new_ish->nir->info.outputs_written & color_bits))
3690 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3691
3692 if (devinfo->ver == 8)
3693 ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3694
3695 bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3696 }
3697
3698 static void
3699 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3700 {
3701 bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3702 }
3703
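/**
 * The pipe_screen->finalize_nir() driver hook.
 *
 * Runs edge flag fixups and the backend (brw or elk) NIR preprocessing and
 * storage image lowering, then sweeps the NIR to reclaim memory.
 */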
3704 static char *
3705 iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
3706 {
3707 struct iris_screen *screen = (struct iris_screen *)_screen;
3708 struct nir_shader *nir = (struct nir_shader *) nirptr;
3709 const struct intel_device_info *devinfo = screen->devinfo;
3710
3711 NIR_PASS_V(nir, iris_fix_edge_flags);
3712
3713 if (screen->brw) {
3714 struct brw_nir_compiler_opts opts = {};
3715 brw_preprocess_nir(screen->brw, nir, &opts);
3716
3717 NIR_PASS_V(nir, brw_nir_lower_storage_image,
3718 &(struct brw_nir_lower_storage_image_opts) {
3719 .devinfo = devinfo,
3720 .lower_loads = true,
3721 .lower_stores = true,
3722 });
3723 } else {
3724 assert(screen->elk);
3725
3726 struct elk_nir_compiler_opts opts = {};
3727 elk_preprocess_nir(screen->elk, nir, &opts);
3728
3729 NIR_PASS_V(nir, elk_nir_lower_storage_image,
3730 &(struct elk_nir_lower_storage_image_opts) {
3731 .devinfo = devinfo,
3732 .lower_loads = true,
3733 .lower_stores = true,
3734
3735 /* Iris uploads image params used by
3736 * get_size lowering only in Gfx8.
3737 */
3738 .lower_get_size = true,
3739 });
3740 }
3741
3742 NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3743
3744 nir_sweep(nir);
3745
3746 return NULL;
3747 }
3748
3749 static void
3750 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3751 unsigned max_threads)
3752 {
3753 struct iris_screen *screen = (struct iris_screen *) pscreen;
3754 util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3755 false);
3756 }
3757
3758 static bool
3759 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3760 void *v_shader,
3761 enum pipe_shader_type p_stage)
3762 {
3763 struct iris_screen *screen = (struct iris_screen *) pscreen;
3764
3765 /* Threaded compilation is only used for the precompile. If precompile is
3766 * disabled, threaded compilation is "done."
3767 */
3768 if (!screen->precompile)
3769 return true;
3770
3771 struct iris_uncompiled_shader *ish = v_shader;
3772
3773 /* When precompile is enabled, the first entry is the precompile variant.
3774 * Check the ready fence of the precompile variant.
3775 */
3776 struct iris_compiled_shader *first =
3777 list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3778
3779 return util_queue_fence_is_signalled(&first->ready);
3780 }
3781
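/**
 * Install the screen-level hooks related to shader compilation.
 */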
3782 void
3783 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3784 {
3785 pscreen->is_parallel_shader_compilation_finished =
3786 iris_is_parallel_shader_compilation_finished;
3787 pscreen->set_max_shader_compiler_threads =
3788 iris_set_max_shader_compiler_threads;
3789 pscreen->finalize_nir = iris_finalize_nir;
3790 }
3791
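/**
 * Install the context-level shader CSO hooks: create/delete/bind for each
 * stage, plus the compute state queries.
 */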
3792 void
3793 iris_init_program_functions(struct pipe_context *ctx)
3794 {
3795 ctx->create_vs_state = iris_create_shader_state;
3796 ctx->create_tcs_state = iris_create_shader_state;
3797 ctx->create_tes_state = iris_create_shader_state;
3798 ctx->create_gs_state = iris_create_shader_state;
3799 ctx->create_fs_state = iris_create_shader_state;
3800 ctx->create_compute_state = iris_create_compute_state;
3801
3802 ctx->delete_vs_state = iris_delete_shader_state;
3803 ctx->delete_tcs_state = iris_delete_shader_state;
3804 ctx->delete_tes_state = iris_delete_shader_state;
3805 ctx->delete_gs_state = iris_delete_shader_state;
3806 ctx->delete_fs_state = iris_delete_shader_state;
3807 ctx->delete_compute_state = iris_delete_shader_state;
3808
3809 ctx->bind_vs_state = iris_bind_vs_state;
3810 ctx->bind_tcs_state = iris_bind_tcs_state;
3811 ctx->bind_tes_state = iris_bind_tes_state;
3812 ctx->bind_gs_state = iris_bind_gs_state;
3813 ctx->bind_fs_state = iris_bind_fs_state;
3814 ctx->bind_compute_state = iris_bind_cs_state;
3815
3816 ctx->get_compute_state_info = iris_get_compute_state_info;
3817 ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3818 }
3819
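/* The helpers below dispatch to the brw or elk backend based on which
 * prog_data the compiled shader variant carries.
 */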
3820 struct intel_cs_dispatch_info
3821 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3822 const struct iris_compiled_shader *shader,
3823 const uint32_t block[3])
3824 {
3825 if (shader->brw_prog_data) {
3826 return brw_cs_get_dispatch_info(devinfo,
3827 brw_cs_prog_data(shader->brw_prog_data),
3828 block);
3829 } else {
3830 assert(shader->elk_prog_data);
3831 return elk_cs_get_dispatch_info(devinfo,
3832 elk_cs_prog_data(shader->elk_prog_data),
3833 block);
3834 }
3835 }
3836
3837 unsigned
3838 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3839 unsigned threads)
3840 {
3841 if (shader->brw_prog_data) {
3842 return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3843 threads);
3844 } else {
3845 assert(shader->elk_prog_data);
3846 return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3847 threads);
3848 }
3849 }
3850
3851 uint32_t
3852 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3853 enum intel_msaa_flags pushed_msaa_flags)
3854 {
3855 if (shader->brw_prog_data) {
3856 return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3857 pushed_msaa_flags);
3858 } else {
3859 assert(shader->elk_prog_data);
3860 return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3861 pushed_msaa_flags);
3862 }
3863 }
3864
3865 bool
3866 iris_use_tcs_multi_patch(struct iris_screen *screen)
3867 {
3868 return screen->brw && screen->brw->use_tcs_multi_patch;
3869 }
3870
3871 bool
3872 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3873 {
3874 return screen->devinfo->ver < 12;
3875 }
3876
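/**
 * Backend compiler debug callback: forwards shader info messages to the
 * state tracker's util_debug_callback, if one is installed.
 */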
3877 static void
3878 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3879 {
3880 struct util_debug_callback *dbg = data;
3881 va_list args;
3882
3883 if (!dbg->debug_message)
3884 return;
3885
3886 va_start(args, fmt);
3887 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3888 va_end(args);
3889 }
3890
3891 static void
3892 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3893 {
3894 struct util_debug_callback *dbg = data;
3895 va_list args;
3896 va_start(args, fmt);
3897
3898 if (INTEL_DEBUG(DEBUG_PERF)) {
3899 va_list args_copy;
3900 va_copy(args_copy, args);
3901 vfprintf(stderr, fmt, args_copy);
3902 va_end(args_copy);
3903 }
3904
3905 if (dbg->debug_message) {
3906 dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3907 }
3908
3909 va_end(args);
3910 }
3911
3912 const void *
3913 iris_get_compiler_options(struct pipe_screen *pscreen,
3914 enum pipe_shader_ir ir,
3915 enum pipe_shader_type pstage)
3916 {
3917 struct iris_screen *screen = (struct iris_screen *) pscreen;
3918 gl_shader_stage stage = stage_from_pipe(pstage);
3919 assert(ir == PIPE_SHADER_IR_NIR);
3920
3921 return screen->brw ? screen->brw->nir_options[stage]
3922 : screen->elk->nir_options[stage];
3923 }
3924
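/**
 * Create the backend compiler for this screen: brw for Gfx9+ and elk for
 * older hardware.  Also wires up the shader debug/perf log callbacks.
 */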
3925 void
3926 iris_compiler_init(struct iris_screen *screen)
3927 {
3928 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
3929 STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
3930 STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
3931
3932 if (screen->devinfo->ver >= 9) {
3933 screen->brw = brw_compiler_create(screen, screen->devinfo);
3934 screen->brw->shader_debug_log = iris_shader_debug_log;
3935 screen->brw->shader_perf_log = iris_shader_perf_log;
3936 screen->brw->supports_shader_constants = true;
3937 screen->brw->indirect_ubos_use_sampler = iris_indirect_ubos_use_sampler(screen);
3938 } else {
3939 screen->elk = elk_compiler_create(screen, screen->devinfo);
3940 screen->elk->shader_debug_log = iris_shader_debug_log;
3941 screen->elk->shader_perf_log = iris_shader_perf_log;
3942 screen->elk->supports_shader_constants = true;
3943 screen->elk->indirect_ubos_use_sampler = iris_indirect_ubos_use_sampler(screen);
3944 }
3945 }
3946