1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See iris_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #ifdef INTEL_USE_ELK
50 #include "intel/compiler/elk/elk_compiler.h"
51 #include "intel/compiler/elk/elk_nir.h"
52 #include "intel/compiler/elk/elk_prim.h"
53 #endif
54 #include "iris_context.h"
55 #include "iris_pipe.h"
56 #include "nir/tgsi_to_nir.h"
57 
58 #define KEY_INIT(prefix)                                                   \
59    .prefix.program_string_id = ish->program_id,                            \
60    .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
61 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
62    .base.program_string_id = prog_id,                      \
63    .base.limit_trig_input_range = limit_trig_input
64 
65 #ifdef INTEL_USE_ELK
66 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input)       \
67    .base.program_string_id = prog_id,                      \
68    .base.limit_trig_input_range = limit_trig_input
69 #endif
70 
71 struct iris_threaded_compile_job {
72    struct iris_screen *screen;
73    struct u_upload_mgr *uploader;
74    struct util_debug_callback *dbg;
75    struct iris_uncompiled_shader *ish;
76    struct iris_compiled_shader *shader;
77 };
78 
79 static unsigned
80 get_new_program_id(struct iris_screen *screen)
81 {
82    return p_atomic_inc_return(&screen->program_id);
83 }
84 
85 static void
86 iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
87                             const struct brw_wm_prog_data *brw)
88 {
89    assert(shader->stage == MESA_SHADER_FRAGMENT);
90    struct iris_fs_data *iris = &shader->fs;
91 
92    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
93    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
94    memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
95    memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
96    iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
97 
98    iris->num_varying_inputs   = brw->num_varying_inputs;
99    iris->msaa_flags_param     = brw->msaa_flags_param;
100    iris->flat_inputs          = brw->flat_inputs;
101    iris->inputs               = brw->inputs;
102    iris->computed_depth_mode  = brw->computed_depth_mode;
103    iris->max_polygons         = brw->max_polygons;
104    iris->dispatch_multi       = brw->dispatch_multi;
105    iris->computed_stencil     = brw->computed_stencil;
106    iris->early_fragment_tests = brw->early_fragment_tests;
107    iris->post_depth_coverage  = brw->post_depth_coverage;
108    iris->inner_coverage       = brw->inner_coverage;
109    iris->dispatch_8           = brw->dispatch_8;
110    iris->dispatch_16          = brw->dispatch_16;
111    iris->dispatch_32          = brw->dispatch_32;
112    iris->dual_src_blend       = brw->dual_src_blend;
113    iris->uses_pos_offset      = brw->uses_pos_offset;
114    iris->uses_omask           = brw->uses_omask;
115    iris->uses_kill            = brw->uses_kill;
116    iris->uses_src_depth       = brw->uses_src_depth;
117    iris->uses_src_w           = brw->uses_src_w;
118    iris->uses_sample_mask     = brw->uses_sample_mask;
119    iris->uses_vmask           = brw->uses_vmask;
120    iris->has_side_effects     = brw->has_side_effects;
121    iris->pulls_bary           = brw->pulls_bary;
122 
123    iris->uses_sample_offsets        = brw->uses_sample_offsets;
124    iris->uses_npc_bary_coefficients = brw->uses_npc_bary_coefficients;
125    iris->uses_pc_bary_coefficients  = brw->uses_pc_bary_coefficients;
126    iris->uses_depth_w_coefficients  = brw->uses_depth_w_coefficients;
127 
128    iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
129 
130    iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
131 }
132 
133 static void
134 iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
135                             const struct brw_cs_prog_data *brw)
136 {
137    assert(shader->stage == MESA_SHADER_COMPUTE);
138    struct iris_cs_data *iris = &shader->cs;
139 
140    iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
141    iris->push.cross_thread.regs   = brw->push.cross_thread.regs;
142    iris->push.cross_thread.size   = brw->push.cross_thread.size;
143 
144    iris->push.per_thread.dwords = brw->push.per_thread.dwords;
145    iris->push.per_thread.regs   = brw->push.per_thread.regs;
146    iris->push.per_thread.size   = brw->push.per_thread.size;
147 
148    iris->local_size[0]  = brw->local_size[0];
149    iris->local_size[1]  = brw->local_size[1];
150    iris->local_size[2]  = brw->local_size[2];
151    iris->prog_offset[0] = brw->prog_offset[0];
152    iris->prog_offset[1] = brw->prog_offset[1];
153    iris->prog_offset[2] = brw->prog_offset[2];
154 
155    iris->generate_local_id = brw->generate_local_id;
156    iris->walk_order        = brw->walk_order;
157    iris->uses_barrier      = brw->uses_barrier;
158    iris->uses_sampler      = brw->uses_sampler;
159    iris->prog_mask         = brw->prog_mask;
160 
161    iris->first_param_is_builtin_subgroup_id =
162       brw->base.nr_params > 0 &&
163       brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
164 }
165 
166 static void
167 iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
168                              struct iris_vue_data *iris)
169 {
170    memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
171 
172    iris->urb_read_length     = brw->urb_read_length;
173    iris->cull_distance_mask  = brw->cull_distance_mask;
174    iris->urb_entry_size      = brw->urb_entry_size;
175    iris->dispatch_mode       = brw->dispatch_mode;
176    iris->include_vue_handles = brw->include_vue_handles;
177 }
178 
179 static void
180 iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
181                             const struct brw_vs_prog_data *brw)
182 {
183    assert(shader->stage == MESA_SHADER_VERTEX);
184    struct iris_vs_data *iris = &shader->vs;
185 
186    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
187 
188    iris->uses_vertexid     = brw->uses_vertexid;
189    iris->uses_instanceid   = brw->uses_instanceid;
190    iris->uses_firstvertex  = brw->uses_firstvertex;
191    iris->uses_baseinstance = brw->uses_baseinstance;
192    iris->uses_drawid       = brw->uses_drawid;
193 }
194 
195 static void
196 iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
197                              const struct brw_tcs_prog_data *brw)
198 {
199    assert(shader->stage == MESA_SHADER_TESS_CTRL);
200    struct iris_tcs_data *iris = &shader->tcs;
201 
202    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
203 
204    iris->instances             = brw->instances;
205    iris->patch_count_threshold = brw->patch_count_threshold;
206    iris->include_primitive_id  = brw->include_primitive_id;
207 }
208 
209 static void
210 iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
211                              const struct brw_tes_prog_data *brw)
212 {
213    assert(shader->stage == MESA_SHADER_TESS_EVAL);
214    struct iris_tes_data *iris = &shader->tes;
215 
216    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
217 
218    iris->partitioning         = brw->partitioning;
219    iris->output_topology      = brw->output_topology;
220    iris->domain               = brw->domain;
221    iris->include_primitive_id = brw->include_primitive_id;
222 }
223 
224 static void
225 iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
226                             const struct brw_gs_prog_data *brw)
227 {
228    assert(shader->stage == MESA_SHADER_GEOMETRY);
229    struct iris_gs_data *iris = &shader->gs;
230 
231    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
232 
233    iris->vertices_in                     = brw->vertices_in;
234    iris->output_vertex_size_hwords       = brw->output_vertex_size_hwords;
235    iris->output_topology                 = brw->output_topology;
236    iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
237    iris->control_data_format             = brw->control_data_format;
238    iris->static_vertex_count             = brw->static_vertex_count;
239    iris->invocations                     = brw->invocations;
240    iris->include_primitive_id            = brw->include_primitive_id;
241 }
242 
243 void
244 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
245                          struct brw_stage_prog_data *brw)
246 {
247    STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
248    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
249       shader->ubo_ranges[i].block  = brw->ubo_ranges[i].block;
250       shader->ubo_ranges[i].start  = brw->ubo_ranges[i].start;
251       shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
252    }
253 
254    shader->nr_params              = brw->nr_params;
255    shader->total_scratch          = brw->total_scratch;
256    shader->total_shared           = brw->total_shared;
257    shader->program_size           = brw->program_size;
258    shader->const_data_offset      = brw->const_data_offset;
259    shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
260    shader->has_ubo_pull           = brw->has_ubo_pull;
261    shader->use_alt_mode           = brw->use_alt_mode;
262 
263    switch (shader->stage) {
264    case MESA_SHADER_FRAGMENT:
265       iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
266       break;
267    case MESA_SHADER_COMPUTE:
268       iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
269       break;
270    case MESA_SHADER_VERTEX:
271       iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
272       break;
273    case MESA_SHADER_TESS_CTRL:
274       iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
275       break;
276    case MESA_SHADER_TESS_EVAL:
277       iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
278       break;
279    case MESA_SHADER_GEOMETRY:
280       iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
281       break;
282    default:
283       unreachable("invalid shader stage");
284    }
285 
286    shader->brw_prog_data = brw;
287 
288    ralloc_steal(shader, shader->brw_prog_data);
289    ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
290    ralloc_steal(shader->brw_prog_data, brw->param);
291 }
292 
293 #ifdef INTEL_USE_ELK
294 
295 static void
296 iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
297                             const struct elk_wm_prog_data *elk)
298 {
299    assert(shader->stage == MESA_SHADER_FRAGMENT);
300    struct iris_fs_data *iris = &shader->fs;
301 
302    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
303    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
304    memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
305    memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
306    iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
307 
308    iris->num_varying_inputs   = elk->num_varying_inputs;
309    iris->msaa_flags_param     = elk->msaa_flags_param;
310    iris->flat_inputs          = elk->flat_inputs;
311    iris->inputs               = elk->inputs;
312    iris->computed_depth_mode  = elk->computed_depth_mode;
313    iris->max_polygons         = 1;
314    iris->dispatch_multi       = 0;
315    iris->computed_stencil     = elk->computed_stencil;
316    iris->early_fragment_tests = elk->early_fragment_tests;
317    iris->post_depth_coverage  = elk->post_depth_coverage;
318    iris->inner_coverage       = elk->inner_coverage;
319    iris->dispatch_8           = elk->dispatch_8;
320    iris->dispatch_16          = elk->dispatch_16;
321    iris->dispatch_32          = elk->dispatch_32;
322    iris->dual_src_blend       = elk->dual_src_blend;
323    iris->uses_pos_offset      = elk->uses_pos_offset;
324    iris->uses_omask           = elk->uses_omask;
325    iris->uses_kill            = elk->uses_kill;
326    iris->uses_src_depth       = elk->uses_src_depth;
327    iris->uses_src_w           = elk->uses_src_w;
328    iris->uses_sample_mask     = elk->uses_sample_mask;
329    iris->uses_vmask           = elk->uses_vmask;
330    iris->pulls_bary           = elk->pulls_bary;
331    iris->has_side_effects     = elk->has_side_effects;
332 
333    iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
334 
335    iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
336 }
337 
338 static void
339 iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
340                             const struct elk_cs_prog_data *elk)
341 {
342    assert(shader->stage == MESA_SHADER_COMPUTE);
343    struct iris_cs_data *iris = &shader->cs;
344 
345    iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
346    iris->push.cross_thread.regs   = elk->push.cross_thread.regs;
347    iris->push.cross_thread.size   = elk->push.cross_thread.size;
348 
349    iris->push.per_thread.dwords = elk->push.per_thread.dwords;
350    iris->push.per_thread.regs   = elk->push.per_thread.regs;
351    iris->push.per_thread.size   = elk->push.per_thread.size;
352 
353    iris->local_size[0]  = elk->local_size[0];
354    iris->local_size[1]  = elk->local_size[1];
355    iris->local_size[2]  = elk->local_size[2];
356    iris->prog_offset[0] = elk->prog_offset[0];
357    iris->prog_offset[1] = elk->prog_offset[1];
358    iris->prog_offset[2] = elk->prog_offset[2];
359 
360    iris->uses_barrier      = elk->uses_barrier;
361    iris->prog_mask         = elk->prog_mask;
362 
363    iris->first_param_is_builtin_subgroup_id =
364       elk->base.nr_params > 0 &&
365       elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
366 }
367 
368 static void
369 iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
370                              struct iris_vue_data *iris)
371 {
372    memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
373 
374    iris->urb_read_length     = elk->urb_read_length;
375    iris->cull_distance_mask  = elk->cull_distance_mask;
376    iris->urb_entry_size      = elk->urb_entry_size;
377    iris->dispatch_mode       = elk->dispatch_mode;
378    iris->include_vue_handles = elk->include_vue_handles;
379 }
380 
381 static void
382 iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
383                             const struct elk_vs_prog_data *elk)
384 {
385    assert(shader->stage == MESA_SHADER_VERTEX);
386    struct iris_vs_data *iris = &shader->vs;
387 
388    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
389 
390    iris->uses_vertexid     = elk->uses_vertexid;
391    iris->uses_instanceid   = elk->uses_instanceid;
392    iris->uses_firstvertex  = elk->uses_firstvertex;
393    iris->uses_baseinstance = elk->uses_baseinstance;
394    iris->uses_drawid       = elk->uses_drawid;
395 }
396 
397 static void
398 iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
399                              const struct elk_tcs_prog_data *elk)
400 {
401    assert(shader->stage == MESA_SHADER_TESS_CTRL);
402    struct iris_tcs_data *iris = &shader->tcs;
403 
404    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
405 
406    iris->instances             = elk->instances;
407    iris->patch_count_threshold = elk->patch_count_threshold;
408    iris->include_primitive_id  = elk->include_primitive_id;
409 }
410 
411 static void
412 iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
413                              const struct elk_tes_prog_data *elk)
414 {
415    assert(shader->stage == MESA_SHADER_TESS_EVAL);
416    struct iris_tes_data *iris = &shader->tes;
417 
418    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
419 
420    iris->partitioning         = elk->partitioning;
421    iris->output_topology      = elk->output_topology;
422    iris->domain               = elk->domain;
423    iris->include_primitive_id = elk->include_primitive_id;
424 }
425 
426 static void
427 iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
428                             const struct elk_gs_prog_data *elk)
429 {
430    assert(shader->stage == MESA_SHADER_GEOMETRY);
431    struct iris_gs_data *iris = &shader->gs;
432 
433    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
434 
435    iris->vertices_in                     = elk->vertices_in;
436    iris->output_vertex_size_hwords       = elk->output_vertex_size_hwords;
437    iris->output_topology                 = elk->output_topology;
438    iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
439    iris->control_data_format             = elk->control_data_format;
440    iris->static_vertex_count             = elk->static_vertex_count;
441    iris->invocations                     = elk->invocations;
442    iris->include_primitive_id            = elk->include_primitive_id;
443 }
444 
445 void
446 iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
447                          struct elk_stage_prog_data *elk)
448 {
449    STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
450    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
451       shader->ubo_ranges[i].block  = elk->ubo_ranges[i].block;
452       shader->ubo_ranges[i].start  = elk->ubo_ranges[i].start;
453       shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
454    }
455 
456    shader->nr_params              = elk->nr_params;
457    shader->total_scratch          = elk->total_scratch;
458    shader->total_shared           = elk->total_shared;
459    shader->program_size           = elk->program_size;
460    shader->const_data_offset      = elk->const_data_offset;
461    shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
462    shader->has_ubo_pull           = elk->has_ubo_pull;
463    shader->use_alt_mode           = elk->use_alt_mode;
464 
465    switch (shader->stage) {
466    case MESA_SHADER_FRAGMENT:
467       iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
468       break;
469    case MESA_SHADER_COMPUTE:
470       iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
471       break;
472    case MESA_SHADER_VERTEX:
473       iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
474       break;
475    case MESA_SHADER_TESS_CTRL:
476       iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
477       break;
478    case MESA_SHADER_TESS_EVAL:
479       iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
480       break;
481    case MESA_SHADER_GEOMETRY:
482       iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
483       break;
484    default:
485       unreachable("invalid shader stage");
486    }
487 
488    shader->elk_prog_data = elk;
489 
490    ralloc_steal(shader, shader->elk_prog_data);
491    ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
492    ralloc_steal(shader->elk_prog_data, elk->param);
493 }
494 
495 #endif
496 
497 void
498 iris_finalize_program(struct iris_compiled_shader *shader,
499                       uint32_t *streamout,
500                       uint32_t *system_values,
501                       unsigned num_system_values,
502                       unsigned kernel_input_size,
503                       unsigned num_cbufs,
504                       const struct iris_binding_table *bt)
505 {
506    /* There can be only one. */
507 #ifdef INTEL_USE_ELK
508    assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
509 #else
510    assert(shader->brw_prog_data);
511 #endif
512 
513    shader->streamout = streamout;
514    shader->system_values = system_values;
515    shader->num_system_values = num_system_values;
516    shader->kernel_input_size = kernel_input_size;
517    shader->num_cbufs = num_cbufs;
518    shader->bt = *bt;
519 
520    ralloc_steal(shader, shader->streamout);
521    ralloc_steal(shader, shader->system_values);
522 }
523 
524 static struct brw_vs_prog_key
525 iris_to_brw_vs_key(const struct iris_screen *screen,
526                    const struct iris_vs_prog_key *key)
527 {
528    return (struct brw_vs_prog_key) {
529       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
530                    key->vue.base.limit_trig_input_range),
531    };
532 }
533 
534 static struct brw_tcs_prog_key
535 iris_to_brw_tcs_key(const struct iris_screen *screen,
536                     const struct iris_tcs_prog_key *key)
537 {
538    return (struct brw_tcs_prog_key) {
539       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
540                    key->vue.base.limit_trig_input_range),
541       ._tes_primitive_mode = key->_tes_primitive_mode,
542       .input_vertices = key->input_vertices,
543       .patch_outputs_written = key->patch_outputs_written,
544       .outputs_written = key->outputs_written,
545    };
546 }
547 
548 static struct brw_tes_prog_key
549 iris_to_brw_tes_key(const struct iris_screen *screen,
550                     const struct iris_tes_prog_key *key)
551 {
552    return (struct brw_tes_prog_key) {
553       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
554                    key->vue.base.limit_trig_input_range),
555       .patch_inputs_read = key->patch_inputs_read,
556       .inputs_read = key->inputs_read,
557    };
558 }
559 
560 static struct brw_gs_prog_key
561 iris_to_brw_gs_key(const struct iris_screen *screen,
562                    const struct iris_gs_prog_key *key)
563 {
564    return (struct brw_gs_prog_key) {
565       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
566                    key->vue.base.limit_trig_input_range),
567    };
568 }
569 
570 static struct brw_wm_prog_key
571 iris_to_brw_fs_key(const struct iris_screen *screen,
572                    const struct iris_fs_prog_key *key)
573 {
574    return (struct brw_wm_prog_key) {
575       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
576                    key->base.limit_trig_input_range),
577       .nr_color_regions = key->nr_color_regions,
578       .flat_shade = key->flat_shade,
579       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
580       .alpha_to_coverage = key->alpha_to_coverage ? INTEL_ALWAYS : INTEL_NEVER,
581       .clamp_fragment_color = key->clamp_fragment_color,
582       .persample_interp = key->persample_interp ? INTEL_ALWAYS : INTEL_NEVER,
583       .multisample_fbo = key->multisample_fbo ? INTEL_ALWAYS : INTEL_NEVER,
584       .force_dual_color_blend = key->force_dual_color_blend,
585       .coherent_fb_fetch = key->coherent_fb_fetch,
586       .color_outputs_valid = key->color_outputs_valid,
587       .input_slots_valid = key->input_slots_valid,
588       .ignore_sample_mask_out = !key->multisample_fbo,
589       .null_push_constant_tbimr_workaround =
590          screen->devinfo->needs_null_push_constant_tbimr_workaround,
591    };
592 }
593 
594 static struct brw_cs_prog_key
595 iris_to_brw_cs_key(const struct iris_screen *screen,
596                    const struct iris_cs_prog_key *key)
597 {
598    return (struct brw_cs_prog_key) {
599       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
600                    key->base.limit_trig_input_range),
601    };
602 }
603 
604 #ifdef INTEL_USE_ELK
605 
606 static struct elk_vs_prog_key
607 iris_to_elk_vs_key(const struct iris_screen *screen,
608                    const struct iris_vs_prog_key *key)
609 {
610    return (struct elk_vs_prog_key) {
611       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
612                    key->vue.base.limit_trig_input_range),
613 
614       /* Don't tell the backend about our clip plane constants, we've
615        * already lowered them in NIR and don't want it doing it again.
616        */
617       .nr_userclip_plane_consts = 0,
618    };
619 }
620 
621 static struct elk_tcs_prog_key
622 iris_to_elk_tcs_key(const struct iris_screen *screen,
623                     const struct iris_tcs_prog_key *key)
624 {
625    return (struct elk_tcs_prog_key) {
626       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
627                    key->vue.base.limit_trig_input_range),
628       ._tes_primitive_mode = key->_tes_primitive_mode,
629       .input_vertices = key->input_vertices,
630       .patch_outputs_written = key->patch_outputs_written,
631       .outputs_written = key->outputs_written,
632       .quads_workaround = key->quads_workaround,
633    };
634 }
635 
636 static struct elk_tes_prog_key
637 iris_to_elk_tes_key(const struct iris_screen *screen,
638                     const struct iris_tes_prog_key *key)
639 {
640    return (struct elk_tes_prog_key) {
641       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
642                    key->vue.base.limit_trig_input_range),
643       .patch_inputs_read = key->patch_inputs_read,
644       .inputs_read = key->inputs_read,
645    };
646 }
647 
648 static struct elk_gs_prog_key
649 iris_to_elk_gs_key(const struct iris_screen *screen,
650                    const struct iris_gs_prog_key *key)
651 {
652    return (struct elk_gs_prog_key) {
653       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
654                    key->vue.base.limit_trig_input_range),
655    };
656 }
657 
658 static struct elk_wm_prog_key
659 iris_to_elk_fs_key(const struct iris_screen *screen,
660                    const struct iris_fs_prog_key *key)
661 {
662    return (struct elk_wm_prog_key) {
663       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
664                    key->base.limit_trig_input_range),
665       .nr_color_regions = key->nr_color_regions,
666       .flat_shade = key->flat_shade,
667       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
668       .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
669       .clamp_fragment_color = key->clamp_fragment_color,
670       .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
671       .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
672       .force_dual_color_blend = key->force_dual_color_blend,
673       .coherent_fb_fetch = key->coherent_fb_fetch,
674       .color_outputs_valid = key->color_outputs_valid,
675       .input_slots_valid = key->input_slots_valid,
676       .ignore_sample_mask_out = !key->multisample_fbo,
677    };
678 }
679 
680 static struct elk_cs_prog_key
681 iris_to_elk_cs_key(const struct iris_screen *screen,
682                    const struct iris_cs_prog_key *key)
683 {
684    return (struct elk_cs_prog_key) {
685       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
686                    key->base.limit_trig_input_range),
687    };
688 }
689 
690 #endif
691 
692 static void *
693 upload_state(struct u_upload_mgr *uploader,
694              struct iris_state_ref *ref,
695              unsigned size,
696              unsigned alignment)
697 {
698    void *p = NULL;
699    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
700    return p;
701 }
702 
703 void
704 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
705                                 struct pipe_shader_buffer *buf,
706                                 struct iris_state_ref *surf_state,
707                                 isl_surf_usage_flags_t usage)
708 {
709    struct pipe_context *ctx = &ice->ctx;
710    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
711    bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
712 
713    void *map =
714       upload_state(ice->state.surface_uploader, surf_state,
715                    screen->isl_dev.ss.size, 64);
716    if (!unlikely(map)) {
717       surf_state->res = NULL;
718       return;
719    }
720 
721    struct iris_resource *res = (void *) buf->buffer;
722    struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
723    surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
724 
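   /* SSBOs (and UBOs read through the dataport) use a RAW buffer surface;
    * UBOs pulled through the sampler need a typed RGBA32_FLOAT format.
    */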
725    const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
726 
727    isl_buffer_fill_state(&screen->isl_dev, map,
728                          .address = res->bo->address + res->offset +
729                                     buf->buffer_offset,
730                          .size_B = buf->buffer_size - res->offset,
731                          .format = dataport ? ISL_FORMAT_RAW
732                                             : ISL_FORMAT_R32G32B32A32_FLOAT,
733                          .swizzle = ISL_SWIZZLE_IDENTITY,
734                          .stride_B = 1,
735                          .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
736 }
737 
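/* Flatten an array-of-arrays deref into a single scalar offset, scaled by
 * elem_size.  For example, for "image2D img[2][3]", accessing img[i][j] with
 * elem_size == 1 produces i * 3 + j, clamped to the array bounds below.
 */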
738 static nir_def *
739 get_aoa_deref_offset(nir_builder *b,
740                      nir_deref_instr *deref,
741                      unsigned elem_size)
742 {
743    unsigned array_size = elem_size;
744    nir_def *offset = nir_imm_int(b, 0);
745 
746    while (deref->deref_type != nir_deref_type_var) {
747       assert(deref->deref_type == nir_deref_type_array);
748 
749       /* This level's element size is the previous level's array size */
750       nir_def *index = deref->arr.index.ssa;
751       assert(deref->arr.index.ssa);
752       offset = nir_iadd(b, offset,
753                            nir_imul_imm(b, index, array_size));
754 
755       deref = nir_deref_instr_parent(deref);
756       assert(glsl_type_is_array(deref->type));
757       array_size *= glsl_get_length(deref->type);
758    }
759 
760    /* Accessing an invalid surface index with the dataport can result in a
761     * hang.  According to the spec "if the index used to select an individual
762     * element is negative or greater than or equal to the size of the array,
763     * the results of the operation are undefined but may not lead to
764     * termination" -- which is one of the possible outcomes of the hang.
765     * Clamp the index to prevent access outside of the array bounds.
766     */
767    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
768 }
769 
770 static void
771 iris_lower_storage_image_derefs(nir_shader *nir)
772 {
773    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
774 
775    nir_builder b = nir_builder_create(impl);
776    bool progress = false;
777 
778    nir_foreach_block(block, impl) {
779       nir_foreach_instr_safe(instr, block) {
780          if (instr->type != nir_instr_type_intrinsic)
781             continue;
782 
783          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
784          switch (intrin->intrinsic) {
785          case nir_intrinsic_image_deref_load:
786          case nir_intrinsic_image_deref_store:
787          case nir_intrinsic_image_deref_atomic:
788          case nir_intrinsic_image_deref_atomic_swap:
789          case nir_intrinsic_image_deref_size:
790          case nir_intrinsic_image_deref_samples:
791          case nir_intrinsic_image_deref_load_raw_intel:
792          case nir_intrinsic_image_deref_store_raw_intel: {
793             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
794             nir_variable *var = nir_deref_instr_get_variable(deref);
795 
796             b.cursor = nir_before_instr(&intrin->instr);
797             nir_def *index =
798                nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
799                                 var->data.driver_location);
800             nir_rewrite_image_intrinsic(intrin, index, false);
801             progress = true;
802             break;
803          }
804 
805          default:
806             break;
807          }
808       }
809    }
810 
811    if (progress) {
812       nir_metadata_preserve(impl, nir_metadata_control_flow);
813    } else {
814       nir_metadata_preserve(impl, nir_metadata_all);
815    }
816 }
817 
818 static bool
819 iris_uses_image_atomic(const nir_shader *shader)
820 {
821    nir_foreach_function_impl(impl, shader) {
822       nir_foreach_block(block, impl) {
823          nir_foreach_instr(instr, block) {
824             if (instr->type != nir_instr_type_intrinsic)
825                continue;
826 
827             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
828             switch (intrin->intrinsic) {
829             case nir_intrinsic_image_deref_atomic:
830             case nir_intrinsic_image_deref_atomic_swap:
831                unreachable("Should have been lowered in "
832                            "iris_lower_storage_image_derefs");
833 
834             case nir_intrinsic_image_atomic:
835             case nir_intrinsic_image_atomic_swap:
836                return true;
837 
838             default:
839                break;
840             }
841          }
842       }
843    }
844 
845    return false;
846 }
847 
848 /**
849  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
850  */
851 static bool
852 iris_fix_edge_flags(nir_shader *nir)
853 {
854    if (nir->info.stage != MESA_SHADER_VERTEX) {
855       nir_shader_preserve_all_metadata(nir);
856       return false;
857    }
858 
859    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
860                                                        VARYING_SLOT_EDGE);
861    if (!var) {
862       nir_shader_preserve_all_metadata(nir);
863       return false;
864    }
865 
866    var->data.mode = nir_var_shader_temp;
867    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
868    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
869    nir_fixup_deref_modes(nir);
870 
871    nir_foreach_function_impl(impl, nir) {
872       nir_metadata_preserve(impl, nir_metadata_control_flow |
873                                   nir_metadata_live_defs |
874                                   nir_metadata_loop_analysis);
875    }
876 
877    return true;
878 }
879 
880 /**
881  * Fix an uncompiled shader's stream output info.
882  *
883  * Core Gallium stores output->register_index as a "slot" number, where
884  * slots are assigned consecutively to all outputs in info->outputs_written.
885  * This naive packing of outputs doesn't work for us - we too have slots,
886  * but the layout is defined by the VUE map, which we won't have until we
887  * compile a specific shader variant.  So, we remap these and simply store
888  * VARYING_SLOT_* in our copy's output->register_index fields.
889  *
890  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
891  * components of our VUE header.  See brw_vue_map.c for the layout.
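 *
 * For example, a one-component gl_Layer output ends up stored as
 * <VARYING_SLOT_PSIZ, start_component = 1> after this remapping.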
892  */
893 static void
894 update_so_info(struct pipe_stream_output_info *so_info,
895                uint64_t outputs_written)
896 {
897    uint8_t reverse_map[64] = {};
898    unsigned slot = 0;
899    while (outputs_written) {
900       reverse_map[slot++] = u_bit_scan64(&outputs_written);
901    }
902 
903    for (unsigned i = 0; i < so_info->num_outputs; i++) {
904       struct pipe_stream_output *output = &so_info->output[i];
905 
906       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
907       output->register_index = reverse_map[output->register_index];
908 
909       /* The VUE header contains three scalar fields packed together:
910        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
911        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
912        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
913        */
914       switch (output->register_index) {
915       case VARYING_SLOT_LAYER:
916          assert(output->num_components == 1);
917          output->register_index = VARYING_SLOT_PSIZ;
918          output->start_component = 1;
919          break;
920       case VARYING_SLOT_VIEWPORT:
921          assert(output->num_components == 1);
922          output->register_index = VARYING_SLOT_PSIZ;
923          output->start_component = 2;
924          break;
925       case VARYING_SLOT_PSIZ:
926          assert(output->num_components == 1);
927          output->start_component = 3;
928          break;
929       }
930 
931       //info->outputs_written |= 1ull << output->register_index;
932    }
933 }
934 
935 static void
936 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
937                         unsigned offset, unsigned n)
938 {
939 #ifdef INTEL_USE_ELK
940    assert(offset % sizeof(uint32_t) == 0);
941 
942    for (unsigned i = 0; i < n; ++i)
943       sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
944 
945    for (unsigned i = n; i < 4; ++i)
946       sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
947 #else
948    unreachable("no elk support");
949 #endif
950 }
951 
952 /**
953  * Associate NIR uniform variables with the prog_data->param[] mechanism
954  * used by the backend.  Also, decide which UBOs we'd like to push in an
955  * ideal situation (though the backend can reduce this).
956  */
957 static void
958 iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
959                     void *mem_ctx,
960                     nir_shader *nir,
961                     unsigned kernel_input_size,
962                     uint32_t **out_system_values,
963                     unsigned *out_num_system_values,
964                     unsigned *out_num_cbufs)
965 {
966    unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
967 
968    const unsigned IRIS_MAX_SYSTEM_VALUES =
969       PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
970    unsigned *system_values =
971       rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
972    unsigned num_system_values = 0;
973 
974    unsigned patch_vert_idx = -1;
975    unsigned tess_outer_default_idx = -1;
976    unsigned tess_inner_default_idx = -1;
977    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
978    unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
979    unsigned variable_group_size_idx = -1;
980    unsigned work_dim_idx = -1;
981    memset(ucp_idx, -1, sizeof(ucp_idx));
982    memset(img_idx, -1, sizeof(img_idx));
983 
984    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
985 
986    nir_builder b = nir_builder_at(nir_before_impl(impl));
987 
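   /* Placeholder UBO index for system values: the loads built below reference
    * it, and it is rewritten to the real sysval cbuf index once the total
    * number of constant buffers is known.
    */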
988    nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
989 
990    /* Turn system value intrinsics into uniforms */
991    nir_foreach_block(block, impl) {
992       nir_foreach_instr_safe(instr, block) {
993          if (instr->type != nir_instr_type_intrinsic)
994             continue;
995 
996          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
997          nir_def *offset;
998 
999          switch (intrin->intrinsic) {
1000          case nir_intrinsic_load_base_workgroup_id: {
1001             /* GL doesn't have a concept of base workgroup */
1002             b.cursor = nir_instr_remove(&intrin->instr);
1003             nir_def_rewrite_uses(&intrin->def,
1004                                      nir_imm_zero(&b, 3, 32));
1005             continue;
1006          }
1007          case nir_intrinsic_load_constant: {
1008             unsigned load_size = intrin->def.num_components *
1009                                  intrin->def.bit_size / 8;
1010             unsigned load_align = intrin->def.bit_size / 8;
1011 
1012             /* This one is special because it reads from the shader constant
1013              * data and not cbuf0 which gallium uploads for us.
1014              */
1015             b.cursor = nir_instr_remove(&intrin->instr);
1016 
1017             nir_def *offset =
1018                nir_iadd_imm(&b, intrin->src[0].ssa,
1019                                 nir_intrinsic_base(intrin));
1020 
1021             assert(load_size < b.shader->constant_data_size);
1022             unsigned max_offset = b.shader->constant_data_size - load_size;
1023             offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
1024 
1025             /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
1026              * and cannot cross that 4GB boundary, so we can do the address
1027              * calculation with 32-bit adds.  Also, we can ignore the high
1028              * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
1029              */
1030             assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
1031 
1032             nir_def *const_data_addr =
1033                nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1034 
1035             nir_def *data =
1036                nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1037                                         load_align,
1038                                         intrin->def.num_components,
1039                                         intrin->def.bit_size);
1040 
1041             nir_def_rewrite_uses(&intrin->def,
1042                                      data);
1043             continue;
1044          }
1045          case nir_intrinsic_load_user_clip_plane: {
1046             unsigned ucp = nir_intrinsic_ucp_id(intrin);
1047 
1048             if (ucp_idx[ucp] == -1) {
1049                ucp_idx[ucp] = num_system_values;
1050                num_system_values += 4;
1051             }
1052 
1053             for (int i = 0; i < 4; i++) {
1054                system_values[ucp_idx[ucp] + i] =
1055                   BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1056             }
1057 
1058             b.cursor = nir_before_instr(instr);
1059             offset = nir_imm_int(&b, system_values_start +
1060                                      ucp_idx[ucp] * sizeof(uint32_t));
1061             break;
1062          }
1063          case nir_intrinsic_load_patch_vertices_in:
1064             if (patch_vert_idx == -1)
1065                patch_vert_idx = num_system_values++;
1066 
1067             system_values[patch_vert_idx] =
1068                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1069 
1070             b.cursor = nir_before_instr(instr);
1071             offset = nir_imm_int(&b, system_values_start +
1072                                      patch_vert_idx * sizeof(uint32_t));
1073             break;
1074          case nir_intrinsic_load_tess_level_outer_default:
1075             if (tess_outer_default_idx == -1) {
1076                tess_outer_default_idx = num_system_values;
1077                num_system_values += 4;
1078             }
1079 
1080             for (int i = 0; i < 4; i++) {
1081                system_values[tess_outer_default_idx + i] =
1082                   BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1083             }
1084 
1085             b.cursor = nir_before_instr(instr);
1086             offset = nir_imm_int(&b, system_values_start +
1087                                  tess_outer_default_idx * sizeof(uint32_t));
1088             break;
1089          case nir_intrinsic_load_tess_level_inner_default:
1090             if (tess_inner_default_idx == -1) {
1091                tess_inner_default_idx = num_system_values;
1092                num_system_values += 2;
1093             }
1094 
1095             for (int i = 0; i < 2; i++) {
1096                system_values[tess_inner_default_idx + i] =
1097                   BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1098             }
1099 
1100             b.cursor = nir_before_instr(instr);
1101             offset = nir_imm_int(&b, system_values_start +
1102                                  tess_inner_default_idx * sizeof(uint32_t));
1103             break;
1104          case nir_intrinsic_image_deref_load_param_intel: {
1105             assert(devinfo->ver < 9);
1106             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1107             nir_variable *var = nir_deref_instr_get_variable(deref);
1108 
1109             if (img_idx[var->data.binding] == -1) {
1110                /* GL only allows arrays of arrays of images. */
1111                assert(glsl_type_is_image(glsl_without_array(var->type)));
1112                unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1113 
1114                for (int i = 0; i < num_images; i++) {
1115                   const unsigned img = var->data.binding + i;
1116 
1117                   img_idx[img] = num_system_values;
1118                   num_system_values += ISL_IMAGE_PARAM_SIZE;
1119 
1120                   uint32_t *img_sv = &system_values[img_idx[img]];
1121 
1122                   setup_vec4_image_sysval(
1123                      img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1124                      offsetof(struct isl_image_param, offset), 2);
1125                   setup_vec4_image_sysval(
1126                      img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1127                      offsetof(struct isl_image_param, size), 3);
1128                   setup_vec4_image_sysval(
1129                      img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1130                      offsetof(struct isl_image_param, stride), 4);
1131                   setup_vec4_image_sysval(
1132                      img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1133                      offsetof(struct isl_image_param, tiling), 3);
1134                   setup_vec4_image_sysval(
1135                      img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1136                      offsetof(struct isl_image_param, swizzling), 2);
1137                }
1138             }
1139 
1140             b.cursor = nir_before_instr(instr);
1141             offset = nir_iadd_imm(&b,
1142                get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1143                system_values_start +
1144                img_idx[var->data.binding] * 4 +
1145                nir_intrinsic_base(intrin) * 16);
1146             break;
1147          }
1148          case nir_intrinsic_load_workgroup_size: {
1149             assert(nir->info.workgroup_size_variable);
1150             if (variable_group_size_idx == -1) {
1151                variable_group_size_idx = num_system_values;
1152                num_system_values += 3;
1153                for (int i = 0; i < 3; i++) {
1154                   system_values[variable_group_size_idx + i] =
1155                      BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1156                }
1157             }
1158 
1159             b.cursor = nir_before_instr(instr);
1160             offset = nir_imm_int(&b, system_values_start +
1161                                      variable_group_size_idx * sizeof(uint32_t));
1162             break;
1163          }
1164          case nir_intrinsic_load_work_dim: {
1165             if (work_dim_idx == -1) {
1166                work_dim_idx = num_system_values++;
1167                system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1168             }
1169             b.cursor = nir_before_instr(instr);
1170             offset = nir_imm_int(&b, system_values_start +
1171                                      work_dim_idx * sizeof(uint32_t));
1172             break;
1173          }
1174          case nir_intrinsic_load_kernel_input: {
1175             assert(nir_intrinsic_base(intrin) +
1176                    nir_intrinsic_range(intrin) <= kernel_input_size);
1177             b.cursor = nir_before_instr(instr);
1178             offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1179                                       nir_intrinsic_base(intrin));
1180             break;
1181          }
1182          default:
1183             continue;
1184          }
1185 
1186          nir_def *load =
1187             nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1188                          temp_ubo_name, offset,
1189                          .align_mul = 4,
1190                          .align_offset = 0,
1191                          .range_base = 0,
1192                          .range = ~0);
1193 
1194          nir_def_rewrite_uses(&intrin->def,
1195                                   load);
1196          nir_instr_remove(instr);
1197       }
1198    }
1199 
1200    nir_validate_shader(nir, "before remapping");
1201 
1202    /* Uniforms are stored in constant buffer 0, the
1203     * user-facing UBOs are indexed by one.  So if any constant buffer is
1204     * needed, the constant buffer 0 will be needed, so account for it.
1205     */
1206    unsigned num_cbufs = nir->info.num_ubos;
1207    if (num_cbufs || nir->num_uniforms)
1208       num_cbufs++;
1209 
1210    /* Place the new params in a new cbuf. */
1211    if (num_system_values > 0 || kernel_input_size > 0) {
1212       unsigned sysval_cbuf_index = num_cbufs;
1213       num_cbufs++;
1214 
1215       system_values = reralloc(mem_ctx, system_values, unsigned,
1216                                num_system_values);
1217 
1218       nir_foreach_block(block, impl) {
1219          nir_foreach_instr_safe(instr, block) {
1220             if (instr->type != nir_instr_type_intrinsic)
1221                continue;
1222 
1223             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1224 
1225             if (load->intrinsic != nir_intrinsic_load_ubo)
1226                continue;
1227 
1228             b.cursor = nir_before_instr(instr);
1229 
1230             if (load->src[0].ssa == temp_ubo_name) {
1231                nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1232                nir_src_rewrite(&load->src[0], imm);
1233             }
1234          }
1235       }
1236 
1237       /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1238       nir_opt_constant_folding(nir);
1239    } else {
1240       ralloc_free(system_values);
1241       system_values = NULL;
1242    }
1243 
1244    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1245    nir_validate_shader(nir, "after remap");
1246 
1247    /* We don't use params[] but gallium leaves num_uniforms set.  We use this
1248     * to detect when cbuf0 exists but we don't need it anymore when we get
1249     * here.  Instead, zero it out so that the back-end doesn't get confused
1250     * when nr_params * 4 != num_uniforms != nr_params * 4.
1251     */
1252    nir->num_uniforms = 0;
1253 
1254    *out_system_values = system_values;
1255    *out_num_system_values = num_system_values;
1256    *out_num_cbufs = num_cbufs;
1257 }
1258 
1259 static const char *surface_group_names[] = {
1260    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
1261    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1262    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
1263    [IRIS_SURFACE_GROUP_TEXTURE_LOW64]      = "texture",
1264    [IRIS_SURFACE_GROUP_TEXTURE_HIGH64]     = "texture",
1265    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
1266    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
1267    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
1268 };
1269 
1270 static void
1271 iris_print_binding_table(FILE *fp, const char *name,
1272                          const struct iris_binding_table *bt)
1273 {
1274    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1275 
1276    uint32_t total = 0;
1277    uint32_t compacted = 0;
1278 
1279    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1280       uint32_t size = bt->sizes[i];
1281       total += size;
1282       if (size)
1283          compacted += util_bitcount64(bt->used_mask[i]);
1284    }
1285 
1286    if (total == 0) {
1287       fprintf(fp, "Binding table for %s is empty\n\n", name);
1288       return;
1289    }
1290 
1291    if (total != compacted) {
1292       fprintf(fp, "Binding table for %s "
1293               "(compacted to %u entries from %u entries)\n",
1294               name, compacted, total);
1295    } else {
1296       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1297    }
1298 
1299    uint32_t entry = 0;
1300    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1301       uint64_t mask = bt->used_mask[i];
1302       while (mask) {
1303          int index = u_bit_scan64(&mask);
1304          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
1305       }
1306    }
1307    fprintf(fp, "\n");
1308 }
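/* For orientation, the dump above might look roughly like this for a small,
 * hypothetical fragment shader (names and counts are illustrative only):
 *
 *    Binding table for MESA_SHADER_FRAGMENT (compacted to 3 entries from 6 entries)
 *      [0] render target #0
 *      [1] ubo #0
 *      [2] ssbo #2
 */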
1309 
1310 enum {
1311    /* Max elements in a surface group. */
1312    SURFACE_GROUP_MAX_ELEMENTS = 64,
1313 };
1314 
1315 /**
1316  * Map a <group, index> pair to a binding table index.
1317  *
1318  * For example: <UBO, 5> => binding table index 12
1319  */
1320 uint32_t
1321 iris_group_index_to_bti(const struct iris_binding_table *bt,
1322                         enum iris_surface_group group, uint32_t index)
1323 {
1324    assert(index < bt->sizes[group]);
1325    uint64_t mask = bt->used_mask[group];
1326    uint64_t bit = 1ull << index;
1327    if (bit & mask) {
1328       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1329    } else {
1330       return IRIS_SURFACE_NOT_USED;
1331    }
1332 }
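/* A worked example of the popcount-based compaction above, using made-up
 * values: given used_mask[group] = 0b101101 and offsets[group] = 7,
 *
 *    index 3 -> bit 3 is set, popcount(0b0111 & 0b101101) = 2 -> BTI 7 + 2 = 9
 *    index 1 -> bit 1 is clear in the mask -> IRIS_SURFACE_NOT_USED
 */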
1333 
1334 /**
1335  * Map a binding table index back to a <group, index> pair.
1336  *
1337  * For example: binding table index 12 => <UBO, 5>
1338  */
1339 uint32_t
1340 iris_bti_to_group_index(const struct iris_binding_table *bt,
1341                         enum iris_surface_group group, uint32_t bti)
1342 {
1343    uint64_t used_mask = bt->used_mask[group];
1344    assert(bti >= bt->offsets[group]);
1345 
1346    uint32_t c = bti - bt->offsets[group];
1347    while (used_mask) {
1348       int i = u_bit_scan64(&used_mask);
1349       if (c == 0)
1350          return i;
1351       c--;
1352    }
1353 
1354    return IRIS_SURFACE_NOT_USED;
1355 }
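/* Continuing the made-up example above (used_mask = 0b101101, offset = 7):
 * BTI 9 gives c = 2, so the first two set bits (0 and 2) are skipped and the
 * third set bit, index 3, is returned, i.e. the inverse of
 * iris_group_index_to_bti().
 */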
1356 
1357 static void
1358 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
1359                      nir_instr *instr, nir_src *src,
1360                      enum iris_surface_group group)
1361 {
1362    assert(bt->sizes[group] > 0);
1363 
1364    b->cursor = nir_before_instr(instr);
1365    nir_def *bti;
1366    if (nir_src_is_const(*src)) {
1367       uint32_t index = nir_src_as_uint(*src);
1368       bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1369                            src->ssa->bit_size);
1370    } else {
1371             /* Indirect usage makes all the surfaces of the group available,
1372        * so we can just add the base.
1373        */
1374       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1375       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1376    }
1377    nir_src_rewrite(src, bti);
1378 }
1379 
1380 static void
1381 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
1382                    enum iris_surface_group group)
1383 {
1384    assert(bt->sizes[group] > 0);
1385 
1386    if (nir_src_is_const(*src)) {
1387       uint64_t index = nir_src_as_uint(*src);
1388       assert(index < bt->sizes[group]);
1389       bt->used_mask[group] |= 1ull << index;
1390    } else {
1391       /* There's an indirect usage, we need all the surfaces. */
1392       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1393    }
1394 }
1395 
1396 static bool
1397 skip_compacting_binding_tables(void)
1398 {
1399    static int skip = -1;
1400    if (skip < 0)
1401       skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1402    return skip;
1403 }
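/* For debugging, compaction can be skipped and the resulting tables dumped
 * with something along the lines of (illustrative invocation):
 *
 *    INTEL_DISABLE_COMPACT_BINDING_TABLE=true INTEL_DEBUG=bt ./app
 */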
1404 
1405 /**
1406  * Set up the binding table indices and apply to the shader.
1407  */
1408 static void
1409 iris_setup_binding_table(const struct intel_device_info *devinfo,
1410                          struct nir_shader *nir,
1411                          struct iris_binding_table *bt,
1412                          unsigned num_render_targets,
1413                          unsigned num_system_values,
1414                          unsigned num_cbufs,
1415                          bool use_null_rt)
1416 {
1417    const struct shader_info *info = &nir->info;
1418 
1419    memset(bt, 0, sizeof(*bt));
1420 
1421    /* Set the sizes for each surface group.  For some groups, we already know
1422     * upfront how many will be used, so mark them.
1423     */
1424    if (info->stage == MESA_SHADER_FRAGMENT) {
1425       bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1426       /* All render targets used. */
1427       bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1428          BITFIELD64_MASK(num_render_targets);
1429 
1430       /* Set up the render target read surface group in order to support
1431        * non-coherent framebuffer fetch on Gfx8.
1432        */
1433       if (devinfo->ver == 8 && info->outputs_read) {
1434          bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1435          bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1436             BITFIELD64_MASK(num_render_targets);
1437       }
1438 
1439       bt->use_null_rt = use_null_rt;
1440    } else if (info->stage == MESA_SHADER_COMPUTE) {
1441       bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1442    }
1443 
1444    assert(ARRAY_SIZE(info->textures_used) >= 4);
1445    int max_tex = BITSET_LAST_BIT(info->textures_used);
1446    assert(max_tex <= 128);
1447    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1448    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1449    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1450       info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1451    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1452       info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1453    bt->samplers_used_mask = info->samplers_used[0];
1454 
1455    bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1456 
1457    /* Allocate an extra slot in the UBO section for NIR constants.
1458     * Binding table compaction will remove it if unnecessary.
1459     *
1460     * We don't include them in iris_compiled_shader::num_cbufs because
1461     * they are uploaded separately from shs->constbuf[], but from a shader
1462     * point of view, they're another UBO (at the end of the section).
1463     */
1464    bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1465 
1466    bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1467 
1468    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1469       assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1470 
1471    /* Mark surfaces used for the cases where we don't have the information
1472     * available upfront.
1473     */
1474    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1475    nir_foreach_block (block, impl) {
1476       nir_foreach_instr (instr, block) {
1477          if (instr->type != nir_instr_type_intrinsic)
1478             continue;
1479 
1480          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1481          switch (intrin->intrinsic) {
1482          case nir_intrinsic_load_num_workgroups:
1483             bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1484             break;
1485 
1486          case nir_intrinsic_load_output:
1487             if (devinfo->ver == 8) {
1488                mark_used_with_src(bt, &intrin->src[0],
1489                                   IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1490             }
1491             break;
1492 
1493          case nir_intrinsic_image_size:
1494          case nir_intrinsic_image_load:
1495          case nir_intrinsic_image_store:
1496          case nir_intrinsic_image_atomic:
1497          case nir_intrinsic_image_atomic_swap:
1498          case nir_intrinsic_image_load_raw_intel:
1499          case nir_intrinsic_image_store_raw_intel:
1500             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1501             break;
1502 
1503          case nir_intrinsic_load_ubo:
1504             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1505             break;
1506 
1507          case nir_intrinsic_store_ssbo:
1508             mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1509             break;
1510 
1511          case nir_intrinsic_get_ssbo_size:
1512          case nir_intrinsic_ssbo_atomic:
1513          case nir_intrinsic_ssbo_atomic_swap:
1514          case nir_intrinsic_load_ssbo:
1515             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1516             break;
1517 
1518          default:
1519             break;
1520          }
1521       }
1522    }
1523 
1524    /* When disabled, we just mark everything as used. */
1525    if (unlikely(skip_compacting_binding_tables())) {
1526       for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1527          bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1528    }
1529 
1530    /* Calculate the offsets and the binding table size based on the used
1531     * surfaces.  After this point, the functions that map between "group indices"
1532     * and binding table indices can be used.
1533     */
1534    uint32_t next = 0;
1535    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1536       if (bt->used_mask[i] != 0) {
1537          bt->offsets[i] = next;
1538          next += util_bitcount64(bt->used_mask[i]);
1539       }
1540    }
1541    bt->size_bytes = next * 4;
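   /* A hypothetical layout, to illustrate the loop above: with one render
    * target, two textures, and three UBOs used, the offsets come out as
    * RT = 0, TEXTURE_LOW64 = 1, UBO = 3, and next = 6, so size_bytes = 24.
    */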
1542 
1543    if (INTEL_DEBUG(DEBUG_BT)) {
1544       iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1545    }
1546 
1547    /* Apply the binding table indices.  The backend compiler is not expected
1548     * to change those, as we haven't set any of the *_start entries in brw
1549     * binding_table.
1550     */
1551    nir_builder b = nir_builder_create(impl);
1552 
1553    nir_foreach_block (block, impl) {
1554       nir_foreach_instr (instr, block) {
1555          if (instr->type == nir_instr_type_tex) {
1556             nir_tex_instr *tex = nir_instr_as_tex(instr);
1557             if (tex->texture_index < 64) {
1558                tex->texture_index =
1559                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1560                                           tex->texture_index);
1561             } else {
1562                tex->texture_index =
1563                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1564                                           tex->texture_index - 64);
1565             }
1566             continue;
1567          }
1568 
1569          if (instr->type != nir_instr_type_intrinsic)
1570             continue;
1571 
1572          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1573          switch (intrin->intrinsic) {
1574          case nir_intrinsic_image_size:
1575          case nir_intrinsic_image_load:
1576          case nir_intrinsic_image_store:
1577          case nir_intrinsic_image_atomic:
1578          case nir_intrinsic_image_atomic_swap:
1579          case nir_intrinsic_image_load_raw_intel:
1580          case nir_intrinsic_image_store_raw_intel:
1581             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1582                                  IRIS_SURFACE_GROUP_IMAGE);
1583             break;
1584 
1585          case nir_intrinsic_load_ubo:
1586             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1587                                  IRIS_SURFACE_GROUP_UBO);
1588             break;
1589 
1590          case nir_intrinsic_store_ssbo:
1591             rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1592                                  IRIS_SURFACE_GROUP_SSBO);
1593             break;
1594 
1595          case nir_intrinsic_load_output:
1596             if (devinfo->ver == 8) {
1597                rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1598                                     IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1599             }
1600             break;
1601 
1602          case nir_intrinsic_get_ssbo_size:
1603          case nir_intrinsic_ssbo_atomic:
1604          case nir_intrinsic_ssbo_atomic_swap:
1605          case nir_intrinsic_load_ssbo:
1606             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1607                                  IRIS_SURFACE_GROUP_SSBO);
1608             break;
1609 
1610          default:
1611             break;
1612          }
1613       }
1614    }
1615 }
1616 
1617 static void
1618 iris_debug_recompile_brw(struct iris_screen *screen,
1619                          struct util_debug_callback *dbg,
1620                          struct iris_uncompiled_shader *ish,
1621                          const struct brw_base_prog_key *key)
1622 {
1623    if (!ish || list_is_empty(&ish->variants)
1624             || list_is_singular(&ish->variants))
1625       return;
1626 
1627    const struct brw_compiler *c = screen->brw;
1628    const struct shader_info *info = &ish->nir->info;
1629 
1630    brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1631                        _mesa_shader_stage_to_string(info->stage),
1632                        info->name ? info->name : "(no identifier)",
1633                        info->label ? info->label : "");
1634 
1635    struct iris_compiled_shader *shader =
1636       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1637    const void *old_iris_key = &shader->key;
1638 
1639    union brw_any_prog_key old_key;
1640 
1641    switch (info->stage) {
1642    case MESA_SHADER_VERTEX:
1643       old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1644       break;
1645    case MESA_SHADER_TESS_CTRL:
1646       old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1647       break;
1648    case MESA_SHADER_TESS_EVAL:
1649       old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1650       break;
1651    case MESA_SHADER_GEOMETRY:
1652       old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1653       break;
1654    case MESA_SHADER_FRAGMENT:
1655       old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1656       break;
1657    case MESA_SHADER_COMPUTE:
1658       old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1659       break;
1660    default:
1661       unreachable("invalid shader stage");
1662    }
1663 
1664    brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1665 }
1666 
1667 #ifdef INTEL_USE_ELK
1668 
1669 static void
1670 iris_debug_recompile_elk(struct iris_screen *screen,
1671                          struct util_debug_callback *dbg,
1672                          struct iris_uncompiled_shader *ish,
1673                          const struct elk_base_prog_key *key)
1674 {
1675    if (!ish || list_is_empty(&ish->variants)
1676             || list_is_singular(&ish->variants))
1677       return;
1678 
1679    const struct elk_compiler *c = screen->elk;
1680    const struct shader_info *info = &ish->nir->info;
1681 
1682    elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1683                        _mesa_shader_stage_to_string(info->stage),
1684                        info->name ? info->name : "(no identifier)",
1685                        info->label ? info->label : "");
1686 
1687    struct iris_compiled_shader *shader =
1688       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1689    const void *old_iris_key = &shader->key;
1690 
1691    union elk_any_prog_key old_key;
1692 
1693    switch (info->stage) {
1694    case MESA_SHADER_VERTEX:
1695       old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1696       break;
1697    case MESA_SHADER_TESS_CTRL:
1698       old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1699       break;
1700    case MESA_SHADER_TESS_EVAL:
1701       old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1702       break;
1703    case MESA_SHADER_GEOMETRY:
1704       old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1705       break;
1706    case MESA_SHADER_FRAGMENT:
1707       old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1708       break;
1709    case MESA_SHADER_COMPUTE:
1710       old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1711       break;
1712    default:
1713       unreachable("invalid shader stage");
1714    }
1715 
1716    elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1717 }
1718 
1719 #endif
1720 
1721 static void
1722 check_urb_size(struct iris_context *ice,
1723                unsigned needed_size,
1724                gl_shader_stage stage)
1725 {
1726    unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1727 
1728    /* If the last URB allocation wasn't large enough for our needs,
1729     * flag it as needing to be reconfigured.  Otherwise, we can use
1730     * the existing config.  However, if the URB is constrained, and
1731     * we can shrink our size for this stage, we may be able to gain
1732     * extra concurrency by reconfiguring it to be smaller.  Do so.
1733     */
1734    if (last_allocated_size < needed_size ||
1735        (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1736       ice->state.dirty |= IRIS_DIRTY_URB;
1737    }
1738 }
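/* As a hypothetical reading of the check above: if the previous allocation
 * gave this stage URB entries of size 64 but the new shader needs 96, or the
 * URB is constrained and the stage now only needs 32, IRIS_DIRTY_URB is
 * flagged so the URB gets reconfigured.
 */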
1739 
1740 /**
1741  * Get the shader for the last enabled geometry stage.
1742  *
1743  * This stage is the one which will feed stream output and the rasterizer.
1744  */
1745 static gl_shader_stage
1746 last_vue_stage(struct iris_context *ice)
1747 {
1748    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1749       return MESA_SHADER_GEOMETRY;
1750 
1751    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1752       return MESA_SHADER_TESS_EVAL;
1753 
1754    return MESA_SHADER_VERTEX;
1755 }
1756 
1757 /**
1758  * \param added  Set to \c true if the variant was added to the list (i.e., a
1759  *               variant matching \c key was not found).  Set to \c false
1760  *               otherwise.
1761  */
1762 static inline struct iris_compiled_shader *
1763 find_or_add_variant(const struct iris_screen *screen,
1764                     struct iris_uncompiled_shader *ish,
1765                     enum iris_program_cache_id cache_id,
1766                     const void *key, unsigned key_size,
1767                     bool *added)
1768 {
1769    struct list_head *start = ish->variants.next;
1770 
1771    *added = false;
1772 
1773    if (screen->precompile) {
1774       /* Check the first list entry.  There will always be at least one
1775        * variant in the list (most likely the precompile variant), and
1776        * other contexts only append new variants, so we can safely check
1777        * it without locking, saving that cost in the common case.
1778        */
1779       struct iris_compiled_shader *first =
1780          list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1781 
1782       if (memcmp(&first->key, key, key_size) == 0) {
1783          util_queue_fence_wait(&first->ready);
1784          return first;
1785       }
1786 
1787       /* Skip this one in the loop below */
1788       start = first->link.next;
1789    }
1790 
1791    struct iris_compiled_shader *variant = NULL;
1792 
1793    /* If it doesn't match, we have to walk the list; other contexts may be
1794     * concurrently appending shaders to it, so we need to lock here.
1795     */
1796    simple_mtx_lock(&ish->lock);
1797 
1798    list_for_each_entry_from(struct iris_compiled_shader, v, start,
1799                             &ish->variants, link) {
1800       if (memcmp(&v->key, key, key_size) == 0) {
1801          variant = v;
1802          break;
1803       }
1804    }
1805 
1806    gl_shader_stage stage = ish->nir->info.stage;
1807 
1808    if (variant == NULL) {
1809       variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1810                                            key_size, key);
1811 
1812       /* Append our new variant to the shader's variant list. */
1813       list_addtail(&variant->link, &ish->variants);
1814       *added = true;
1815 
1816       simple_mtx_unlock(&ish->lock);
1817    } else {
1818       simple_mtx_unlock(&ish->lock);
1819 
1820       util_queue_fence_wait(&variant->ready);
1821    }
1822 
1823    assert(stage == variant->stage);
1824    return variant;
1825 }
1826 
1827 static void
1828 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1829                                  UNUSED int thread_index)
1830 {
1831    free(_job);
1832 }
1833 
1834 static void
1835 iris_schedule_compile(struct iris_screen *screen,
1836                       struct util_queue_fence *ready_fence,
1837                       struct util_debug_callback *dbg,
1838                       struct iris_threaded_compile_job *job,
1839                       util_queue_execute_func execute)
1840 
1841 {
1842    struct util_async_debug_callback async_debug;
1843 
1844    if (dbg) {
1845       u_async_debug_init(&async_debug);
1846       job->dbg = &async_debug.base;
1847    }
1848 
1849    util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1850                       iris_threaded_compile_job_delete, 0);
1851 
1852    if (screen->driconf.sync_compile || dbg)
1853       util_queue_fence_wait(ready_fence);
1854 
1855    if (dbg) {
1856       u_async_debug_drain(&async_debug, dbg);
1857       u_async_debug_cleanup(&async_debug);
1858    }
1859 }
1860 
1861 /**
1862  * Compile a vertex shader, and upload the assembly.
1863  */
1864 static void
1865 iris_compile_vs(struct iris_screen *screen,
1866                 struct u_upload_mgr *uploader,
1867                 struct util_debug_callback *dbg,
1868                 struct iris_uncompiled_shader *ish,
1869                 struct iris_compiled_shader *shader)
1870 {
1871    const struct intel_device_info *devinfo = screen->devinfo;
1872    void *mem_ctx = ralloc_context(NULL);
1873    uint32_t *system_values;
1874    unsigned num_system_values;
1875    unsigned num_cbufs;
1876 
1877    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1878    const struct iris_vs_prog_key *const key = &shader->key.vs;
1879 
1880    if (key->vue.nr_userclip_plane_consts) {
1881       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1882       /* Check if variables were found. */
1883       if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1884                             true, false, NULL)) {
1885          nir_lower_io_to_temporaries(nir, impl, true, false);
1886          nir_lower_global_vars_to_local(nir);
1887          nir_lower_vars_to_ssa(nir);
1888          nir_shader_gather_info(nir, impl);
1889       }
1890    }
1891 
1892    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1893                        &num_system_values, &num_cbufs);
1894 
1895    struct iris_binding_table bt;
1896    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1897                             num_system_values, num_cbufs, false);
1898 
1899    const char *error;
1900    const unsigned *program;
1901    if (screen->brw) {
1902       struct brw_vs_prog_data *brw_prog_data =
1903          rzalloc(mem_ctx, struct brw_vs_prog_data);
1904 
1905       brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1906 
1907       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1908 
1909       brw_compute_vue_map(devinfo,
1910                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
1911                           nir->info.separate_shader, /* pos_slots */ 1);
1912 
1913       struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1914 
1915       struct brw_compile_vs_params params = {
1916          .base = {
1917             .mem_ctx = mem_ctx,
1918             .nir = nir,
1919             .log_data = dbg,
1920             .source_hash = ish->source_hash,
1921          },
1922          .key = &brw_key,
1923          .prog_data = brw_prog_data,
1924       };
1925 
1926       program = brw_compile_vs(screen->brw, &params);
1927       error = params.base.error_str;
1928       if (program) {
1929          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1930          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1931       }
1932    } else {
1933 #ifdef INTEL_USE_ELK
1934       struct elk_vs_prog_data *elk_prog_data =
1935          rzalloc(mem_ctx, struct elk_vs_prog_data);
1936 
1937       elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1938 
1939       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1940 
1941       elk_compute_vue_map(devinfo,
1942                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
1943                           nir->info.separate_shader, /* pos_slots */ 1);
1944 
1945       struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1946 
1947       struct elk_compile_vs_params params = {
1948          .base = {
1949             .mem_ctx = mem_ctx,
1950             .nir = nir,
1951             .log_data = dbg,
1952             .source_hash = ish->source_hash,
1953          },
1954          .key = &elk_key,
1955          .prog_data = elk_prog_data,
1956       };
1957 
1958       program = elk_compile_vs(screen->elk, &params);
1959       error = params.base.error_str;
1960       if (program) {
1961          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1962          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1963       }
1964 #else
1965       unreachable("no elk support");
1966 #endif
1967    }
1968 
1969    if (program == NULL) {
1970       dbg_printf("Failed to compile vertex shader: %s\n", error);
1971       ralloc_free(mem_ctx);
1972 
1973       shader->compilation_failed = true;
1974       util_queue_fence_signal(&shader->ready);
1975 
1976       return;
1977    }
1978 
1979    shader->compilation_failed = false;
1980 
1981    uint32_t *so_decls =
1982       screen->vtbl.create_so_decl_list(&ish->stream_output,
1983                                        &iris_vue_data(shader)->vue_map);
1984 
1985    iris_finalize_program(shader, so_decls,
1986                          system_values, num_system_values, 0, num_cbufs, &bt);
1987 
1988    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1989                       sizeof(*key), key, program);
1990 
1991    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1992 
1993    ralloc_free(mem_ctx);
1994 }
1995 
1996 /**
1997  * Update the current vertex shader variant.
1998  *
1999  * Fill out the key, look in the cache, compile and bind if needed.
2000  */
2001 static void
2002 iris_update_compiled_vs(struct iris_context *ice)
2003 {
2004    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2005    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
2006    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2007    struct iris_uncompiled_shader *ish =
2008       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
2009 
2010    struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
2011    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2012 
2013    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
2014    bool added;
2015    struct iris_compiled_shader *shader =
2016       find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
2017 
2018    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2019                                           &key, sizeof(key))) {
2020       iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
2021    }
2022 
2023    if (shader->compilation_failed)
2024       shader = NULL;
2025 
2026    if (old != shader) {
2027       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
2028                                     shader);
2029       ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
2030       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
2031                                 IRIS_STAGE_DIRTY_BINDINGS_VS |
2032                                 IRIS_STAGE_DIRTY_CONSTANTS_VS;
2033       shs->sysvals_need_upload = true;
2034 
2035       unsigned urb_entry_size = shader ?
2036          iris_vue_data(shader)->urb_entry_size : 0;
2037       check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
2038    }
2039 }
2040 
2041 /**
2042  * Get the shader_info for a given stage, or NULL if the stage is disabled.
2043  */
2044 const struct shader_info *
2045 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2046 {
2047    const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2048 
2049    if (!ish)
2050       return NULL;
2051 
2052    const nir_shader *nir = ish->nir;
2053    return &nir->info;
2054 }
2055 
2056 /**
2057  * Get the union of TCS output and TES input slots.
2058  *
2059  * TCS and TES need to agree on a common URB entry layout.  In particular,
2060  * the data for all patch vertices is stored in a single URB entry (unlike
2061  * GS which has one entry per input vertex).  This means that per-vertex
2062  * array indexing needs a stride.
2063  *
2064  * SSO requires locations to match, but doesn't require the number of
2065  * outputs/inputs to match (in fact, the TCS often has extra outputs).
2066  * So, we need to take the extra step of unifying these on the fly.
2067  */
2068 static void
2069 get_unified_tess_slots(const struct iris_context *ice,
2070                        uint64_t *per_vertex_slots,
2071                        uint32_t *per_patch_slots)
2072 {
2073    const struct shader_info *tcs =
2074       iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2075    const struct shader_info *tes =
2076       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2077 
2078    *per_vertex_slots = tes->inputs_read;
2079    *per_patch_slots = tes->patch_inputs_read;
2080 
2081    if (tcs) {
2082       *per_vertex_slots |= tcs->outputs_written;
2083       *per_patch_slots |= tcs->patch_outputs_written;
2084    }
2085 }
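/* As a hypothetical example of the unification above: if the TCS writes
 * per-vertex position plus an extra varying that the TES never reads, the
 * union keeps both slots, so TCS stores and TES per-vertex indexing agree
 * on the same URB entry stride.
 */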
2086 
2087 /**
2088  * Compile a tessellation control shader, and upload the assembly.
2089  */
2090 static void
2091 iris_compile_tcs(struct iris_screen *screen,
2092                  struct hash_table *passthrough_ht,
2093                  struct u_upload_mgr *uploader,
2094                  struct util_debug_callback *dbg,
2095                  struct iris_uncompiled_shader *ish,
2096                  struct iris_compiled_shader *shader)
2097 {
2098    void *mem_ctx = ralloc_context(NULL);
2099    const struct intel_device_info *devinfo = screen->devinfo;
2100    uint32_t *system_values = NULL;
2101    unsigned num_system_values = 0;
2102    unsigned num_cbufs = 0;
2103 
2104    nir_shader *nir;
2105 
2106    struct iris_binding_table bt;
2107 
2108    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2109    struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2110 #ifdef INTEL_USE_ELK
2111    struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2112 #endif
2113    uint32_t source_hash;
2114 
2115    if (ish) {
2116       nir = nir_shader_clone(mem_ctx, ish->nir);
2117       source_hash = ish->source_hash;
2118    } else {
2119       if (screen->brw) {
2120          nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2121       } else {
2122 #ifdef INTEL_USE_ELK
2123          assert(screen->elk);
2124          nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2125 #else
2126          unreachable("no elk support");
2127 #endif
2128       }
2129       source_hash = *(uint32_t*)nir->info.source_blake3;
2130    }
2131 
2132    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2133                        &num_system_values, &num_cbufs);
2134    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2135                             num_system_values, num_cbufs, false);
2136 
2137    const char *error = NULL;
2138    const unsigned *program;
2139    if (screen->brw) {
2140       struct brw_tcs_prog_data *brw_prog_data =
2141          rzalloc(mem_ctx, struct brw_tcs_prog_data);
2142       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2143 
2144       struct brw_compile_tcs_params params = {
2145          .base = {
2146             .mem_ctx = mem_ctx,
2147             .nir = nir,
2148             .log_data = dbg,
2149             .source_hash = source_hash,
2150          },
2151          .key = &brw_key,
2152          .prog_data = brw_prog_data,
2153       };
2154 
2155       program = brw_compile_tcs(screen->brw, &params);
2156       error = params.base.error_str;
2157 
2158       if (program) {
2159          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2160          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2161       }
2162    } else {
2163 #ifdef INTEL_USE_ELK
2164       assert(screen->elk);
2165       struct elk_tcs_prog_data *elk_prog_data =
2166          rzalloc(mem_ctx, struct elk_tcs_prog_data);
2167       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2168 
2169       struct elk_compile_tcs_params params = {
2170          .base = {
2171             .mem_ctx = mem_ctx,
2172             .nir = nir,
2173             .log_data = dbg,
2174             .source_hash = source_hash,
2175          },
2176          .key = &elk_key,
2177          .prog_data = elk_prog_data,
2178       };
2179 
2180       program = elk_compile_tcs(screen->elk, &params);
2181       error = params.base.error_str;
2182 
2183       if (program) {
2184          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2185          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2186       }
2187 #else
2188       unreachable("no elk support");
2189 #endif
2190    }
2191 
2192    if (program == NULL) {
2193       dbg_printf("Failed to compile control shader: %s\n", error);
2194       ralloc_free(mem_ctx);
2195 
2196       shader->compilation_failed = true;
2197       util_queue_fence_signal(&shader->ready);
2198 
2199       return;
2200    }
2201 
2202    shader->compilation_failed = false;
2203 
2204    iris_finalize_program(shader, NULL, system_values,
2205                          num_system_values, 0, num_cbufs, &bt);
2206 
2207    iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2208                       IRIS_CACHE_TCS, sizeof(*key), key, program);
2209 
2210    if (ish)
2211       iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2212 
2213    ralloc_free(mem_ctx);
2214 }
2215 
2216 /**
2217  * Update the current tessellation control shader variant.
2218  *
2219  * Fill out the key, look in the cache, compile and bind if needed.
2220  */
2221 static void
2222 iris_update_compiled_tcs(struct iris_context *ice)
2223 {
2224    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2225    struct iris_uncompiled_shader *tcs =
2226       ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2227    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2228    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2229    const struct intel_device_info *devinfo = screen->devinfo;
2230 
2231    const struct shader_info *tes_info =
2232       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2233    struct iris_tcs_prog_key key = {
2234       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2235       ._tes_primitive_mode = tes_info->tess._primitive_mode,
2236       .input_vertices =
2237          !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2238       .quads_workaround = devinfo->ver < 9 &&
2239                           tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2240                           tes_info->tess.spacing == TESS_SPACING_EQUAL,
2241    };
2242    get_unified_tess_slots(ice, &key.outputs_written,
2243                           &key.patch_outputs_written);
2244    screen->vtbl.populate_tcs_key(ice, &key);
2245 
2246    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2247    struct iris_compiled_shader *shader;
2248    bool added = false;
2249 
2250    if (tcs != NULL) {
2251       shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2252                                    sizeof(key), &added);
2253    } else {
2254       /* Look for and possibly create a passthrough TCS */
2255       shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2256 
2257 
2258       if (shader == NULL) {
2259          shader = iris_create_shader_variant(screen, ice->shaders.cache,
2260                                              MESA_SHADER_TESS_CTRL,
2261                                              IRIS_CACHE_TCS, sizeof(key), &key);
2262          added = true;
2263       }
2264 
2265    }
2266 
2267    /* If a new variant was added (i.e. not found in whichever cache we
2268     * checked above), compile it, unless tcs is non-NULL and the shader
2269     * can be retrieved from the disk cache.
2270     */
2271    if (added &&
2272        (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2273                                                  &key, sizeof(key)))) {
2274       iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2275                        shader);
2276    }
2277 
2278    if (shader->compilation_failed)
2279       shader = NULL;
2280 
2281    if (old != shader) {
2282       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2283                                     shader);
2284       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2285                                 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2286                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2287       shs->sysvals_need_upload = true;
2288 
2289       unsigned urb_entry_size = shader ?
2290          iris_vue_data(shader)->urb_entry_size : 0;
2291       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2292    }
2293 }
2294 
2295 /**
2296  * Compile a tessellation evaluation shader, and upload the assembly.
2297  */
2298 static void
2299 iris_compile_tes(struct iris_screen *screen,
2300                  struct u_upload_mgr *uploader,
2301                  struct util_debug_callback *dbg,
2302                  struct iris_uncompiled_shader *ish,
2303                  struct iris_compiled_shader *shader)
2304 {
2305    void *mem_ctx = ralloc_context(NULL);
2306    uint32_t *system_values;
2307    const struct intel_device_info *devinfo = screen->devinfo;
2308    unsigned num_system_values;
2309    unsigned num_cbufs;
2310 
2311    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2312    const struct iris_tes_prog_key *const key = &shader->key.tes;
2313 
2314    if (key->vue.nr_userclip_plane_consts) {
2315       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2316       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2317                         true, false, NULL);
2318       nir_lower_io_to_temporaries(nir, impl, true, false);
2319       nir_lower_global_vars_to_local(nir);
2320       nir_lower_vars_to_ssa(nir);
2321       nir_shader_gather_info(nir, impl);
2322    }
2323 
2324    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2325                        &num_system_values, &num_cbufs);
2326 
2327    struct iris_binding_table bt;
2328    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2329                             num_system_values, num_cbufs, false);
2330 
2331    const char *error;
2332    const unsigned *program;
2333 
2334    if (screen->brw) {
2335       struct brw_tes_prog_data *brw_prog_data =
2336          rzalloc(mem_ctx, struct brw_tes_prog_data);
2337 
2338       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2339 
2340       struct intel_vue_map input_vue_map;
2341       brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2342                                key->patch_inputs_read);
2343 
2344       struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2345 
2346       struct brw_compile_tes_params params = {
2347          .base = {
2348             .mem_ctx = mem_ctx,
2349             .nir = nir,
2350             .log_data = dbg,
2351             .source_hash = ish->source_hash,
2352          },
2353          .key = &brw_key,
2354          .prog_data = brw_prog_data,
2355          .input_vue_map = &input_vue_map,
2356       };
2357 
2358       program = brw_compile_tes(screen->brw, &params);
2359       error = params.base.error_str;
2360 
2361       if (program) {
2362          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2363          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2364       }
2365    } else {
2366 #ifdef INTEL_USE_ELK
2367       struct elk_tes_prog_data *elk_prog_data =
2368          rzalloc(mem_ctx, struct elk_tes_prog_data);
2369 
2370       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2371 
2372       struct intel_vue_map input_vue_map;
2373       elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2374                                key->patch_inputs_read);
2375 
2376       struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2377 
2378       struct elk_compile_tes_params params = {
2379          .base = {
2380             .mem_ctx = mem_ctx,
2381             .nir = nir,
2382             .log_data = dbg,
2383             .source_hash = ish->source_hash,
2384          },
2385          .key = &elk_key,
2386          .prog_data = elk_prog_data,
2387          .input_vue_map = &input_vue_map,
2388       };
2389 
2390       program = elk_compile_tes(screen->elk, &params);
2391       error = params.base.error_str;
2392 
2393       if (program) {
2394          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2395          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2396       }
2397 #else
2398       unreachable("no elk support");
2399 #endif
2400    }
2401 
2402    if (program == NULL) {
2403       dbg_printf("Failed to compile evaluation shader: %s\n", error);
2404       ralloc_free(mem_ctx);
2405 
2406       shader->compilation_failed = true;
2407       util_queue_fence_signal(&shader->ready);
2408 
2409       return;
2410    }
2411 
2412    shader->compilation_failed = false;
2413 
2414    uint32_t *so_decls =
2415       screen->vtbl.create_so_decl_list(&ish->stream_output,
2416                                        &iris_vue_data(shader)->vue_map);
2417 
2418    iris_finalize_program(shader, so_decls, system_values,
2419                          num_system_values, 0, num_cbufs, &bt);
2420 
2421    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2422                       sizeof(*key), key, program);
2423 
2424    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2425 
2426    ralloc_free(mem_ctx);
2427 }
2428 
2429 /**
2430  * Update the current tessellation evaluation shader variant.
2431  *
2432  * Fill out the key, look in the cache, compile and bind if needed.
2433  */
2434 static void
2435 iris_update_compiled_tes(struct iris_context *ice)
2436 {
2437    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2438    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2439    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2440    struct iris_uncompiled_shader *ish =
2441       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2442 
2443    struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2444    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2445    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2446 
2447    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2448    bool added;
2449    struct iris_compiled_shader *shader =
2450       find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2451 
2452    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2453                                           &key, sizeof(key))) {
2454       iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2455    }
2456 
2457    if (shader->compilation_failed)
2458       shader = NULL;
2459 
2460    if (old != shader) {
2461       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2462                                     shader);
2463       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2464                                 IRIS_STAGE_DIRTY_BINDINGS_TES |
2465                                 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2466       shs->sysvals_need_upload = true;
2467 
2468       unsigned urb_entry_size = shader ?
2469          iris_vue_data(shader)->urb_entry_size : 0;
2470       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2471    }
2472 
2473    /* TODO: Could compare and avoid flagging this. */
2474    const struct shader_info *tes_info = &ish->nir->info;
2475    if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2476       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2477       ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2478    }
2479 }
2480 
2481 /**
2482  * Compile a geometry shader, and upload the assembly.
2483  */
2484 static void
2485 iris_compile_gs(struct iris_screen *screen,
2486                 struct u_upload_mgr *uploader,
2487                 struct util_debug_callback *dbg,
2488                 struct iris_uncompiled_shader *ish,
2489                 struct iris_compiled_shader *shader)
2490 {
2491    const struct intel_device_info *devinfo = screen->devinfo;
2492    void *mem_ctx = ralloc_context(NULL);
2493    uint32_t *system_values;
2494    unsigned num_system_values;
2495    unsigned num_cbufs;
2496 
2497    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2498    const struct iris_gs_prog_key *const key = &shader->key.gs;
2499 
2500    if (key->vue.nr_userclip_plane_consts) {
2501       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2502       nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2503                         false, NULL);
2504       nir_lower_io_to_temporaries(nir, impl, true, false);
2505       nir_lower_global_vars_to_local(nir);
2506       nir_lower_vars_to_ssa(nir);
2507       nir_shader_gather_info(nir, impl);
2508    }
2509 
2510    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2511                        &num_system_values, &num_cbufs);
2512 
2513    struct iris_binding_table bt;
2514    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2515                             num_system_values, num_cbufs, false);
2516 
2517    const char *error;
2518    const unsigned *program;
2519    if (screen->brw) {
2520       struct brw_gs_prog_data *brw_prog_data =
2521          rzalloc(mem_ctx, struct brw_gs_prog_data);
2522 
2523       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2524 
2525       brw_compute_vue_map(devinfo,
2526                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
2527                           nir->info.separate_shader, /* pos_slots */ 1);
2528 
2529       struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2530 
2531       struct brw_compile_gs_params params = {
2532          .base = {
2533             .mem_ctx = mem_ctx,
2534             .nir = nir,
2535             .log_data = dbg,
2536             .source_hash = ish->source_hash,
2537          },
2538          .key = &brw_key,
2539          .prog_data = brw_prog_data,
2540       };
2541 
2542       program = brw_compile_gs(screen->brw, &params);
2543       error = params.base.error_str;
2544       if (program) {
2545          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2546          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2547       }
2548    } else {
2549 #ifdef INTEL_USE_ELK
2550       struct elk_gs_prog_data *elk_prog_data =
2551          rzalloc(mem_ctx, struct elk_gs_prog_data);
2552 
2553       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2554 
2555       elk_compute_vue_map(devinfo,
2556                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
2557                           nir->info.separate_shader, /* pos_slots */ 1);
2558 
2559       struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2560 
2561       struct elk_compile_gs_params params = {
2562          .base = {
2563             .mem_ctx = mem_ctx,
2564             .nir = nir,
2565             .log_data = dbg,
2566             .source_hash = ish->source_hash,
2567          },
2568          .key = &elk_key,
2569          .prog_data = elk_prog_data,
2570       };
2571 
2572       program = elk_compile_gs(screen->elk, &params);
2573       error = params.base.error_str;
2574       if (program) {
2575          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2576          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2577       }
2578 #else
2579       unreachable("no elk support");
2580 #endif
2581    }
2582 
2583    if (program == NULL) {
2584       dbg_printf("Failed to compile geometry shader: %s\n", error);
2585       ralloc_free(mem_ctx);
2586 
2587       shader->compilation_failed = true;
2588       util_queue_fence_signal(&shader->ready);
2589 
2590       return;
2591    }
2592 
2593    shader->compilation_failed = false;
2594 
2595    uint32_t *so_decls =
2596       screen->vtbl.create_so_decl_list(&ish->stream_output,
2597                                        &iris_vue_data(shader)->vue_map);
2598 
2599    iris_finalize_program(shader, so_decls, system_values,
2600                          num_system_values, 0, num_cbufs, &bt);
2601 
2602    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2603                       sizeof(*key), key, program);
2604 
2605    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2606 
2607    ralloc_free(mem_ctx);
2608 }
2609 
2610 /**
2611  * Update the current geometry shader variant.
2612  *
2613  * Fill out the key, look in the cache, compile and bind if needed.
2614  */
2615 static void
2616 iris_update_compiled_gs(struct iris_context *ice)
2617 {
2618    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2619    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2620    struct iris_uncompiled_shader *ish =
2621       ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2622    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2623    struct iris_compiled_shader *shader = NULL;
2624    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2625 
2626    if (ish) {
2627       struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2628       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2629 
2630       bool added;
2631 
2632       shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2633                                    sizeof(key), &added);
2634 
2635       if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2636                                              &key, sizeof(key))) {
2637          iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2638       }
2639 
2640       if (shader->compilation_failed)
2641          shader = NULL;
2642    }
2643 
2644    if (old != shader) {
2645       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2646                                     shader);
2647       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2648                                 IRIS_STAGE_DIRTY_BINDINGS_GS |
2649                                 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2650       shs->sysvals_need_upload = true;
2651 
2652       unsigned urb_entry_size = shader ?
2653          iris_vue_data(shader)->urb_entry_size : 0;
2654       check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2655    }
2656 }
2657 
2658 /**
2659  * Compile a fragment (pixel) shader, and upload the assembly.
2660  */
2661 static void
2662 iris_compile_fs(struct iris_screen *screen,
2663                 struct u_upload_mgr *uploader,
2664                 struct util_debug_callback *dbg,
2665                 struct iris_uncompiled_shader *ish,
2666                 struct iris_compiled_shader *shader,
2667                 struct intel_vue_map *vue_map)
2668 {
2669    void *mem_ctx = ralloc_context(NULL);
2670    uint32_t *system_values;
2671    const struct intel_device_info *devinfo = screen->devinfo;
2672    unsigned num_system_values;
2673    unsigned num_cbufs;
2674 
2675    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2676    const struct iris_fs_prog_key *const key = &shader->key.fs;
2677 
2678    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2679                        &num_system_values, &num_cbufs);
2680 
2681    /* Lower output variables to load_output intrinsics before setting up
2682     * binding tables, so iris_setup_binding_table can map any load_output
2683     * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2684     * non-coherent framebuffer fetches.
2685     */
2686    brw_nir_lower_fs_outputs(nir);
2687 
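        /* Even with zero color regions the binding table may still need one
         * slot for a null render target; brw_nir_fs_needs_null_rt() decides,
         * and the MAX2() below reserves that slot.
         */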
2688    int null_rts = brw_nir_fs_needs_null_rt(devinfo, nir,
2689                                            key->multisample_fbo,
2690                                            key->alpha_to_coverage) ? 1 : 0;
2691 
2692    struct iris_binding_table bt;
2693    iris_setup_binding_table(devinfo, nir, &bt,
2694                             MAX2(key->nr_color_regions, null_rts),
2695                             num_system_values, num_cbufs, null_rts != 0);
2696 
2697    const char *error;
2698    const unsigned *program;
2699 
2700    if (screen->brw) {
2701       struct brw_wm_prog_data *brw_prog_data =
2702          rzalloc(mem_ctx, struct brw_wm_prog_data);
2703 
2704       brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2705 
2706       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2707 
2708       struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2709 
2710       struct brw_compile_fs_params params = {
2711          .base = {
2712             .mem_ctx = mem_ctx,
2713             .nir = nir,
2714             .log_data = dbg,
2715             .source_hash = ish->source_hash,
2716          },
2717          .key = &brw_key,
2718          .prog_data = brw_prog_data,
2719 
2720          .allow_spilling = true,
2721          .max_polygons = UCHAR_MAX,
2722          .vue_map = vue_map,
2723       };
2724 
2725       program = brw_compile_fs(screen->brw, &params);
2726       error = params.base.error_str;
2727       if (program) {
2728          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2729          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2730       }
2731    } else {
2732 #ifdef INTEL_USE_ELK
2733       struct elk_wm_prog_data *elk_prog_data =
2734          rzalloc(mem_ctx, struct elk_wm_prog_data);
2735 
2736       elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2737 
2738       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2739 
2740       struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2741 
2742       struct elk_compile_fs_params params = {
2743          .base = {
2744             .mem_ctx = mem_ctx,
2745             .nir = nir,
2746             .log_data = dbg,
2747             .source_hash = ish->source_hash,
2748          },
2749          .key = &elk_key,
2750          .prog_data = elk_prog_data,
2751 
2752          .allow_spilling = true,
2753          .max_polygons = UCHAR_MAX,
2754          .vue_map = vue_map,
2755       };
2756 
2757       program = elk_compile_fs(screen->elk, &params);
2758       error = params.base.error_str;
2759       if (program) {
2760          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2761          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2762       }
2763 #else
2764       unreachable("no elk support");
2765 #endif
2766    }
2767 
2768    if (program == NULL) {
2769       dbg_printf("Failed to compile fragment shader: %s\n", error);
2770       ralloc_free(mem_ctx);
2771 
2772       shader->compilation_failed = true;
2773       util_queue_fence_signal(&shader->ready);
2774 
2775       return;
2776    }
2777 
2778    shader->compilation_failed = false;
2779 
2780    iris_finalize_program(shader, NULL, system_values,
2781                          num_system_values, 0, num_cbufs, &bt);
2782 
2783    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2784                       sizeof(*key), key, program);
2785 
2786    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2787 
2788    ralloc_free(mem_ctx);
2789 }
2790 
2791 /**
2792  * Update the current fragment shader variant.
2793  *
2794  * Fill out the key, look in the cache, compile and bind if needed.
2795  */
2796 static void
2797 iris_update_compiled_fs(struct iris_context *ice)
2798 {
2799    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2800    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2801    struct iris_uncompiled_shader *ish =
2802       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2803    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2804    struct iris_fs_prog_key key = { KEY_INIT(base) };
2805    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2806 
2807    struct intel_vue_map *last_vue_map =
2808       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2809 
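        /* Only fragment shaders with more than 16 varying inputs set
         * IRIS_NOS_LAST_VUE_MAP (see iris_create_shader_state), so only they
         * bake the previous stage's slots_valid into the key.
         */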
2810    if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2811       key.input_slots_valid = last_vue_map->slots_valid;
2812 
2813    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2814    bool added;
2815    struct iris_compiled_shader *shader =
2816       find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2817                           sizeof(key), &added);
2818 
2819    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2820                                           &key, sizeof(key))) {
2821       iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2822    }
2823 
2824    if (shader->compilation_failed)
2825       shader = NULL;
2826 
2827    if (old != shader) {
2828       // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2829       // toggles.  might be able to avoid flagging SBE too.
2830       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2831                                     shader);
2832       ice->state.dirty |= IRIS_DIRTY_WM |
2833                           IRIS_DIRTY_CLIP |
2834                           IRIS_DIRTY_SBE;
2835       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2836                                 IRIS_STAGE_DIRTY_BINDINGS_FS |
2837                                 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2838       shs->sysvals_need_upload = true;
2839    }
2840 }
2841 
2842 /**
2843  * Update the last enabled stage's VUE map.
2844  *
2845  * When the shader feeding the rasterizer's output interface changes, we
2846  * need to re-emit various packets.
2847  */
2848 static void
2849 update_last_vue_map(struct iris_context *ice,
2850                     struct iris_compiled_shader *shader)
2851 {
2852    const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2853    const struct intel_vue_map *old_map =
2854       !ice->shaders.last_vue_shader ? NULL :
2855       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
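        /* XOR-ing the old and new slots_valid bitfields yields the varying
         * slots added or removed by this shader change.
         */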
2856    const uint64_t changed_slots =
2857       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2858 
2859    if (changed_slots & VARYING_BIT_VIEWPORT) {
2860       ice->state.num_viewports =
2861          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2862       ice->state.dirty |= IRIS_DIRTY_CLIP |
2863                           IRIS_DIRTY_SF_CL_VIEWPORT |
2864                           IRIS_DIRTY_CC_VIEWPORT |
2865                           IRIS_DIRTY_SCISSOR_RECT;
2866       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2867          ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2868    }
2869 
2870    if (changed_slots & VARYING_BIT_LAYER) {
2871       ice->state.dirty |= IRIS_DIRTY_CLIP;
2872    }
2873 
2874    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2875       ice->state.dirty |= IRIS_DIRTY_SBE;
2876    }
2877 
2878    iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2879 }
2880 
2881 static void
2882 iris_update_pull_constant_descriptors(struct iris_context *ice,
2883                                       gl_shader_stage stage)
2884 {
2885    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2886 
2887    if (!shader || !shader->has_ubo_pull)
2888       return;
2889 
2890    struct iris_shader_state *shs = &ice->state.shaders[stage];
2891    bool any_new_descriptors =
2892       shader->num_system_values > 0 && shs->sysvals_need_upload;
2893 
2894    unsigned bound_cbufs = shs->bound_cbufs;
2895 
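        /* Lazily create a surface state for any bound constant buffer that
         * still lacks one; the UBO pull-constant path reads through these
         * descriptors.
         */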
2896    while (bound_cbufs) {
2897       const int i = u_bit_scan(&bound_cbufs);
2898       struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2899       struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2900       if (!surf_state->res && cbuf->buffer) {
2901          iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2902                                          ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2903          any_new_descriptors = true;
2904       }
2905    }
2906 
2907    if (any_new_descriptors)
2908       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2909 }
2910 
2911 /**
2912  * Update the current shader variants for the given state.
2913  *
2914  * This should be called on every draw call to ensure that the correct
2915  * shaders are bound.  It will also flag any dirty state triggered by
2916  * swapping out those shaders.
2917  */
2918 void
2919 iris_update_compiled_shaders(struct iris_context *ice)
2920 {
2921    const uint64_t stage_dirty = ice->state.stage_dirty;
2922 
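        /* Tessellation is driven by the TES: if one is bound, refresh both
         * the TCS and TES variants (iris can supply a passthrough TCS when
         * the app doesn't bind one); otherwise drop both variants and flag
         * the dependent state.
         */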
2923    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2924                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2925        struct iris_uncompiled_shader *tes =
2926           ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2927        if (tes) {
2928           iris_update_compiled_tcs(ice);
2929           iris_update_compiled_tes(ice);
2930        } else {
2931           iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2932           iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2933           ice->state.stage_dirty |=
2934              IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2935              IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2936              IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2937 
2938           if (ice->shaders.urb.constrained)
2939              ice->state.dirty |= IRIS_DIRTY_URB;
2940        }
2941    }
2942 
2943    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2944       iris_update_compiled_vs(ice);
2945    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2946       iris_update_compiled_gs(ice);
2947 
2948    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2949                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2950       const struct iris_compiled_shader *gs =
2951          ice->shaders.prog[MESA_SHADER_GEOMETRY];
2952       const struct iris_compiled_shader *tes =
2953          ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2954 
2955       bool points_or_lines = false;
2956 
2957       if (gs) {
2958          const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2959          points_or_lines =
2960             gs_data->output_topology == _3DPRIM_POINTLIST ||
2961             gs_data->output_topology == _3DPRIM_LINESTRIP;
2962       } else if (tes) {
2963          const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2964          points_or_lines =
2965             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2966             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2967       }
2968 
2969       if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2970          /* Outbound to XY Clip enables */
2971          ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2972          ice->state.dirty |= IRIS_DIRTY_CLIP;
2973       }
2974    }
2975 
2976    gl_shader_stage last_stage = last_vue_stage(ice);
2977    struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2978    struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2979    update_last_vue_map(ice, shader);
2980    if (ice->state.streamout != shader->streamout) {
2981       ice->state.streamout = shader->streamout;
2982       ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2983    }
2984 
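        /* Gallium stream output strides are counted in dwords; convert to
         * bytes when refreshing the bound targets.
         */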
2985    if (ice->state.streamout_active) {
2986       for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2987          struct iris_stream_output_target *so =
2988             (void *) ice->state.so_target[i];
2989          if (so)
2990             so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2991       }
2992    }
2993 
2994    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2995       iris_update_compiled_fs(ice);
2996 
2997    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2998       if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2999          iris_update_pull_constant_descriptors(ice, i);
3000    }
3001 }
3002 
3003 static void
3004 iris_compile_cs(struct iris_screen *screen,
3005                 struct u_upload_mgr *uploader,
3006                 struct util_debug_callback *dbg,
3007                 struct iris_uncompiled_shader *ish,
3008                 struct iris_compiled_shader *shader)
3009 {
3010    void *mem_ctx = ralloc_context(NULL);
3011    uint32_t *system_values;
3012    const struct intel_device_info *devinfo = screen->devinfo;
3013    unsigned num_system_values;
3014    unsigned num_cbufs;
3015 
3016    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
3017    const struct iris_cs_prog_key *const key = &shader->key.cs;
3018 
3019    if (screen->brw)
3020       NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
3021    else
3022 #ifdef INTEL_USE_ELK
3023       NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
3024 #else
3025       unreachable("no elk support");
3026 #endif
3027 
3028    iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
3029                        &system_values, &num_system_values, &num_cbufs);
3030 
3031    struct iris_binding_table bt;
3032    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
3033                             num_system_values, num_cbufs, false);
3034 
3035    const char *error;
3036    const unsigned *program;
3037 
3038    if (screen->brw) {
3039       struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
3040 
3041       struct brw_cs_prog_data *brw_prog_data =
3042          rzalloc(mem_ctx, struct brw_cs_prog_data);
3043 
3044       struct brw_compile_cs_params params = {
3045          .base = {
3046             .mem_ctx = mem_ctx,
3047             .nir = nir,
3048             .log_data = dbg,
3049             .source_hash = ish->source_hash,
3050          },
3051          .key = &brw_key,
3052          .prog_data = brw_prog_data,
3053       };
3054 
3055       program = brw_compile_cs(screen->brw, &params);
3056       error = params.base.error_str;
3057       if (program) {
3058          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
3059          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
3060       }
3061    } else {
3062 #ifdef INTEL_USE_ELK
3063       struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
3064 
3065       struct elk_cs_prog_data *elk_prog_data =
3066          rzalloc(mem_ctx, struct elk_cs_prog_data);
3067 
3068       struct elk_compile_cs_params params = {
3069          .base = {
3070             .mem_ctx = mem_ctx,
3071             .nir = nir,
3072             .log_data = dbg,
3073             .source_hash = ish->source_hash,
3074          },
3075          .key = &elk_key,
3076          .prog_data = elk_prog_data,
3077       };
3078 
3079       program = elk_compile_cs(screen->elk, &params);
3080       error = params.base.error_str;
3081       if (program) {
3082          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3083          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3084       }
3085 #else
3086       unreachable("no elk support");
3087 #endif
3088    }
3089 
3090    if (program == NULL) {
3091       dbg_printf("Failed to compile compute shader: %s\n", error);
           ralloc_free(mem_ctx);
3092 
3093       shader->compilation_failed = true;
3094       util_queue_fence_signal(&shader->ready);
3095 
3096       return;
3097    }
3098 
3099    shader->compilation_failed = false;
3100 
3101    iris_finalize_program(shader, NULL, system_values,
3102                          num_system_values, ish->kernel_input_size, num_cbufs,
3103                          &bt);
3104 
3105    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3106                       sizeof(*key), key, program);
3107 
3108    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3109 
3110    ralloc_free(mem_ctx);
3111 }
3112 
3113 static void
3114 iris_update_compiled_cs(struct iris_context *ice)
3115 {
3116    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3117    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3118    struct iris_uncompiled_shader *ish =
3119       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3120    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3121    struct iris_cs_prog_key key = { KEY_INIT(base) };
3122    screen->vtbl.populate_cs_key(ice, &key);
3123 
3124    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3125    bool added;
3126    struct iris_compiled_shader *shader =
3127       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3128                           sizeof(key), &added);
3129 
3130    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3131                                           &key, sizeof(key))) {
3132       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3133    }
3134 
3135    if (shader->compilation_failed)
3136       shader = NULL;
3137 
3138    if (old != shader) {
3139       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3140                                     shader);
3141       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3142                                 IRIS_STAGE_DIRTY_BINDINGS_CS |
3143                                 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3144       shs->sysvals_need_upload = true;
3145    }
3146 }
3147 
3148 void
3149 iris_update_compiled_compute_shader(struct iris_context *ice)
3150 {
3151    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3152       iris_update_compiled_cs(ice);
3153 
3154    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3155       iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3156 }
3157 
3158 void
3159 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3160                                struct iris_compiled_shader *shader,
3161                                unsigned threads,
3162                                uint32_t *dst)
3163 {
3164    struct iris_cs_data *cs_data = iris_cs_data(shader);
3165    assert(iris_cs_push_const_total_size(shader, threads) > 0);
3166    assert(cs_data->push.cross_thread.size == 0);
3167    assert(cs_data->push.per_thread.dwords == 1);
3168    assert(cs_data->first_param_is_builtin_subgroup_id);
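        /* Each thread gets a single dword of push data, its subgroup ID,
         * stored every 8 dwords (32 bytes).
         */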
3169    for (unsigned t = 0; t < threads; t++)
3170       dst[8 * t] = t;
3171 }
3172 
3173 /**
3174  * Allocate scratch BOs as needed for the given per-thread size and stage.
3175  */
3176 struct iris_bo *
3177 iris_get_scratch_space(struct iris_context *ice,
3178                        unsigned per_thread_scratch,
3179                        gl_shader_stage stage)
3180 {
3181    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3182    struct iris_bufmgr *bufmgr = screen->bufmgr;
3183    const struct intel_device_info *devinfo = screen->devinfo;
3184 
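        /* Per-thread scratch sizes are powers of two starting at 1KB, encoded
         * as log2(size) - 10 (1KB -> 0, 2KB -> 1, ...); ffs() is 1-based,
         * hence the -11.
         */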
3185    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3186    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3187    assert(per_thread_scratch == 1 << (encoded_size + 10));
3188 
3189    /* On GFX version 12.5, scratch access changed to a surface-based model.
3190     * Instead of each shader type having its own layout based on IDs passed
3191     * from the relevant fixed-function unit, all scratch access is based on
3192     * thread IDs like it always has been for compute.
3193     */
3194    if (devinfo->verx10 >= 125)
3195       stage = MESA_SHADER_COMPUTE;
3196 
3197    struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3198 
3199    if (!*bop) {
3200       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3201       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3202       *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3203                            IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3204    }
3205 
3206    return *bop;
3207 }
3208 
3209 const struct iris_state_ref *
3210 iris_get_scratch_surf(struct iris_context *ice,
3211                       unsigned per_thread_scratch)
3212 {
3213    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3214    ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3215 
3216    assert(devinfo->verx10 >= 125);
3217 
3218    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3219    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3220    assert(per_thread_scratch == 1 << (encoded_size + 10));
3221 
3222    struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3223 
3224    if (ref->res)
3225       return ref;
3226 
3227    struct iris_bo *scratch_bo =
3228       iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3229 
3230    void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3231                             screen->isl_dev.ss.size, 64);
3232 
3233    isl_buffer_fill_state(&screen->isl_dev, map,
3234                          .address = scratch_bo->address,
3235                          .size_B = scratch_bo->size,
3236                          .format = ISL_FORMAT_RAW,
3237                          .swizzle = ISL_SWIZZLE_IDENTITY,
3238                          .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3239                          .stride_B = per_thread_scratch,
3240                          .is_scratch = true);
3241 
3242    return ref;
3243 }
3244 
3245 /* ------------------------------------------------------------------- */
3246 
3247 /**
3248  * The pipe->create_[stage]_state() driver hooks.
3249  *
3250  * Performs basic NIR preprocessing, records any state dependencies, and
3251  * returns an iris_uncompiled_shader as the Gallium CSO.
3252  *
3253  * Actual shader compilation to assembly happens later, at first use.
3254  */
3255 static void *
3256 iris_create_uncompiled_shader(struct iris_screen *screen,
3257                               nir_shader *nir,
3258                               const struct pipe_stream_output_info *so_info)
3259 {
3260    struct iris_uncompiled_shader *ish =
3261       calloc(1, sizeof(struct iris_uncompiled_shader));
3262    if (!ish)
3263       return NULL;
3264 
3265    pipe_reference_init(&ish->ref, 1);
3266    list_inithead(&ish->variants);
3267    simple_mtx_init(&ish->lock, mtx_plain);
3268    util_queue_fence_init(&ish->ready);
3269 
3270    ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3271 
3272    ish->program_id = get_new_program_id(screen);
3273    ish->nir = nir;
3274    if (so_info) {
3275       memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3276       update_so_info(&ish->stream_output, nir->info.outputs_written);
3277    }
3278 
3279    /* Use lowest dword of source shader blake3 for shader hash. */
3280    ish->source_hash = *(uint32_t*)nir->info.source_blake3;
3281 
3282    if (screen->disk_cache) {
3283       /* Serialize the NIR to a binary blob that we can hash for the disk
3284        * cache.  Drop unnecessary information (like variable names)
3285        * so the serialized NIR is smaller, and also to let us detect more
3286        * isomorphic shaders when hashing, increasing cache hits.
3287        */
3288       struct blob blob;
3289       blob_init(&blob);
3290       nir_serialize(&blob, nir, true);
3291       _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3292       blob_finish(&blob);
3293    }
3294 
3295    return ish;
3296 }
3297 
3298 static void *
3299 iris_create_compute_state(struct pipe_context *ctx,
3300                           const struct pipe_compute_state *state)
3301 {
3302    struct iris_context *ice = (void *) ctx;
3303    struct iris_screen *screen = (void *) ctx->screen;
3304    struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3305 
3306    nir_shader *nir;
3307    switch (state->ir_type) {
3308    case PIPE_SHADER_IR_NIR:
3309       nir = (void *)state->prog;
3310       break;
3311 
3312    default:
3313       unreachable("Unsupported IR");
3314    }
3315 
3316    /* Most of iris doesn't really care about the difference between compute
3317     * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
3318     * it's way easier if we just normalize to COMPUTE here.
3319     */
3320    assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3321           nir->info.stage == MESA_SHADER_KERNEL);
3322    nir->info.stage = MESA_SHADER_COMPUTE;
3323 
3324    struct iris_uncompiled_shader *ish =
3325       iris_create_uncompiled_shader(screen, nir, NULL);
3326    ish->kernel_input_size = state->req_input_mem;
3327    ish->kernel_shared_size = state->static_shared_mem;
3328 
3329    // XXX: disallow more than 64KB of shared variables
3330 
3331    if (screen->precompile) {
3332       struct iris_cs_prog_key key = { KEY_INIT(base) };
3333 
3334       struct iris_compiled_shader *shader =
3335          iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3336                                     IRIS_CACHE_CS, sizeof(key), &key);
3337 
3338       /* Append our new variant to the shader's variant list. */
3339       list_addtail(&shader->link, &ish->variants);
3340 
3341       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3342                                     &key, sizeof(key))) {
3343          iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3344       }
3345    }
3346 
3347    return ish;
3348 }
3349 
3350 static void
3351 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3352                             struct pipe_compute_state_object_info *info)
3353 {
3354    struct iris_screen *screen = (void *) ctx->screen;
3355    struct iris_uncompiled_shader *ish = state;
3356 
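        /* The widest dispatch is SIMD32, so the invocation limit is 32 times
         * the per-workgroup hardware thread limit, clamped to 1024.
         */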
3357    info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3358    info->private_memory = 0;
3359    info->preferred_simd_size = 32;
3360    info->simd_sizes = 8 | 16 | 32;
3361 
3362    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3363                             &ish->variants, link) {
3364       info->private_memory = MAX2(info->private_memory,
3365                                   shader->total_scratch);
3366    }
3367 }
3368 
3369 static uint32_t
3370 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3371                                      const uint32_t block[3])
3372 {
3373    struct iris_context *ice = (void *) ctx;
3374    struct iris_screen *screen = (void *) ctx->screen;
3375    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3376    struct iris_uncompiled_shader *ish = state;
3377 
3378    struct iris_cs_prog_key key = { KEY_INIT(base) };
3379    screen->vtbl.populate_cs_key(ice, &key);
3380 
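        /* Answering this query requires a real variant for the current key,
         * so look one up and compile it on demand if it's new and not in the
         * disk cache.
         */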
3381    bool added;
3382    struct iris_compiled_shader *shader =
3383       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3384                           sizeof(key), &added);
3385 
3386    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3387                                           &key, sizeof(key))) {
3388       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3389    }
3390 
3391    return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3392 }
3393 
3394 static void
3395 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3396 {
3397    const struct iris_threaded_compile_job *job =
3398       (struct iris_threaded_compile_job *) _job;
3399 
3400    struct iris_screen *screen = job->screen;
3401    struct u_upload_mgr *uploader = job->uploader;
3402    struct util_debug_callback *dbg = job->dbg;
3403    struct iris_uncompiled_shader *ish = job->ish;
3404    struct iris_compiled_shader *shader = job->shader;
3405 
3406    switch (ish->nir->info.stage) {
3407    case MESA_SHADER_VERTEX:
3408       iris_compile_vs(screen, uploader, dbg, ish, shader);
3409       break;
3410    case MESA_SHADER_TESS_CTRL:
3411       iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3412       break;
3413    case MESA_SHADER_TESS_EVAL:
3414       iris_compile_tes(screen, uploader, dbg, ish, shader);
3415       break;
3416    case MESA_SHADER_GEOMETRY:
3417       iris_compile_gs(screen, uploader, dbg, ish, shader);
3418       break;
3419    case MESA_SHADER_FRAGMENT:
3420       iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3421       break;
3422 
3423    default:
3424       unreachable("Invalid shader stage.");
3425    }
3426 }
3427 
3428 static void *
3429 iris_create_shader_state(struct pipe_context *ctx,
3430                          const struct pipe_shader_state *state)
3431 {
3432    struct iris_context *ice = (void *) ctx;
3433    struct iris_screen *screen = (void *) ctx->screen;
3434    struct nir_shader *nir;
3435 
3436    if (state->type == PIPE_SHADER_IR_TGSI)
3437       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3438    else
3439       nir = state->ir.nir;
3440 
3441    const struct shader_info *const info = &nir->info;
3442    struct iris_uncompiled_shader *ish =
3443       iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3444 
3445    union iris_any_prog_key key;
3446    unsigned key_size = 0;
3447 
3448    memset(&key, 0, sizeof(key));
3449 
3450    switch (info->stage) {
3451    case MESA_SHADER_VERTEX:
3452       /* User clip planes */
3453       if (info->clip_distance_array_size == 0)
3454          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3455 
3456       key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3457       key_size = sizeof(key.vs);
3458       break;
3459 
3460    case MESA_SHADER_TESS_CTRL: {
3461       key.tcs = (struct iris_tcs_prog_key) {
3462          KEY_INIT(vue.base),
3463          // XXX: make sure the linker fills this out from the TES...
3464          ._tes_primitive_mode =
3465          info->tess._primitive_mode ? info->tess._primitive_mode
3466                                    : TESS_PRIMITIVE_TRIANGLES,
3467          .outputs_written = info->outputs_written,
3468          .patch_outputs_written = info->patch_outputs_written,
3469       };
3470 
3471       /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3472        * We don't have that information, so we randomly guess that the input
3473        * and output patches are the same size.  This is a bad guess, but we
3474        * can't do much better.
3475        */
3476       if (iris_use_tcs_multi_patch(screen))
3477          key.tcs.input_vertices = info->tess.tcs_vertices_out;
3478 
3479       key_size = sizeof(key.tcs);
3480       break;
3481    }
3482 
3483    case MESA_SHADER_TESS_EVAL:
3484       /* User clip planes */
3485       if (info->clip_distance_array_size == 0)
3486          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3487 
3488       key.tes = (struct iris_tes_prog_key) {
3489          KEY_INIT(vue.base),
3490          // XXX: not ideal, need TCS output/TES input unification
3491          .inputs_read = info->inputs_read,
3492          .patch_inputs_read = info->patch_inputs_read,
3493       };
3494 
3495       key_size = sizeof(key.tes);
3496       break;
3497 
3498    case MESA_SHADER_GEOMETRY:
3499       ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3500 
3501       key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3502       key_size = sizeof(key.gs);
3503       break;
3504 
3505    case MESA_SHADER_FRAGMENT:
3506       ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3507                   (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3508                   (1ull << IRIS_NOS_RASTERIZER) |
3509                   (1ull << IRIS_NOS_BLEND);
3510 
3511 #ifdef INTEL_USE_ELK
3512       STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3513 #endif
3514 
3515       /* The program key needs the VUE map if there are > 16 inputs */
3516       if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3517          ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3518       }
3519 
3520       const uint64_t color_outputs = info->outputs_written &
3521          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3522            BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3523            BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3524 
3525       bool can_rearrange_varyings =
3526          util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3527 
3528       const struct intel_device_info *devinfo = screen->devinfo;
3529 
3530       key.fs = (struct iris_fs_prog_key) {
3531          KEY_INIT(base),
3532          .nr_color_regions = util_bitcount(color_outputs),
3533          .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
3534          .input_slots_valid =
3535             can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3536       };
3537 
3538       key_size = sizeof(key.fs);
3539       break;
3540 
3541    default:
3542       unreachable("Invalid shader stage.");
3543    }
3544 
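        /* Optionally kick off a precompile with a default key on the screen's
         * shader compiler queue; draw-time state may still demand different
         * variants later.
         */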
3545    if (screen->precompile) {
3546       struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3547 
3548       struct iris_compiled_shader *shader =
3549          iris_create_shader_variant(screen, NULL, info->stage,
3550                                     (enum iris_program_cache_id) info->stage,
3551                                     key_size, &key);
3552 
3553       /* Append our new variant to the shader's variant list. */
3554       list_addtail(&shader->link, &ish->variants);
3555 
3556       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3557                                     &key, key_size)) {
3558          assert(!util_queue_fence_is_signalled(&shader->ready));
3559 
3560          struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3561 
3562          job->screen = screen;
3563          job->uploader = uploader;
3564          job->ish = ish;
3565          job->shader = shader;
3566 
3567          iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3568                                iris_compile_shader);
3569       }
3570    }
3571 
3572    return ish;
3573 }
3574 
3575 /**
3576  * Called when the refcount on the iris_uncompiled_shader reaches 0.
3577  *
3578  * Frees the iris_uncompiled_shader.
3579  *
3580  * \sa iris_delete_shader_state
3581  */
3582 void
3583 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3584 {
3585    struct iris_uncompiled_shader *ish = state;
3586 
3587    /* No need to take ish->lock; we hold the last reference to ish */
3588    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3589                             &ish->variants, link) {
3590       list_del(&shader->link);
3591 
3592       iris_shader_variant_reference(&shader, NULL);
3593    }
3594 
3595    simple_mtx_destroy(&ish->lock);
3596    util_queue_fence_destroy(&ish->ready);
3597 
3598    ralloc_free(ish->nir);
3599    free(ish);
3600 }
3601 
3602 /**
3603  * The pipe->delete_[stage]_state() driver hooks.
3604  *
3605  * \sa iris_destroy_shader_state
3606  */
3607 static void
3608 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3609 {
3610    struct iris_uncompiled_shader *ish = state;
3611    struct iris_context *ice = (void *) ctx;
3612 
3613    const gl_shader_stage stage = ish->nir->info.stage;
3614 
3615    if (ice->shaders.uncompiled[stage] == ish) {
3616       ice->shaders.uncompiled[stage] = NULL;
3617       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3618    }
3619 
3620    if (pipe_reference(&ish->ref, NULL))
3621       iris_destroy_shader_state(ctx, state);
3622 }
3623 
3624 /**
3625  * The pipe->bind_[stage]_state() driver hook.
3626  *
3627  * Binds an uncompiled shader as the current one for a particular stage.
3628  * Updates dirty tracking to account for the shader's NOS.
3629  */
3630 static void
3631 bind_shader_state(struct iris_context *ice,
3632                   struct iris_uncompiled_shader *ish,
3633                   gl_shader_stage stage)
3634 {
3635    uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3636    const uint64_t nos = ish ? ish->nos : 0;
3637 
3638    const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3639    const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3640 
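        /* If the size of the used-sampler range changes, the stage's sampler
         * state table needs to be re-uploaded.
         */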
3641    if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3642        (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3643       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3644    }
3645 
3646    ice->shaders.uncompiled[stage] = ish;
3647    ice->state.stage_dirty |= stage_dirty_bit;
3648 
3649    /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
3650     * (or that they no longer need to do so).
3651     */
3652    for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3653       if (nos & (1 << i))
3654          ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3655       else
3656          ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3657    }
3658 }
3659 
3660 static void
3661 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3662 {
3663    struct iris_context *ice = (struct iris_context *)ctx;
3664    struct iris_uncompiled_shader *ish = state;
3665 
3666    if (ish) {
3667       const struct shader_info *info = &ish->nir->info;
3668       if (ice->state.window_space_position != info->vs.window_space_position) {
3669          ice->state.window_space_position = info->vs.window_space_position;
3670 
3671          ice->state.dirty |= IRIS_DIRTY_CLIP |
3672                              IRIS_DIRTY_RASTER |
3673                              IRIS_DIRTY_CC_VIEWPORT;
3674       }
3675 
3676       const bool uses_draw_params =
3677          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3678          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3679       const bool uses_derived_draw_params =
3680          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3681          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3682       const bool needs_sgvs_element = uses_draw_params ||
3683          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3684          BITSET_TEST(info->system_values_read,
3685                      SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3686 
3687       if (ice->state.vs_uses_draw_params != uses_draw_params ||
3688           ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3689           ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3690           ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3691          ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3692                              IRIS_DIRTY_VERTEX_ELEMENTS;
3693       }
3694 
3695       ice->state.vs_uses_draw_params = uses_draw_params;
3696       ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3697       ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3698       ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3699    }
3700 
3701    bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3702 }
3703 
3704 static void
3705 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3706 {
3707    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3708 }
3709 
3710 static void
3711 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3712 {
3713    struct iris_context *ice = (struct iris_context *)ctx;
3714    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3715    const struct intel_device_info *devinfo = screen->devinfo;
3716 
3717    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3718    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3719       ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3720                                             IRIS_DIRTY_VFG : 0);
3721 
3722    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3723 }
3724 
3725 static void
3726 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3727 {
3728    struct iris_context *ice = (struct iris_context *)ctx;
3729 
3730    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3731    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3732       ice->state.dirty |= IRIS_DIRTY_URB;
3733 
3734    bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3735 }
3736 
3737 static void
3738 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3739 {
3740    struct iris_context *ice = (struct iris_context *) ctx;
3741    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3742    const struct intel_device_info *devinfo = screen->devinfo;
3743    struct iris_uncompiled_shader *old_ish =
3744       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3745    struct iris_uncompiled_shader *new_ish = state;
3746 
3747    const unsigned color_bits =
3748       BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3749       BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3750 
3751    /* Fragment shader outputs influence HasWriteableRT */
3752    if (!old_ish || !new_ish ||
3753        (old_ish->nir->info.outputs_written & color_bits) !=
3754        (new_ish->nir->info.outputs_written & color_bits))
3755       ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3756 
3757    if (devinfo->ver == 8)
3758       ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3759 
3760    bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3761 }
3762 
3763 static void
3764 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3765 {
3766    bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3767 }
3768 
3769 static char *
3770 iris_finalize_nir(struct pipe_screen *_screen, struct nir_shader *nir)
3771 {
3772    struct iris_screen *screen = (struct iris_screen *)_screen;
3773    const struct intel_device_info *devinfo = screen->devinfo;
3774 
3775    NIR_PASS_V(nir, iris_fix_edge_flags);
3776 
3777    if (screen->brw) {
3778       struct brw_nir_compiler_opts opts = {};
3779       brw_preprocess_nir(screen->brw, nir, &opts);
3780 
3781       NIR_PASS_V(nir, brw_nir_lower_storage_image,
3782                  &(struct brw_nir_lower_storage_image_opts) {
3783                     .devinfo      = devinfo,
3784                     .lower_loads  = true,
3785                     .lower_stores = true,
3786                  });
3787    } else {
3788 #ifdef INTEL_USE_ELK
3789       assert(screen->elk);
3790 
3791       struct elk_nir_compiler_opts opts = {};
3792       elk_preprocess_nir(screen->elk, nir, &opts);
3793 
3794       NIR_PASS_V(nir, elk_nir_lower_storage_image,
3795                  &(struct elk_nir_lower_storage_image_opts) {
3796                     .devinfo        = devinfo,
3797                     .lower_loads    = true,
3798                     .lower_stores   = true,
3799 
3800                     /* Iris uploads image params used by
3801                      * get_size lowering only in Gfx8.
3802                      */
3803                     .lower_get_size = true,
3804                  });
3805 #else
3806       unreachable("no elk support");
3807 #endif
3808    }
3809 
3810    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3811 
3812    nir_sweep(nir);
3813 
3814    return NULL;
3815 }
3816 
3817 static void
3818 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3819                                      unsigned max_threads)
3820 {
3821    struct iris_screen *screen = (struct iris_screen *) pscreen;
3822    util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3823                                  false);
3824 }
3825 
3826 static bool
3827 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3828                                              void *v_shader,
3829                                              enum pipe_shader_type p_stage)
3830 {
3831    struct iris_screen *screen = (struct iris_screen *) pscreen;
3832 
3833    /* Threaded compilation is only used for the precompile.  If precompile is
3834     * disabled, threaded compilation is "done."
3835     */
3836    if (!screen->precompile)
3837       return true;
3838 
3839    struct iris_uncompiled_shader *ish = v_shader;
3840 
3841    /* When precompile is enabled, the first entry is the precompile variant.
3842     * Check the ready fence of the precompile variant.
3843     */
3844    struct iris_compiled_shader *first =
3845       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3846 
3847    return util_queue_fence_is_signalled(&first->ready);
3848 }
3849 
3850 void
3851 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3852 {
3853    pscreen->is_parallel_shader_compilation_finished =
3854       iris_is_parallel_shader_compilation_finished;
3855    pscreen->set_max_shader_compiler_threads =
3856       iris_set_max_shader_compiler_threads;
3857    pscreen->finalize_nir = iris_finalize_nir;
3858 }
3859 
3860 void
3861 iris_init_program_functions(struct pipe_context *ctx)
3862 {
3863    ctx->create_vs_state  = iris_create_shader_state;
3864    ctx->create_tcs_state = iris_create_shader_state;
3865    ctx->create_tes_state = iris_create_shader_state;
3866    ctx->create_gs_state  = iris_create_shader_state;
3867    ctx->create_fs_state  = iris_create_shader_state;
3868    ctx->create_compute_state = iris_create_compute_state;
3869 
3870    ctx->delete_vs_state  = iris_delete_shader_state;
3871    ctx->delete_tcs_state = iris_delete_shader_state;
3872    ctx->delete_tes_state = iris_delete_shader_state;
3873    ctx->delete_gs_state  = iris_delete_shader_state;
3874    ctx->delete_fs_state  = iris_delete_shader_state;
3875    ctx->delete_compute_state = iris_delete_shader_state;
3876 
3877    ctx->bind_vs_state  = iris_bind_vs_state;
3878    ctx->bind_tcs_state = iris_bind_tcs_state;
3879    ctx->bind_tes_state = iris_bind_tes_state;
3880    ctx->bind_gs_state  = iris_bind_gs_state;
3881    ctx->bind_fs_state  = iris_bind_fs_state;
3882    ctx->bind_compute_state = iris_bind_cs_state;
3883 
3884    ctx->get_compute_state_info = iris_get_compute_state_info;
3885    ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3886 }
3887 
3888 struct intel_cs_dispatch_info
3889 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3890                           const struct iris_compiled_shader *shader,
3891                           const uint32_t block[3])
3892 {
3893    if (shader->brw_prog_data) {
3894       return brw_cs_get_dispatch_info(devinfo,
3895                                       brw_cs_prog_data(shader->brw_prog_data),
3896                                       block);
3897    } else {
3898 #ifdef INTEL_USE_ELK
3899       assert(shader->elk_prog_data);
3900       return elk_cs_get_dispatch_info(devinfo,
3901                                       elk_cs_prog_data(shader->elk_prog_data),
3902                                       block);
3903 #else
3904       unreachable("no elk support");
3905 #endif
3906    }
3907 }
3908 
3909 unsigned
3910 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3911                               unsigned threads)
3912 {
3913    if (shader->brw_prog_data) {
3914       return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3915                                           threads);
3916    } else {
3917 #ifdef INTEL_USE_ELK
3918       assert(shader->elk_prog_data);
3919       return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3920                                           threads);
3921 #else
3922       unreachable("no elk support");
3923 #endif
3924    }
3925 }
3926 
3927 uint32_t
3928 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3929                           enum intel_msaa_flags pushed_msaa_flags)
3930 {
3931    if (shader->brw_prog_data) {
3932       return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3933                                             pushed_msaa_flags);
3934    } else {
3935 #ifdef INTEL_USE_ELK
3936       assert(shader->elk_prog_data);
3937       return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3938                                                 pushed_msaa_flags);
3939 #else
3940       unreachable("no elk support");
3941 #endif
3942    }
3943 }
3944 
3945 bool
3946 iris_use_tcs_multi_patch(struct iris_screen *screen)
3947 {
3948    return screen->brw && screen->brw->use_tcs_multi_patch;
3949 }
3950 
3951 bool
3952 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3953 {
3954    if (screen->brw) {
3955       return screen->brw->indirect_ubos_use_sampler;
3956    } else {
3957 #ifdef INTEL_USE_ELK
3958       assert(screen->elk);
3959       return screen->elk->indirect_ubos_use_sampler;
3960 #else
3961       unreachable("no elk support");
3962 #endif
3963    }
3964 }
3965 
3966 static void
3967 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3968 {
3969    struct util_debug_callback *dbg = data;
3970    va_list args;
3971 
3972    if (!dbg->debug_message)
3973       return;
3974 
3975    va_start(args, fmt);
3976    dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3977    va_end(args);
3978 }
3979 
3980 static void
3981 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3982 {
3983    struct util_debug_callback *dbg = data;
3984    va_list args;
3985    va_start(args, fmt);
3986 
3987    if (INTEL_DEBUG(DEBUG_PERF)) {
3988       va_list args_copy;
3989       va_copy(args_copy, args);
3990       vfprintf(stderr, fmt, args_copy);
3991       va_end(args_copy);
3992    }
3993 
3994    if (dbg->debug_message) {
3995       dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3996    }
3997 
3998    va_end(args);
3999 }
4000 
4001 const void *
4002 iris_get_compiler_options(struct pipe_screen *pscreen,
4003                           enum pipe_shader_ir ir,
4004                           enum pipe_shader_type pstage)
4005 {
4006    struct iris_screen *screen = (struct iris_screen *) pscreen;
4007    gl_shader_stage stage = stage_from_pipe(pstage);
4008    assert(ir == PIPE_SHADER_IR_NIR);
4009 
4010 #ifdef INTEL_USE_ELK
4011    return screen->brw ? screen->brw->nir_options[stage]
4012                       : screen->elk->nir_options[stage];
4013 #else
4014    return screen->brw->nir_options[stage];
4015 #endif
4016 }
4017 
4018 void
4019 iris_compiler_init(struct iris_screen *screen)
4020 {
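        /* Gfx9 and newer use the brw compiler; older hardware falls back to
         * the legacy elk compiler when it is built in.
         */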
4021    if (screen->devinfo->ver >= 9) {
4022       STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
4023 
4024       screen->brw = brw_compiler_create(screen, screen->devinfo);
4025       screen->brw->shader_debug_log = iris_shader_debug_log;
4026       screen->brw->shader_perf_log = iris_shader_perf_log;
4027    } else {
4028 #ifdef INTEL_USE_ELK
4029       STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
4030       STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
4031 
4032       screen->elk = elk_compiler_create(screen, screen->devinfo);
4033       screen->elk->shader_debug_log = iris_shader_debug_log;
4034       screen->elk->shader_perf_log = iris_shader_perf_log;
4035       screen->elk->supports_shader_constants = true;
4036 #else
4037       unreachable("no elk support");
4038 #endif
4039    }
4040 }
4041