1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See iris_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #include "intel/compiler/elk/elk_compiler.h"
50 #include "intel/compiler/elk/elk_nir.h"
51 #include "intel/compiler/elk/elk_prim.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "nir/tgsi_to_nir.h"
55 
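/* Helpers for filling out the key fields shared by every shader stage:
 * the program string ID and the limit_trig_input_range workaround flag.
 * KEY_INIT fills an iris key from the uncompiled shader and screen;
 * BRW_KEY_INIT and ELK_KEY_INIT fill the corresponding backend keys.
 */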
56 #define KEY_INIT(prefix)                                                   \
57    .prefix.program_string_id = ish->program_id,                            \
58    .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
59 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
60    .base.program_string_id = prog_id,                      \
61    .base.limit_trig_input_range = limit_trig_input
62 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input)       \
63    .base.program_string_id = prog_id,                      \
64    .base.limit_trig_input_range = limit_trig_input
65 
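/* Bundle of arguments captured for a deferred shader compile, so the
 * compile can run on a driver thread instead of in the API call.
 */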
66 struct iris_threaded_compile_job {
67    struct iris_screen *screen;
68    struct u_upload_mgr *uploader;
69    struct util_debug_callback *dbg;
70    struct iris_uncompiled_shader *ish;
71    struct iris_compiled_shader *shader;
72 };
73 
74 static unsigned
75 get_new_program_id(struct iris_screen *screen)
76 {
77    return p_atomic_inc_return(&screen->program_id);
78 }
79 
80 static void
81 iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
82                             const struct brw_wm_prog_data *brw)
83 {
84    assert(shader->stage == MESA_SHADER_FRAGMENT);
85    struct iris_fs_data *iris = &shader->fs;
86 
87    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
88    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
89    memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
90    memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
91    iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
92 
93    iris->num_varying_inputs   = brw->num_varying_inputs;
94    iris->msaa_flags_param     = brw->msaa_flags_param;
95    iris->flat_inputs          = brw->flat_inputs;
96    iris->inputs               = brw->inputs;
97    iris->computed_depth_mode  = brw->computed_depth_mode;
98    iris->max_polygons         = brw->max_polygons;
99    iris->dispatch_multi       = brw->dispatch_multi;
100    iris->computed_stencil     = brw->computed_stencil;
101    iris->early_fragment_tests = brw->early_fragment_tests;
102    iris->post_depth_coverage  = brw->post_depth_coverage;
103    iris->inner_coverage       = brw->inner_coverage;
104    iris->dispatch_8           = brw->dispatch_8;
105    iris->dispatch_16          = brw->dispatch_16;
106    iris->dispatch_32          = brw->dispatch_32;
107    iris->dual_src_blend       = brw->dual_src_blend;
108    iris->uses_pos_offset      = brw->uses_pos_offset;
109    iris->uses_omask           = brw->uses_omask;
110    iris->uses_kill            = brw->uses_kill;
111    iris->uses_src_depth       = brw->uses_src_depth;
112    iris->uses_src_w           = brw->uses_src_w;
113    iris->uses_sample_mask     = brw->uses_sample_mask;
114    iris->uses_vmask           = brw->uses_vmask;
115    iris->has_side_effects     = brw->has_side_effects;
116    iris->pulls_bary           = brw->pulls_bary;
117 
118    iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
119 
120    iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
121 }
122 
123 static void
124 iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
125                             const struct brw_cs_prog_data *brw)
126 {
127    assert(shader->stage == MESA_SHADER_COMPUTE);
128    struct iris_cs_data *iris = &shader->cs;
129 
130    iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
131    iris->push.cross_thread.regs   = brw->push.cross_thread.regs;
132    iris->push.cross_thread.size   = brw->push.cross_thread.size;
133 
134    iris->push.per_thread.dwords = brw->push.per_thread.dwords;
135    iris->push.per_thread.regs   = brw->push.per_thread.regs;
136    iris->push.per_thread.size   = brw->push.per_thread.size;
137 
138    iris->local_size[0]  = brw->local_size[0];
139    iris->local_size[1]  = brw->local_size[1];
140    iris->local_size[2]  = brw->local_size[2];
141    iris->prog_offset[0] = brw->prog_offset[0];
142    iris->prog_offset[1] = brw->prog_offset[1];
143    iris->prog_offset[2] = brw->prog_offset[2];
144 
145    iris->generate_local_id = brw->generate_local_id;
146    iris->walk_order        = brw->walk_order;
147    iris->uses_barrier      = brw->uses_barrier;
148    iris->prog_mask         = brw->prog_mask;
149 
150    iris->first_param_is_builtin_subgroup_id =
151       brw->base.nr_params > 0 &&
152       brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
153 }
154 
155 static void
156 iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
157                              struct iris_vue_data *iris)
158 {
159    memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
160 
161    iris->urb_read_length     = brw->urb_read_length;
162    iris->cull_distance_mask  = brw->cull_distance_mask;
163    iris->urb_entry_size      = brw->urb_entry_size;
164    iris->dispatch_mode       = brw->dispatch_mode;
165    iris->include_vue_handles = brw->include_vue_handles;
166 }
167 
168 static void
169 iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
170                             const struct brw_vs_prog_data *brw)
171 {
172    assert(shader->stage == MESA_SHADER_VERTEX);
173    struct iris_vs_data *iris = &shader->vs;
174 
175    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
176 
177    iris->uses_vertexid     = brw->uses_vertexid;
178    iris->uses_instanceid   = brw->uses_instanceid;
179    iris->uses_firstvertex  = brw->uses_firstvertex;
180    iris->uses_baseinstance = brw->uses_baseinstance;
181    iris->uses_drawid       = brw->uses_drawid;
182 }
183 
184 static void
185 iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
186                              const struct brw_tcs_prog_data *brw)
187 {
188    assert(shader->stage == MESA_SHADER_TESS_CTRL);
189    struct iris_tcs_data *iris = &shader->tcs;
190 
191    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
192 
193    iris->instances             = brw->instances;
194    iris->patch_count_threshold = brw->patch_count_threshold;
195    iris->include_primitive_id  = brw->include_primitive_id;
196 }
197 
198 static void
199 iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
200                              const struct brw_tes_prog_data *brw)
201 {
202    assert(shader->stage == MESA_SHADER_TESS_EVAL);
203    struct iris_tes_data *iris = &shader->tes;
204 
205    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
206 
207    iris->partitioning         = brw->partitioning;
208    iris->output_topology      = brw->output_topology;
209    iris->domain               = brw->domain;
210    iris->include_primitive_id = brw->include_primitive_id;
211 }
212 
213 static void
214 iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
215                             const struct brw_gs_prog_data *brw)
216 {
217    assert(shader->stage == MESA_SHADER_GEOMETRY);
218    struct iris_gs_data *iris = &shader->gs;
219 
220    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
221 
222    iris->vertices_in                     = brw->vertices_in;
223    iris->output_vertex_size_hwords       = brw->output_vertex_size_hwords;
224    iris->output_topology                 = brw->output_topology;
225    iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
226    iris->control_data_format             = brw->control_data_format;
227    iris->static_vertex_count             = brw->static_vertex_count;
228    iris->invocations                     = brw->invocations;
229    iris->include_primitive_id            = brw->include_primitive_id;
230 }
231 
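/**
 * Copy the fields of a brw_stage_prog_data into the compiler-agnostic
 * iris_compiled_shader, then take ownership of the backend data via
 * ralloc_steal so it lives (and dies) with the shader.
 */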
232 void
233 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
234                          struct brw_stage_prog_data *brw)
235 {
236    STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
237    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
238       shader->ubo_ranges[i].block  = brw->ubo_ranges[i].block;
239       shader->ubo_ranges[i].start  = brw->ubo_ranges[i].start;
240       shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
241    }
242 
243    shader->nr_params              = brw->nr_params;
244    shader->total_scratch          = brw->total_scratch;
245    shader->total_shared           = brw->total_shared;
246    shader->program_size           = brw->program_size;
247    shader->const_data_offset      = brw->const_data_offset;
248    shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
249    shader->has_ubo_pull           = brw->has_ubo_pull;
250    shader->use_alt_mode           = brw->use_alt_mode;
251 
252    switch (shader->stage) {
253    case MESA_SHADER_FRAGMENT:
254       iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
255       break;
256    case MESA_SHADER_COMPUTE:
257       iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
258       break;
259    case MESA_SHADER_VERTEX:
260       iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
261       break;
262    case MESA_SHADER_TESS_CTRL:
263       iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
264       break;
265    case MESA_SHADER_TESS_EVAL:
266       iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
267       break;
268    case MESA_SHADER_GEOMETRY:
269       iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
270       break;
271    default:
272       unreachable("invalid shader stage");
273    }
274 
275    shader->brw_prog_data = brw;
276 
277    ralloc_steal(shader, shader->brw_prog_data);
278    ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
279    ralloc_steal(shader->brw_prog_data, brw->param);
280 }
281 
282 static void
283 iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
284                             const struct elk_wm_prog_data *elk)
285 {
286    assert(shader->stage == MESA_SHADER_FRAGMENT);
287    struct iris_fs_data *iris = &shader->fs;
288 
289    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
290    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
291    memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
292    memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
293    iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
294 
295    iris->num_varying_inputs   = elk->num_varying_inputs;
296    iris->msaa_flags_param     = elk->msaa_flags_param;
297    iris->flat_inputs          = elk->flat_inputs;
298    iris->inputs               = elk->inputs;
299    iris->computed_depth_mode  = elk->computed_depth_mode;
300    iris->max_polygons         = elk->max_polygons;
301    iris->dispatch_multi       = elk->dispatch_multi;
302    iris->computed_stencil     = elk->computed_stencil;
303    iris->early_fragment_tests = elk->early_fragment_tests;
304    iris->post_depth_coverage  = elk->post_depth_coverage;
305    iris->inner_coverage       = elk->inner_coverage;
306    iris->dispatch_8           = elk->dispatch_8;
307    iris->dispatch_16          = elk->dispatch_16;
308    iris->dispatch_32          = elk->dispatch_32;
309    iris->dual_src_blend       = elk->dual_src_blend;
310    iris->uses_pos_offset      = elk->uses_pos_offset;
311    iris->uses_omask           = elk->uses_omask;
312    iris->uses_kill            = elk->uses_kill;
313    iris->uses_src_depth       = elk->uses_src_depth;
314    iris->uses_src_w           = elk->uses_src_w;
315    iris->uses_sample_mask     = elk->uses_sample_mask;
316    iris->uses_vmask           = elk->uses_vmask;
317    iris->pulls_bary           = elk->pulls_bary;
318    iris->has_side_effects     = elk->has_side_effects;
319 
320    iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
321 
322    iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
323 }
324 
325 static void
326 iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
327                             const struct elk_cs_prog_data *elk)
328 {
329    assert(shader->stage == MESA_SHADER_COMPUTE);
330    struct iris_cs_data *iris = &shader->cs;
331 
332    iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
333    iris->push.cross_thread.regs   = elk->push.cross_thread.regs;
334    iris->push.cross_thread.size   = elk->push.cross_thread.size;
335 
336    iris->push.per_thread.dwords = elk->push.per_thread.dwords;
337    iris->push.per_thread.regs   = elk->push.per_thread.regs;
338    iris->push.per_thread.size   = elk->push.per_thread.size;
339 
340    iris->local_size[0]  = elk->local_size[0];
341    iris->local_size[1]  = elk->local_size[1];
342    iris->local_size[2]  = elk->local_size[2];
343    iris->prog_offset[0] = elk->prog_offset[0];
344    iris->prog_offset[1] = elk->prog_offset[1];
345    iris->prog_offset[2] = elk->prog_offset[2];
346 
347    iris->generate_local_id = elk->generate_local_id;
348    iris->walk_order        = elk->walk_order;
349    iris->uses_barrier      = elk->uses_barrier;
350    iris->prog_mask         = elk->prog_mask;
351 
352    iris->first_param_is_builtin_subgroup_id =
353       elk->base.nr_params > 0 &&
354       elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
355 }
356 
357 static void
358 iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
359                              struct iris_vue_data *iris)
360 {
361    memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
362 
363    iris->urb_read_length     = elk->urb_read_length;
364    iris->cull_distance_mask  = elk->cull_distance_mask;
365    iris->urb_entry_size      = elk->urb_entry_size;
366    iris->dispatch_mode       = elk->dispatch_mode;
367    iris->include_vue_handles = elk->include_vue_handles;
368 }
369 
370 static void
371 iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
372                             const struct elk_vs_prog_data *elk)
373 {
374    assert(shader->stage == MESA_SHADER_VERTEX);
375    struct iris_vs_data *iris = &shader->vs;
376 
377    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
378 
379    iris->uses_vertexid     = elk->uses_vertexid;
380    iris->uses_instanceid   = elk->uses_instanceid;
381    iris->uses_firstvertex  = elk->uses_firstvertex;
382    iris->uses_baseinstance = elk->uses_baseinstance;
383    iris->uses_drawid       = elk->uses_drawid;
384 }
385 
386 static void
387 iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
388                              const struct elk_tcs_prog_data *elk)
389 {
390    assert(shader->stage == MESA_SHADER_TESS_CTRL);
391    struct iris_tcs_data *iris = &shader->tcs;
392 
393    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
394 
395    iris->instances             = elk->instances;
396    iris->patch_count_threshold = elk->patch_count_threshold;
397    iris->include_primitive_id  = elk->include_primitive_id;
398 }
399 
400 static void
401 iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
402                              const struct elk_tes_prog_data *elk)
403 {
404    assert(shader->stage == MESA_SHADER_TESS_EVAL);
405    struct iris_tes_data *iris = &shader->tes;
406 
407    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
408 
409    iris->partitioning         = elk->partitioning;
410    iris->output_topology      = elk->output_topology;
411    iris->domain               = elk->domain;
412    iris->include_primitive_id = elk->include_primitive_id;
413 }
414 
415 static void
416 iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
417                             const struct elk_gs_prog_data *elk)
418 {
419    assert(shader->stage == MESA_SHADER_GEOMETRY);
420    struct iris_gs_data *iris = &shader->gs;
421 
422    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
423 
424    iris->vertices_in                     = elk->vertices_in;
425    iris->output_vertex_size_hwords       = elk->output_vertex_size_hwords;
426    iris->output_topology                 = elk->output_topology;
427    iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
428    iris->control_data_format             = elk->control_data_format;
429    iris->static_vertex_count             = elk->static_vertex_count;
430    iris->invocations                     = elk->invocations;
431    iris->include_primitive_id            = elk->include_primitive_id;
432 }
433 
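/**
 * The elk counterpart of iris_apply_brw_prog_data().  elk is the compiler
 * used for older hardware generations, and its prog_data mirrors brw's.
 */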
434 void
435 iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
436                          struct elk_stage_prog_data *elk)
437 {
438    STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
439    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
440       shader->ubo_ranges[i].block  = elk->ubo_ranges[i].block;
441       shader->ubo_ranges[i].start  = elk->ubo_ranges[i].start;
442       shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
443    }
444 
445    shader->nr_params              = elk->nr_params;
446    shader->total_scratch          = elk->total_scratch;
447    shader->total_shared           = elk->total_shared;
448    shader->program_size           = elk->program_size;
449    shader->const_data_offset      = elk->const_data_offset;
450    shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
451    shader->has_ubo_pull           = elk->has_ubo_pull;
452    shader->use_alt_mode           = elk->use_alt_mode;
453 
454    switch (shader->stage) {
455    case MESA_SHADER_FRAGMENT:
456       iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
457       break;
458    case MESA_SHADER_COMPUTE:
459       iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
460       break;
461    case MESA_SHADER_VERTEX:
462       iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
463       break;
464    case MESA_SHADER_TESS_CTRL:
465       iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
466       break;
467    case MESA_SHADER_TESS_EVAL:
468       iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
469       break;
470    case MESA_SHADER_GEOMETRY:
471       iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
472       break;
473    default:
474       unreachable("invalid shader stage");
475    }
476 
477    shader->elk_prog_data = elk;
478 
479    ralloc_steal(shader, shader->elk_prog_data);
480    ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
481    ralloc_steal(shader->elk_prog_data, elk->param);
482 }
483 
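/**
 * Attach the remaining pieces of a compiled shader variant: streamout
 * state, system values, constant buffer counts, and the binding table.
 * The streamout and system value allocations are re-parented to the
 * shader so they share its lifetime.
 */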
484 void
485 iris_finalize_program(struct iris_compiled_shader *shader,
486                       uint32_t *streamout,
487                       uint32_t *system_values,
488                       unsigned num_system_values,
489                       unsigned kernel_input_size,
490                       unsigned num_cbufs,
491                       const struct iris_binding_table *bt)
492 {
493    /* There can be only one. */
494    assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
495 
496    shader->streamout = streamout;
497    shader->system_values = system_values;
498    shader->num_system_values = num_system_values;
499    shader->kernel_input_size = kernel_input_size;
500    shader->num_cbufs = num_cbufs;
501    shader->bt = *bt;
502 
503    ralloc_steal(shader, shader->streamout);
504    ralloc_steal(shader, shader->system_values);
505 }
506 
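/* The iris_to_{brw,elk}_*_key() helpers below translate iris's API-level
 * program keys into the backend compilers' key structures, e.g. expanding
 * booleans like alpha_to_coverage into the backends' ALWAYS/NEVER
 * tristate enums.
 */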
507 static struct brw_vs_prog_key
508 iris_to_brw_vs_key(const struct iris_screen *screen,
509                    const struct iris_vs_prog_key *key)
510 {
511    return (struct brw_vs_prog_key) {
512       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
513                    key->vue.base.limit_trig_input_range),
514    };
515 }
516 
517 static struct brw_tcs_prog_key
518 iris_to_brw_tcs_key(const struct iris_screen *screen,
519                     const struct iris_tcs_prog_key *key)
520 {
521    return (struct brw_tcs_prog_key) {
522       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
523                    key->vue.base.limit_trig_input_range),
524       ._tes_primitive_mode = key->_tes_primitive_mode,
525       .input_vertices = key->input_vertices,
526       .patch_outputs_written = key->patch_outputs_written,
527       .outputs_written = key->outputs_written,
528    };
529 }
530 
531 static struct brw_tes_prog_key
532 iris_to_brw_tes_key(const struct iris_screen *screen,
533                     const struct iris_tes_prog_key *key)
534 {
535    return (struct brw_tes_prog_key) {
536       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
537                    key->vue.base.limit_trig_input_range),
538       .patch_inputs_read = key->patch_inputs_read,
539       .inputs_read = key->inputs_read,
540    };
541 }
542 
543 static struct brw_gs_prog_key
544 iris_to_brw_gs_key(const struct iris_screen *screen,
545                    const struct iris_gs_prog_key *key)
546 {
547    return (struct brw_gs_prog_key) {
548       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
549                    key->vue.base.limit_trig_input_range),
550    };
551 }
552 
553 static struct brw_wm_prog_key
554 iris_to_brw_fs_key(const struct iris_screen *screen,
555                    const struct iris_fs_prog_key *key)
556 {
557    return (struct brw_wm_prog_key) {
558       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
559                    key->base.limit_trig_input_range),
560       .nr_color_regions = key->nr_color_regions,
561       .flat_shade = key->flat_shade,
562       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
563       .alpha_to_coverage = key->alpha_to_coverage ? BRW_ALWAYS : BRW_NEVER,
564       .clamp_fragment_color = key->clamp_fragment_color,
565       .persample_interp = key->persample_interp ? BRW_ALWAYS : BRW_NEVER,
566       .multisample_fbo = key->multisample_fbo ? BRW_ALWAYS : BRW_NEVER,
567       .force_dual_color_blend = key->force_dual_color_blend,
568       .coherent_fb_fetch = key->coherent_fb_fetch,
569       .color_outputs_valid = key->color_outputs_valid,
570       .input_slots_valid = key->input_slots_valid,
571       .ignore_sample_mask_out = !key->multisample_fbo,
572    };
573 }
574 
575 static struct brw_cs_prog_key
576 iris_to_brw_cs_key(const struct iris_screen *screen,
577                    const struct iris_cs_prog_key *key)
578 {
579    return (struct brw_cs_prog_key) {
580       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
581                    key->base.limit_trig_input_range),
582    };
583 }
584 
585 static struct elk_vs_prog_key
586 iris_to_elk_vs_key(const struct iris_screen *screen,
587                    const struct iris_vs_prog_key *key)
588 {
589    return (struct elk_vs_prog_key) {
590       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
591                    key->vue.base.limit_trig_input_range),
592 
593       /* Don't tell the backend about our clip plane constants; we've
594        * already lowered them in NIR and don't want it doing that again.
595        */
596       .nr_userclip_plane_consts = 0,
597    };
598 }
599 
600 static struct elk_tcs_prog_key
601 iris_to_elk_tcs_key(const struct iris_screen *screen,
602                     const struct iris_tcs_prog_key *key)
603 {
604    return (struct elk_tcs_prog_key) {
605       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
606                    key->vue.base.limit_trig_input_range),
607       ._tes_primitive_mode = key->_tes_primitive_mode,
608       .input_vertices = key->input_vertices,
609       .patch_outputs_written = key->patch_outputs_written,
610       .outputs_written = key->outputs_written,
611       .quads_workaround = key->quads_workaround,
612    };
613 }
614 
615 static struct elk_tes_prog_key
616 iris_to_elk_tes_key(const struct iris_screen *screen,
617                     const struct iris_tes_prog_key *key)
618 {
619    return (struct elk_tes_prog_key) {
620       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
621                    key->vue.base.limit_trig_input_range),
622       .patch_inputs_read = key->patch_inputs_read,
623       .inputs_read = key->inputs_read,
624    };
625 }
626 
627 static struct elk_gs_prog_key
628 iris_to_elk_gs_key(const struct iris_screen *screen,
629                    const struct iris_gs_prog_key *key)
630 {
631    return (struct elk_gs_prog_key) {
632       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
633                    key->vue.base.limit_trig_input_range),
634    };
635 }
636 
637 static struct elk_wm_prog_key
638 iris_to_elk_fs_key(const struct iris_screen *screen,
639                    const struct iris_fs_prog_key *key)
640 {
641    return (struct elk_wm_prog_key) {
642       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
643                    key->base.limit_trig_input_range),
644       .nr_color_regions = key->nr_color_regions,
645       .flat_shade = key->flat_shade,
646       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
647       .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
648       .clamp_fragment_color = key->clamp_fragment_color,
649       .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
650       .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
651       .force_dual_color_blend = key->force_dual_color_blend,
652       .coherent_fb_fetch = key->coherent_fb_fetch,
653       .color_outputs_valid = key->color_outputs_valid,
654       .input_slots_valid = key->input_slots_valid,
655       .ignore_sample_mask_out = !key->multisample_fbo,
656    };
657 }
658 
659 static struct elk_cs_prog_key
660 iris_to_elk_cs_key(const struct iris_screen *screen,
661                    const struct iris_cs_prog_key *key)
662 {
663    return (struct elk_cs_prog_key) {
664       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
665                    key->base.limit_trig_input_range),
666    };
667 }
668 
669 static void *
670 upload_state(struct u_upload_mgr *uploader,
671              struct iris_state_ref *ref,
672              unsigned size,
673              unsigned alignment)
674 {
675    void *p = NULL;
676    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
677    return p;
678 }
679 
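/**
 * Upload a SURFACE_STATE for a UBO or SSBO.  SSBOs, and UBOs that are
 * read through the dataport rather than the sampler, use a RAW buffer
 * format; otherwise the surface is set up as R32G32B32A32_FLOAT.
 */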
680 void
681 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
682                                 struct pipe_shader_buffer *buf,
683                                 struct iris_state_ref *surf_state,
684                                 isl_surf_usage_flags_t usage)
685 {
686    struct pipe_context *ctx = &ice->ctx;
687    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
688    bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
689 
690    void *map =
691       upload_state(ice->state.surface_uploader, surf_state,
692                    screen->isl_dev.ss.size, 64);
693    if (unlikely(!map)) {
694       surf_state->res = NULL;
695       return;
696    }
697 
698    struct iris_resource *res = (void *) buf->buffer;
699    struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
700    surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
701 
702    const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
703 
704    isl_buffer_fill_state(&screen->isl_dev, map,
705                          .address = res->bo->address + res->offset +
706                                     buf->buffer_offset,
707                          .size_B = buf->buffer_size - res->offset,
708                          .format = dataport ? ISL_FORMAT_RAW
709                                             : ISL_FORMAT_R32G32B32A32_FLOAT,
710                          .swizzle = ISL_SWIZZLE_IDENTITY,
711                          .stride_B = 1,
712                          .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
713 }
714 
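/**
 * Walk an array-of-arrays deref chain and compute a flattened offset,
 * scaling each level's index by the size of the levels below it.  The
 * result is clamped to the array bounds (see the comment at the end).
 */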
715 static nir_def *
716 get_aoa_deref_offset(nir_builder *b,
717                      nir_deref_instr *deref,
718                      unsigned elem_size)
719 {
720    unsigned array_size = elem_size;
721    nir_def *offset = nir_imm_int(b, 0);
722 
723    while (deref->deref_type != nir_deref_type_var) {
724       assert(deref->deref_type == nir_deref_type_array);
725 
726       /* This level's element size is the previous level's array size */
727       nir_def *index = deref->arr.index.ssa;
728       assert(deref->arr.index.ssa);
729       offset = nir_iadd(b, offset,
730                            nir_imul_imm(b, index, array_size));
731 
732       deref = nir_deref_instr_parent(deref);
733       assert(glsl_type_is_array(deref->type));
734       array_size *= glsl_get_length(deref->type);
735    }
736 
737    /* Accessing an invalid surface index with the dataport can result in a
738     * hang.  According to the spec "if the index used to select an individual
739     * element is negative or greater than or equal to the size of the array,
740     * the results of the operation are undefined but may not lead to
741     * termination" -- which is one of the possible outcomes of the hang.
742     * Clamp the index to prevent access outside of the array bounds.
743     */
744    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
745 }
746 
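/* Rewrite image deref intrinsics to use flat image indices, computed as
 * the variable's driver_location plus the (clamped) array offset.
 */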
747 static void
748 iris_lower_storage_image_derefs(nir_shader *nir)
749 {
750    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
751 
752    nir_builder b = nir_builder_create(impl);
753 
754    nir_foreach_block(block, impl) {
755       nir_foreach_instr_safe(instr, block) {
756          if (instr->type != nir_instr_type_intrinsic)
757             continue;
758 
759          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
760          switch (intrin->intrinsic) {
761          case nir_intrinsic_image_deref_load:
762          case nir_intrinsic_image_deref_store:
763          case nir_intrinsic_image_deref_atomic:
764          case nir_intrinsic_image_deref_atomic_swap:
765          case nir_intrinsic_image_deref_size:
766          case nir_intrinsic_image_deref_samples:
767          case nir_intrinsic_image_deref_load_raw_intel:
768          case nir_intrinsic_image_deref_store_raw_intel: {
769             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
770             nir_variable *var = nir_deref_instr_get_variable(deref);
771 
772             b.cursor = nir_before_instr(&intrin->instr);
773             nir_def *index =
774                nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
775                                 var->data.driver_location);
776             nir_rewrite_image_intrinsic(intrin, index, false);
777             break;
778          }
779 
780          default:
781             break;
782          }
783       }
784    }
785 }
786 
787 static bool
788 iris_uses_image_atomic(const nir_shader *shader)
789 {
790    nir_foreach_function_impl(impl, shader) {
791       nir_foreach_block(block, impl) {
792          nir_foreach_instr(instr, block) {
793             if (instr->type != nir_instr_type_intrinsic)
794                continue;
795 
796             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
797             switch (intrin->intrinsic) {
798             case nir_intrinsic_image_deref_atomic:
799             case nir_intrinsic_image_deref_atomic_swap:
800                unreachable("Should have been lowered in "
801                            "iris_lower_storage_image_derefs");
802 
803             case nir_intrinsic_image_atomic:
804             case nir_intrinsic_image_atomic_swap:
805                return true;
806 
807             default:
808                break;
809             }
810          }
811       }
812    }
813 
814    return false;
815 }
816 
817 /**
818  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
819  */
820 static bool
821 iris_fix_edge_flags(nir_shader *nir)
822 {
823    if (nir->info.stage != MESA_SHADER_VERTEX) {
824       nir_shader_preserve_all_metadata(nir);
825       return false;
826    }
827 
828    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
829                                                        VARYING_SLOT_EDGE);
830    if (!var) {
831       nir_shader_preserve_all_metadata(nir);
832       return false;
833    }
834 
835    var->data.mode = nir_var_shader_temp;
836    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
837    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
838    nir_fixup_deref_modes(nir);
839 
840    nir_foreach_function_impl(impl, nir) {
841       nir_metadata_preserve(impl, nir_metadata_block_index |
842                                   nir_metadata_dominance |
843                                   nir_metadata_live_defs |
844                                   nir_metadata_loop_analysis);
845    }
846 
847    return true;
848 }
849 
850 /**
851  * Fix an uncompiled shader's stream output info.
852  *
853  * Core Gallium stores output->register_index as a "slot" number, where
854  * slots are assigned consecutively to all outputs in info->outputs_written.
855  * This naive packing of outputs doesn't work for us - we too have slots,
856  * but the layout is defined by the VUE map, which we won't have until we
857  * compile a specific shader variant.  So, we remap these and simply store
858  * VARYING_SLOT_* in our copy's output->register_index fields.
859  *
860  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
861  * components of our VUE header.  See brw_vue_map.c for the layout.
862  */
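/* Illustrative example: if a vertex shader writes only gl_Position and
 * gl_PointSize, outputs_written has bits VARYING_SLOT_POS (0) and
 * VARYING_SLOT_PSIZ (1) set, so Gallium slot 0 maps back to
 * VARYING_SLOT_POS and slot 1 to VARYING_SLOT_PSIZ; the PSIZ output is
 * then additionally redirected to component 3 (.w) of the VUE header.
 */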
863 static void
864 update_so_info(struct pipe_stream_output_info *so_info,
865                uint64_t outputs_written)
866 {
867    uint8_t reverse_map[64] = {};
868    unsigned slot = 0;
869    while (outputs_written) {
870       reverse_map[slot++] = u_bit_scan64(&outputs_written);
871    }
872 
873    for (unsigned i = 0; i < so_info->num_outputs; i++) {
874       struct pipe_stream_output *output = &so_info->output[i];
875 
876       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
877       output->register_index = reverse_map[output->register_index];
878 
879       /* The VUE header contains three scalar fields packed together:
880        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
881        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
882        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
883        */
884       switch (output->register_index) {
885       case VARYING_SLOT_LAYER:
886          assert(output->num_components == 1);
887          output->register_index = VARYING_SLOT_PSIZ;
888          output->start_component = 1;
889          break;
890       case VARYING_SLOT_VIEWPORT:
891          assert(output->num_components == 1);
892          output->register_index = VARYING_SLOT_PSIZ;
893          output->start_component = 2;
894          break;
895       case VARYING_SLOT_PSIZ:
896          assert(output->num_components == 1);
897          output->start_component = 3;
898          break;
899       }
900 
901       //info->outputs_written |= 1ull << output->register_index;
902    }
903 }
904 
905 static void
906 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
907                         unsigned offset, unsigned n)
908 {
909    assert(offset % sizeof(uint32_t) == 0);
910 
911    for (unsigned i = 0; i < n; ++i)
912       sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
913 
914    for (unsigned i = n; i < 4; ++i)
915       sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
916 }
917 
918 /**
919  * Associate NIR uniform variables with the prog_data->param[] mechanism
920  * used by the backend.  Also, decide which UBOs we'd like to push in an
921  * ideal situation (though the backend can reduce this).
922  */
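/* The strategy below: gather every system value the shader needs into a
 * synthetic constant buffer.  Loads are first emitted against a
 * placeholder SSA value (temp_ubo_name) and rewritten to the real
 * sysval cbuf index once the total number of cbufs is known.
 */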
923 static void
924 iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
925                     void *mem_ctx,
926                     nir_shader *nir,
927                     unsigned kernel_input_size,
928                     uint32_t **out_system_values,
929                     unsigned *out_num_system_values,
930                     unsigned *out_num_cbufs)
931 {
932    unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
933 
934    const unsigned IRIS_MAX_SYSTEM_VALUES =
935       PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
936    unsigned *system_values =
937       rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
938    unsigned num_system_values = 0;
939 
940    unsigned patch_vert_idx = -1;
941    unsigned tess_outer_default_idx = -1;
942    unsigned tess_inner_default_idx = -1;
943    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
944    unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
945    unsigned variable_group_size_idx = -1;
946    unsigned work_dim_idx = -1;
947    memset(ucp_idx, -1, sizeof(ucp_idx));
948    memset(img_idx, -1, sizeof(img_idx));
949 
950    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
951 
952    nir_builder b = nir_builder_at(nir_before_impl(impl));
953 
954    nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
955 
956    /* Turn system value intrinsics into uniforms */
957    nir_foreach_block(block, impl) {
958       nir_foreach_instr_safe(instr, block) {
959          if (instr->type != nir_instr_type_intrinsic)
960             continue;
961 
962          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
963          nir_def *offset;
964 
965          switch (intrin->intrinsic) {
966          case nir_intrinsic_load_base_workgroup_id: {
967             /* GL doesn't have a concept of base workgroup */
968             b.cursor = nir_instr_remove(&intrin->instr);
969             nir_def_rewrite_uses(&intrin->def,
970                                      nir_imm_zero(&b, 3, 32));
971             continue;
972          }
973          case nir_intrinsic_load_constant: {
974             unsigned load_size = intrin->def.num_components *
975                                  intrin->def.bit_size / 8;
976             unsigned load_align = intrin->def.bit_size / 8;
977 
978             /* This one is special because it reads from the shader constant
979              * data and not cbuf0 which gallium uploads for us.
980              */
981             b.cursor = nir_instr_remove(&intrin->instr);
982 
983             nir_def *offset =
984                nir_iadd_imm(&b, intrin->src[0].ssa,
985                                 nir_intrinsic_base(intrin));
986 
987             assert(load_size < b.shader->constant_data_size);
988             unsigned max_offset = b.shader->constant_data_size - load_size;
989             offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
990 
991             /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
992              * and cannot cross that 4GB boundary, so we can do the address
993              * calculation with 32-bit adds.  Also, we can ignore the high
994              * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
995              */
996             assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
997 
998             nir_def *const_data_addr =
999                nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1000 
1001             nir_def *data =
1002                nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1003                                         load_align,
1004                                         intrin->def.num_components,
1005                                         intrin->def.bit_size);
1006 
1007             nir_def_rewrite_uses(&intrin->def,
1008                                      data);
1009             continue;
1010          }
1011          case nir_intrinsic_load_user_clip_plane: {
1012             unsigned ucp = nir_intrinsic_ucp_id(intrin);
1013 
1014             if (ucp_idx[ucp] == -1) {
1015                ucp_idx[ucp] = num_system_values;
1016                num_system_values += 4;
1017             }
1018 
1019             for (int i = 0; i < 4; i++) {
1020                system_values[ucp_idx[ucp] + i] =
1021                   BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1022             }
1023 
1024             b.cursor = nir_before_instr(instr);
1025             offset = nir_imm_int(&b, system_values_start +
1026                                      ucp_idx[ucp] * sizeof(uint32_t));
1027             break;
1028          }
1029          case nir_intrinsic_load_patch_vertices_in:
1030             if (patch_vert_idx == -1)
1031                patch_vert_idx = num_system_values++;
1032 
1033             system_values[patch_vert_idx] =
1034                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1035 
1036             b.cursor = nir_before_instr(instr);
1037             offset = nir_imm_int(&b, system_values_start +
1038                                      patch_vert_idx * sizeof(uint32_t));
1039             break;
1040          case nir_intrinsic_load_tess_level_outer_default:
1041             if (tess_outer_default_idx == -1) {
1042                tess_outer_default_idx = num_system_values;
1043                num_system_values += 4;
1044             }
1045 
1046             for (int i = 0; i < 4; i++) {
1047                system_values[tess_outer_default_idx + i] =
1048                   BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1049             }
1050 
1051             b.cursor = nir_before_instr(instr);
1052             offset = nir_imm_int(&b, system_values_start +
1053                                  tess_outer_default_idx * sizeof(uint32_t));
1054             break;
1055          case nir_intrinsic_load_tess_level_inner_default:
1056             if (tess_inner_default_idx == -1) {
1057                tess_inner_default_idx = num_system_values;
1058                num_system_values += 2;
1059             }
1060 
1061             for (int i = 0; i < 2; i++) {
1062                system_values[tess_inner_default_idx + i] =
1063                   BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1064             }
1065 
1066             b.cursor = nir_before_instr(instr);
1067             offset = nir_imm_int(&b, system_values_start +
1068                                  tess_inner_default_idx * sizeof(uint32_t));
1069             break;
1070          case nir_intrinsic_image_deref_load_param_intel: {
1071             assert(devinfo->ver < 9);
1072             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1073             nir_variable *var = nir_deref_instr_get_variable(deref);
1074 
1075             if (img_idx[var->data.binding] == -1) {
1076                /* GL only allows arrays of arrays of images. */
1077                assert(glsl_type_is_image(glsl_without_array(var->type)));
1078                unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1079 
1080                for (int i = 0; i < num_images; i++) {
1081                   const unsigned img = var->data.binding + i;
1082 
1083                   img_idx[img] = num_system_values;
1084                   num_system_values += ISL_IMAGE_PARAM_SIZE;
1085 
1086                   uint32_t *img_sv = &system_values[img_idx[img]];
1087 
1088                   setup_vec4_image_sysval(
1089                      img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1090                      offsetof(struct isl_image_param, offset), 2);
1091                   setup_vec4_image_sysval(
1092                      img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1093                      offsetof(struct isl_image_param, size), 3);
1094                   setup_vec4_image_sysval(
1095                      img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1096                      offsetof(struct isl_image_param, stride), 4);
1097                   setup_vec4_image_sysval(
1098                      img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1099                      offsetof(struct isl_image_param, tiling), 3);
1100                   setup_vec4_image_sysval(
1101                      img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1102                      offsetof(struct isl_image_param, swizzling), 2);
1103                }
1104             }
1105 
1106             b.cursor = nir_before_instr(instr);
1107             offset = nir_iadd_imm(&b,
1108                get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1109                system_values_start +
1110                img_idx[var->data.binding] * 4 +
1111                nir_intrinsic_base(intrin) * 16);
1112             break;
1113          }
1114          case nir_intrinsic_load_workgroup_size: {
1115             assert(nir->info.workgroup_size_variable);
1116             if (variable_group_size_idx == -1) {
1117                variable_group_size_idx = num_system_values;
1118                num_system_values += 3;
1119                for (int i = 0; i < 3; i++) {
1120                   system_values[variable_group_size_idx + i] =
1121                      BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1122                }
1123             }
1124 
1125             b.cursor = nir_before_instr(instr);
1126             offset = nir_imm_int(&b, system_values_start +
1127                                      variable_group_size_idx * sizeof(uint32_t));
1128             break;
1129          }
1130          case nir_intrinsic_load_work_dim: {
1131             if (work_dim_idx == -1) {
1132                work_dim_idx = num_system_values++;
1133                system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1134             }
1135             b.cursor = nir_before_instr(instr);
1136             offset = nir_imm_int(&b, system_values_start +
1137                                      work_dim_idx * sizeof(uint32_t));
1138             break;
1139          }
1140          case nir_intrinsic_load_kernel_input: {
1141             assert(nir_intrinsic_base(intrin) +
1142                    nir_intrinsic_range(intrin) <= kernel_input_size);
1143             b.cursor = nir_before_instr(instr);
1144             offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1145                                       nir_intrinsic_base(intrin));
1146             break;
1147          }
1148          default:
1149             continue;
1150          }
1151 
1152          nir_def *load =
1153             nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1154                          temp_ubo_name, offset,
1155                          .align_mul = 4,
1156                          .align_offset = 0,
1157                          .range_base = 0,
1158                          .range = ~0);
1159 
1160          nir_def_rewrite_uses(&intrin->def,
1161                                   load);
1162          nir_instr_remove(instr);
1163       }
1164    }
1165 
1166    nir_validate_shader(nir, "before remapping");
1167 
1168    /* Uniforms are stored in constant buffer 0; the user-facing UBOs are
1169     * all shifted up by one.  So if any constant buffer is needed,
1170     * constant buffer 0 will be needed as well, so account for it.
1171     */
1172    unsigned num_cbufs = nir->info.num_ubos;
1173    if (num_cbufs || nir->num_uniforms)
1174       num_cbufs++;
1175 
1176    /* Place the new params in a new cbuf. */
1177    if (num_system_values > 0 || kernel_input_size > 0) {
1178       unsigned sysval_cbuf_index = num_cbufs;
1179       num_cbufs++;
1180 
1181       system_values = reralloc(mem_ctx, system_values, unsigned,
1182                                num_system_values);
1183 
1184       nir_foreach_block(block, impl) {
1185          nir_foreach_instr_safe(instr, block) {
1186             if (instr->type != nir_instr_type_intrinsic)
1187                continue;
1188 
1189             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1190 
1191             if (load->intrinsic != nir_intrinsic_load_ubo)
1192                continue;
1193 
1194             b.cursor = nir_before_instr(instr);
1195 
1196             if (load->src[0].ssa == temp_ubo_name) {
1197                nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1198                nir_src_rewrite(&load->src[0], imm);
1199             }
1200          }
1201       }
1202 
1203       /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1204       nir_opt_constant_folding(nir);
1205    } else {
1206       ralloc_free(system_values);
1207       system_values = NULL;
1208    }
1209 
1210    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1211    nir_validate_shader(nir, "after remap");
1212 
1213    /* We don't use params[] but gallium leaves num_uniforms set.  We use this
1214     * to detect when cbuf0 exists but we don't need it anymore when we get
1215     * here.  Instead, zero it out so that the back-end doesn't get confused
1216     * when nr_params * 4 != num_uniforms.
1217     */
1218    nir->num_uniforms = 0;
1219 
1220    *out_system_values = system_values;
1221    *out_num_system_values = num_system_values;
1222    *out_num_cbufs = num_cbufs;
1223 }
1224 
1225 static const char *surface_group_names[] = {
1226    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
1227    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1228    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
1229    [IRIS_SURFACE_GROUP_TEXTURE_LOW64]      = "texture",
1230    [IRIS_SURFACE_GROUP_TEXTURE_HIGH64]     = "texture",
1231    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
1232    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
1233    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
1234 };
1235 
1236 static void
1237 iris_print_binding_table(FILE *fp, const char *name,
1238                          const struct iris_binding_table *bt)
1239 {
1240    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1241 
1242    uint32_t total = 0;
1243    uint32_t compacted = 0;
1244 
1245    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1246       uint32_t size = bt->sizes[i];
1247       total += size;
1248       if (size)
1249          compacted += util_bitcount64(bt->used_mask[i]);
1250    }
1251 
1252    if (total == 0) {
1253       fprintf(fp, "Binding table for %s is empty\n\n", name);
1254       return;
1255    }
1256 
1257    if (total != compacted) {
1258       fprintf(fp, "Binding table for %s "
1259               "(compacted to %u entries from %u entries)\n",
1260               name, compacted, total);
1261    } else {
1262       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1263    }
1264 
1265    uint32_t entry = 0;
1266    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1267       uint64_t mask = bt->used_mask[i];
1268       while (mask) {
1269          int index = u_bit_scan64(&mask);
1270          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
1271       }
1272    }
1273    fprintf(fp, "\n");
1274 }

enum {
   /* Max elements in a surface group. */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};
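
/* 64 matches the width of each group's used_mask (a uint64_t), which the
 * compaction code below depends on.
 */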

/**
 * Map a <group, index> pair to a binding table index.
 *
 * For example: <UBO, 5> => binding table index 12
 */
uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
{
   assert(index < bt->sizes[group]);
   uint64_t mask = bt->used_mask[group];
   uint64_t bit = 1ull << index;
   if (bit & mask) {
      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
   } else {
      return IRIS_SURFACE_NOT_USED;
   }
}
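
/* Worked example (hypothetical values): with used_mask[group] = 0b101101 and
 * offsets[group] = 7, index 3 corresponds to bit 0b1000, which is set; two
 * lower bits (0 and 2) are also set, so the compacted BTI is 7 + 2 = 9.
 */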

/**
 * Map a binding table index back to a <group, index> pair.
 *
 * For example: binding table index 12 => <UBO, 5>
 */
uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
{
   uint64_t used_mask = bt->used_mask[group];
   assert(bti >= bt->offsets[group]);

   uint32_t c = bti - bt->offsets[group];
   while (used_mask) {
      int i = u_bit_scan64(&used_mask);
      if (c == 0)
         return i;
      c--;
   }

   return IRIS_SURFACE_NOT_USED;
}
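
/* Inverse of the example above: bti = 9 gives c = 9 - 7 = 2, and the third
 * set bit of 0b101101 is bit 3, recovering group index 3.
 */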

static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   b->cursor = nir_before_instr(instr);
   nir_def *bti;
   if (nir_src_is_const(*src)) {
      uint32_t index = nir_src_as_uint(*src);
      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                           src->ssa->bit_size);
   } else {
      /* Indirect usage means all the surfaces of the group are available,
       * so we can just add the base.
       */
      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_src_rewrite(src, bti);
}

static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   if (nir_src_is_const(*src)) {
      uint64_t index = nir_src_as_uint(*src);
      assert(index < bt->sizes[group]);
      bt->used_mask[group] |= 1ull << index;
   } else {
      /* There's an indirect usage, we need all the surfaces. */
      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
   }
}

static bool
skip_compacting_binding_tables(void)
{
   static int skip = -1;
   if (skip < 0)
      skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
   return skip;
}
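
/* Usage example: setting INTEL_DISABLE_COMPACT_BINDING_TABLE=1 in the
 * environment keeps every slot of every group allocated, which can make
 * binding table layouts easier to compare while debugging.
 */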

/**
 * Set up the binding table indices and apply to the shader.
 */
static void
iris_setup_binding_table(const struct intel_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group.  For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Set up the render target read surface group to support non-coherent
       * framebuffer fetch on Gfx8.
       */
      if (devinfo->ver == 8 && info->outputs_read) {
         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   }

   assert(ARRAY_SIZE(info->textures_used) >= 4);
   int max_tex = BITSET_LAST_BIT(info->textures_used);
   assert(max_tex <= 128);
   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
      info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
      info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
   bt->samplers_used_mask = info->samplers_used[0];

   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
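   /* For example (hypothetical count): with num_cbufs = 3, the UBO group has
    * four slots -- indices 0..2 for the gallium constant buffers and index 3
    * for NIR constants; compaction drops slot 3 if nothing ever loads it.
    */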

   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases we don't have the information available
    * upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_workgroups:
            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               mark_used_with_src(bt, &intrin->src[0],
                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic:
         case nir_intrinsic_image_atomic_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic:
         case nir_intrinsic_ssbo_atomic_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled, we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces.  After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
   bt->size_bytes = next * 4;
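
   /* Each binding table entry is a 32-bit offset to surface state, hence the
    * multiply by 4 to get a size in bytes.
    */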

   if (INTEL_DEBUG(DEBUG_BT)) {
      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }
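   /* (DEBUG_BT is enabled via INTEL_DEBUG=bt in the environment.) */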

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b = nir_builder_create(impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if (tex->texture_index < 64) {
               tex->texture_index =
                  iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
                                          tex->texture_index);
            } else {
               tex->texture_index =
                  iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
                                          tex->texture_index - 64);
            }
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic:
         case nir_intrinsic_image_atomic_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic:
         case nir_intrinsic_ssbo_atomic_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}

static void
iris_debug_recompile_brw(struct iris_screen *screen,
                         struct util_debug_callback *dbg,
                         struct iris_uncompiled_shader *ish,
                         const struct brw_base_prog_key *key)
{
   if (!ish || list_is_empty(&ish->variants)
            || list_is_singular(&ish->variants))
      return;

   const struct brw_compiler *c = screen->brw;
   const struct shader_info *info = &ish->nir->info;

   brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
                       _mesa_shader_stage_to_string(info->stage),
                       info->name ? info->name : "(no identifier)",
                       info->label ? info->label : "");

   struct iris_compiled_shader *shader =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
   const void *old_iris_key = &shader->key;

   union brw_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
}

static void
iris_debug_recompile_elk(struct iris_screen *screen,
                         struct util_debug_callback *dbg,
                         struct iris_uncompiled_shader *ish,
                         const struct elk_base_prog_key *key)
{
   if (!ish || list_is_empty(&ish->variants)
            || list_is_singular(&ish->variants))
      return;

   const struct elk_compiler *c = screen->elk;
   const struct shader_info *info = &ish->nir->info;

   elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
                       _mesa_shader_stage_to_string(info->stage),
                       info->name ? info->name : "(no identifier)",
                       info->label ? info->label : "");

   struct iris_compiled_shader *shader =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
   const void *old_iris_key = &shader->key;

   union elk_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
}


static void
check_urb_size(struct iris_context *ice,
               unsigned needed_size,
               gl_shader_stage stage)
{
   unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];

   /* If the last URB allocation wasn't large enough for our needs,
    * flag it as needing to be reconfigured.  Otherwise, we can use
    * the existing config.  However, if the URB is constrained, and
    * we can shrink our size for this stage, we may be able to gain
    * extra concurrency by reconfiguring it to be smaller.  Do so.
    */
   if (last_allocated_size < needed_size ||
       (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
      ice->state.dirty |= IRIS_DIRTY_URB;
   }
}
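
/* Hypothetical sizes: if the last allocation gave this stage 1024B entries
 * and it now needs 1280B, we flag IRIS_DIRTY_URB; if the URB is constrained
 * and only 512B is now needed, we also reconfigure to win back concurrency.
 */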

/**
 * Get the shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * \param added  Set to \c true if the variant was added to the list (i.e., a
 *               variant matching \c key was not found).  Set to \c false
 *               otherwise.
 */
static inline struct iris_compiled_shader *
find_or_add_variant(const struct iris_screen *screen,
                    struct iris_uncompiled_shader *ish,
                    enum iris_program_cache_id cache_id,
                    const void *key, unsigned key_size,
                    bool *added)
{
   struct list_head *start = ish->variants.next;

   *added = false;

   if (screen->precompile) {
      /* Check the first list entry.  There will always be at least one
       * variant in the list (most likely the precompile variant), and
       * other contexts only append new variants, so we can safely check
       * it without locking, saving that cost in the common case.
       */
      struct iris_compiled_shader *first =
         list_first_entry(&ish->variants, struct iris_compiled_shader, link);

      if (memcmp(&first->key, key, key_size) == 0) {
         util_queue_fence_wait(&first->ready);
         return first;
      }

      /* Skip this one in the loop below */
      start = first->link.next;
   }

   struct iris_compiled_shader *variant = NULL;

   /* If it doesn't match, we have to walk the list; other contexts may be
    * concurrently appending shaders to it, so we need to lock here.
    */
   simple_mtx_lock(&ish->lock);

   list_for_each_entry_from(struct iris_compiled_shader, v, start,
                            &ish->variants, link) {
      if (memcmp(&v->key, key, key_size) == 0) {
         variant = v;
         break;
      }
   }

   gl_shader_stage stage = ish->nir->info.stage;

   if (variant == NULL) {
      variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
                                           key_size, key);

      /* Append our new variant to the shader's variant list. */
      list_addtail(&variant->link, &ish->variants);
      *added = true;

      simple_mtx_unlock(&ish->lock);
   } else {
      simple_mtx_unlock(&ish->lock);

      util_queue_fence_wait(&variant->ready);
   }

   assert(stage == variant->stage);
   return variant;
}

static void
iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
                                 UNUSED int thread_index)
{
   free(_job);
}

static void
iris_schedule_compile(struct iris_screen *screen,
                      struct util_queue_fence *ready_fence,
                      struct util_debug_callback *dbg,
                      struct iris_threaded_compile_job *job,
                      util_queue_execute_func execute)
{
   struct util_async_debug_callback async_debug;

   if (dbg) {
      u_async_debug_init(&async_debug);
      job->dbg = &async_debug.base;
   }

   util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
                      iris_threaded_compile_job_delete, 0);

   if (screen->driconf.sync_compile || dbg)
      util_queue_fence_wait(ready_fence);

   if (dbg) {
      u_async_debug_drain(&async_debug, dbg);
      u_async_debug_cleanup(&async_debug);
   }
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static void
iris_compile_vs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct util_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                struct iris_compiled_shader *shader)
{
   const struct intel_device_info *devinfo = screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_vs_prog_key *const key = &shader->key.vs;

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      /* Only re-run the cleanup passes if clip lowering made progress. */
      if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                            true, false, NULL)) {
         nir_lower_io_to_temporaries(nir, impl, true, false);
         nir_lower_global_vars_to_local(nir);
         nir_lower_vars_to_ssa(nir);
         nir_shader_gather_info(nir, impl);
      }
   }

   iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   const char *error;
   const unsigned *program;
   if (screen->brw) {
      struct brw_vs_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_vs_prog_data);

      brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;

      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);

      brw_compute_vue_map(devinfo,
                          &brw_prog_data->base.vue_map, nir->info.outputs_written,
                          nir->info.separate_shader, /* pos_slots */ 1);

      struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);

      struct brw_compile_vs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,
      };

      program = brw_compile_vs(screen->brw, &params);
      error = params.base.error_str;
      if (program) {
         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
      }
   } else {
      struct elk_vs_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_vs_prog_data);

      elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;

      elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);

      elk_compute_vue_map(devinfo,
                          &elk_prog_data->base.vue_map, nir->info.outputs_written,
                          nir->info.separate_shader, /* pos_slots */ 1);

      struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);

      struct elk_compile_vs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,
      };

      program = elk_compile_vs(screen->elk, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &iris_vue_data(shader)->vue_map);

   iris_finalize_program(shader, so_decls,
                         system_values, num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
                                    shader);
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
                                IRIS_STAGE_DIRTY_BINDINGS_VS |
                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         iris_vue_data(shader)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
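
/* Concretely: with separate shader objects, a TCS may write outputs the TES
 * never reads; taking the union here keeps both stages computing the same
 * URB entry stride instead of each deriving its own.
 */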

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static void
iris_compile_tcs(struct iris_screen *screen,
                 struct hash_table *passthrough_ht,
                 struct u_upload_mgr *uploader,
                 struct util_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 struct iris_compiled_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);
   const struct intel_device_info *devinfo = screen->devinfo;
   uint32_t *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   struct iris_binding_table bt;

   const struct iris_tcs_prog_key *const key = &shader->key.tcs;
   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
   struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
   uint32_t source_hash;

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);
      source_hash = ish->source_hash;
   } else {
      if (screen->brw) {
         nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
      } else {
         assert(screen->elk);
         nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
      }
      source_hash = *(uint32_t*)nir->info.source_sha1;
   }

   iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
                       &num_system_values, &num_cbufs);
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   const char *error = NULL;
   const unsigned *program;
   if (screen->brw) {
      struct brw_tcs_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_tcs_prog_data);
      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);

      struct brw_compile_tcs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,
      };

      program = brw_compile_tcs(screen->brw, &params);
      error = params.base.error_str;

      if (program) {
         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
      }
   } else {
      assert(screen->elk);
      struct elk_tcs_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_tcs_prog_data);
      elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);

      struct elk_compile_tcs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,
      };

      program = elk_compile_tcs(screen->elk, &params);
      error = params.base.error_str;

      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile tessellation control shader: %s\n", error);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_finalize_program(shader, NULL, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
                      IRIS_CACHE_TCS, sizeof(*key), key, program);

   if (ish)
      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   const struct intel_device_info *devinfo = screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct iris_tcs_prog_key key = {
      .vue.base.program_string_id = tcs ? tcs->program_id : 0,
      ._tes_primitive_mode = tes_info->tess._primitive_mode,
      .input_vertices =
         !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
      .quads_workaround = devinfo->ver < 9 &&
                          tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   screen->vtbl.populate_tcs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader;
   bool added = false;

   if (tcs != NULL) {
      shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
                                   sizeof(key), &added);
   } else {
      /* Look for and possibly create a passthrough TCS */
      shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

      if (shader == NULL) {
         shader = iris_create_shader_variant(screen, ice->shaders.cache,
                                             MESA_SHADER_TESS_CTRL,
                                             IRIS_CACHE_TCS, sizeof(key), &key);
         added = true;
      }
   }

   /* If a new variant was added, compile it now -- unless we have a real
    * TCS whose assembly could be retrieved from the disk cache (passthrough
    * TCS shaders are never stored there).
    */
   if (added &&
       (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
                                                 &key, sizeof(key)))) {
      iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
                       shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
                                IRIS_STAGE_DIRTY_BINDINGS_TCS |
                                IRIS_STAGE_DIRTY_CONSTANTS_TCS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         iris_vue_data(shader)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static void
iris_compile_tes(struct iris_screen *screen,
                 struct u_upload_mgr *uploader,
                 struct util_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 struct iris_compiled_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *system_values;
   const struct intel_device_info *devinfo = screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_tes_prog_key *const key = &shader->key.tes;

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   const char *error;
   const unsigned *program;

   if (screen->brw) {
      struct brw_tes_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_tes_prog_data);

      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);

      struct intel_vue_map input_vue_map;
      brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                               key->patch_inputs_read);

      struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);

      struct brw_compile_tes_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,
         .input_vue_map = &input_vue_map,
      };

      program = brw_compile_tes(screen->brw, &params);
      error = params.base.error_str;

      if (program) {
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
      }
   } else {
      struct elk_tes_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_tes_prog_data);

      elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);

      struct intel_vue_map input_vue_map;
      elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                               key->patch_inputs_read);

      struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);

      struct elk_compile_tes_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,
         .input_vue_map = &input_vue_map,
      };

      program = elk_compile_tes(screen->elk, &params);
      error = params.base.error_str;

      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile tessellation evaluation shader: %s\n", error);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &iris_vue_data(shader)->vue_map);

   iris_finalize_program(shader, so_decls, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];

   struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
                                IRIS_STAGE_DIRTY_BINDINGS_TES |
                                IRIS_STAGE_DIRTY_CONSTANTS_TES;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         iris_vue_data(shader)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
   }

   /* TODO: Could compare and avoid flagging this. */
   const struct shader_info *tes_info = &ish->nir->info;
   if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
   }
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static void
iris_compile_gs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct util_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                struct iris_compiled_shader *shader)
{
   const struct intel_device_info *devinfo = screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_gs_prog_key *const key = &shader->key.gs;

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   const char *error;
   const unsigned *program;
   if (screen->brw) {
      struct brw_gs_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_gs_prog_data);

      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);

      brw_compute_vue_map(devinfo,
                          &brw_prog_data->base.vue_map, nir->info.outputs_written,
                          nir->info.separate_shader, /* pos_slots */ 1);

      struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);

      struct brw_compile_gs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,
      };

      program = brw_compile_gs(screen->brw, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
      }
   } else {
      struct elk_gs_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_gs_prog_data);

      elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);

      elk_compute_vue_map(devinfo,
                          &elk_prog_data->base.vue_map, nir->info.outputs_written,
                          nir->info.separate_shader, /* pos_slots */ 1);

      struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);

      struct elk_compile_gs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,
      };

      program = elk_compile_gs(screen->elk, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &iris_vue_data(shader)->vue_map);

   iris_finalize_program(shader, so_decls, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;

   if (ish) {
      struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
      screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

      bool added;

      shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
                                   sizeof(key), &added);

      if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                             &key, sizeof(key))) {
         iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
      }

      if (shader->compilation_failed)
         shader = NULL;
   }

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
                                IRIS_STAGE_DIRTY_BINDINGS_GS |
                                IRIS_STAGE_DIRTY_CONSTANTS_GS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         iris_vue_data(shader)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static void
iris_compile_fs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct util_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                struct iris_compiled_shader *shader,
                struct intel_vue_map *vue_map)
{
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *system_values;
   const struct intel_device_info *devinfo = screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_fs_prog_key *const key = &shader->key.fs;

   iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
                       &num_system_values, &num_cbufs);

   /* Lower output variables to load_output intrinsics before setting up
    * binding tables, so iris_setup_binding_table can map any load_output
    * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
    * non-coherent framebuffer fetches.
    */
   brw_nir_lower_fs_outputs(nir);

   /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
    * and do not need a binding table entry with a null surface.  Earlier
    * generations need an entry for a null surface.
    */
   int null_rts = devinfo->ver < 11 ? 1 : 0;

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt,
                            MAX2(key->nr_color_regions, null_rts),
                            num_system_values, num_cbufs);
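
   /* e.g. on Gfx9 (hypothetical case), a fragment shader with
    * nr_color_regions == 0 still gets one render target entry so RT writes
    * can target a null surface; on Gfx11+ the "Null Render Target" message
    * bit makes that extra entry unnecessary.
    */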

   const char *error;
   const unsigned *program;

   if (screen->brw) {
      struct brw_wm_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_wm_prog_data);

      brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;

      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);

      struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);

      struct brw_compile_fs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,

         .allow_spilling = true,
         .max_polygons = UCHAR_MAX,
         .vue_map = vue_map,
      };

      program = brw_compile_fs(screen->brw, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
         iris_apply_brw_prog_data(shader, &brw_prog_data->base);
      }
   } else {
      struct elk_wm_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_wm_prog_data);

      elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;

      elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);

      struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);

      struct elk_compile_fs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,

         .allow_spilling = true,
         .max_polygons = UCHAR_MAX,
         .vue_map = vue_map,
      };

      program = elk_compile_fs(screen->elk, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", error);
      ralloc_free(mem_ctx);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_finalize_program(shader, NULL, system_values,
                         num_system_values, 0, num_cbufs, &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}
2726 
2727 /**
2728  * Update the current fragment shader variant.
2729  *
2730  * Fill out the key, look in the cache, compile and bind if needed.
2731  */
2732 static void
iris_update_compiled_fs(struct iris_context * ice)2733 iris_update_compiled_fs(struct iris_context *ice)
2734 {
2735    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2736    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2737    struct iris_uncompiled_shader *ish =
2738       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2739    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2740    struct iris_fs_prog_key key = { KEY_INIT(base) };
2741    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2742 
2743    struct intel_vue_map *last_vue_map =
2744       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2745 
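   /* The FS key only needs the full VUE layout when the shader reads more
    * than 16 varying inputs; iris_create_shader_state sets the
    * IRIS_NOS_LAST_VUE_MAP bit for such shaders (see below).
    */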
   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
                          sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
                                    shader);
      ice->state.dirty |= IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
                                IRIS_STAGE_DIRTY_BINDINGS_FS |
                                IRIS_STAGE_DIRTY_CONSTANTS_FS;
      shs->sysvals_need_upload = true;
   }
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct iris_compiled_shader *shader)
{
   const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
   const struct intel_vue_map *old_map =
      !ice->shaders.last_vue_shader ? NULL :
      &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
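   /* XOR of the old and new slots_valid bitfields yields the set of
    * varying slots whose presence changed.
    */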
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
         ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
}

static void
iris_update_pull_constant_descriptors(struct iris_context *ice,
                                      gl_shader_stage stage)
{
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];

   if (!shader || !shader->has_ubo_pull)
      return;

   struct iris_shader_state *shs = &ice->state.shaders[stage];
   bool any_new_descriptors =
      shader->num_system_values > 0 && shs->sysvals_need_upload;

   unsigned bound_cbufs = shs->bound_cbufs;
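   /* Walk each bound constant buffer (u_bit_scan consumes one set bit per
    * iteration) and create a surface state for any that still lack one.
    */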
   while (bound_cbufs) {
      const int i = u_bit_scan(&bound_cbufs);
      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
      struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
      if (!surf_state->res && cbuf->buffer) {
         iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
                                         ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
         any_new_descriptors = true;
      }
   }

   if (any_new_descriptors)
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
}

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t stage_dirty = ice->state.stage_dirty;

   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
         ice->state.stage_dirty |=
            IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
            IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
            IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;

         if (ice->shaders.urb.constrained)
            ice->state.dirty |= IRIS_DIRTY_URB;
      }
   }

   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
      const struct iris_compiled_shader *gs =
         ice->shaders.prog[MESA_SHADER_GEOMETRY];
      const struct iris_compiled_shader *tes =
         ice->shaders.prog[MESA_SHADER_TESS_EVAL];
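      /* Determine whether the last geometry-processing stage emits points
       * or lines.  Wide points and lines can extend past the viewport
       * edges, so the state code uses this to adjust the clipper's XY clip
       * enables (hence the IRIS_DIRTY_CLIP flagging below).
       */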
      bool points_or_lines = false;

      if (gs) {
         const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
         points_or_lines =
            gs_data->output_topology == _3DPRIM_POINTLIST ||
            gs_data->output_topology == _3DPRIM_LINESTRIP;
      } else if (tes) {
         const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
         points_or_lines =
            tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
            tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
      }

      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* Outbound to XY Clip enables */
         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
         ice->state.dirty |= IRIS_DIRTY_CLIP;
      }
   }

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
      }
   }

   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);

   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
      if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
         iris_update_pull_constant_descriptors(ice, i);
   }
}

static void
iris_compile_cs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct util_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                struct iris_compiled_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *system_values;
   const struct intel_device_info *devinfo = screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
   const struct iris_cs_prog_key *const key = &shader->key.cs;
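   /* Lower compute system values (workgroup and local invocation IDs and
    * friends) into forms the backend understands; both compiler stacks
    * provide an equivalent pass.
    */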
   if (screen->brw)
      NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
   else
      NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);

   iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
                       &system_values, &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   const char *error;
   const unsigned *program;

   if (screen->brw) {
      struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);

      struct brw_cs_prog_data *brw_prog_data =
         rzalloc(mem_ctx, struct brw_cs_prog_data);

      struct brw_compile_cs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &brw_key,
         .prog_data = brw_prog_data,
      };

      program = brw_compile_cs(screen->brw, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
         iris_apply_brw_prog_data(shader, &brw_prog_data->base);
      }
   } else {
      struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);

      struct elk_cs_prog_data *elk_prog_data =
         rzalloc(mem_ctx, struct elk_cs_prog_data);

      struct elk_compile_cs_params params = {
         .base = {
            .mem_ctx = mem_ctx,
            .nir = nir,
            .log_data = dbg,
            .source_hash = ish->source_hash,
         },
         .key = &elk_key,
         .prog_data = elk_prog_data,
      };

      program = elk_compile_cs(screen->elk, &params);
      error = params.base.error_str;
      if (program) {
         iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
         iris_apply_elk_prog_data(shader, &elk_prog_data->base);
      }
   }

   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", error);

      shader->compilation_failed = true;
      util_queue_fence_signal(&shader->ready);

      return;
   }

   shader->compilation_failed = false;

   iris_finalize_program(shader, NULL, system_values,
                         num_system_values, ish->kernel_input_size, num_cbufs,
                         &bt);

   iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
                      sizeof(*key), key, program);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
}

static void
iris_update_compiled_cs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_cs_prog_key key = { KEY_INIT(base) };
   screen->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
                          sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
   }

   if (shader->compilation_failed)
      shader = NULL;

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
                                IRIS_STAGE_DIRTY_BINDINGS_CS |
                                IRIS_STAGE_DIRTY_CONSTANTS_CS;
      shs->sysvals_need_upload = true;
   }
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
      iris_update_compiled_cs(ice);

   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
      iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
}

void
iris_fill_cs_push_const_buffer(struct iris_screen *screen,
                               struct iris_compiled_shader *shader,
                               unsigned threads,
                               uint32_t *dst)
{
   struct iris_cs_data *cs_data = iris_cs_data(shader);
   assert(iris_cs_push_const_total_size(shader, threads) > 0);
   assert(cs_data->push.cross_thread.size == 0);
   assert(cs_data->push.per_thread.dwords == 1);
   assert(cs_data->first_param_is_builtin_subgroup_id);
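   /* Each thread's per-thread push constant block appears to be padded out
    * to a 32-byte unit (8 dwords), so thread t's subgroup ID lands at
    * dword 8 * t.
    */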
   for (unsigned t = 0; t < threads; t++)
      dst[8 * t] = t;
}

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct intel_device_info *devinfo = screen->devinfo;
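   /* per_thread_scratch must be a power of two of at least 1KB; the bucket
    * index is log2(size) - 10, so 1KB maps to 0, 2KB to 1, and so on (the
    * asserts below verify both properties).
    */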
   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
   assert(per_thread_scratch == 1 << (encoded_size + 10));

   /* On GFX version 12.5, scratch access changed to a surface-based model.
    * Instead of each shader type having its own layout based on IDs passed
    * from the relevant fixed-function unit, all scratch access is based on
    * thread IDs like it always has been for compute.
    */
   if (devinfo->verx10 >= 125)
      stage = MESA_SHADER_COMPUTE;

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   if (!*bop) {
      assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
      uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
      *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
                           IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
   }

   return *bop;
}

const struct iris_state_ref *
iris_get_scratch_surf(struct iris_context *ice,
                      unsigned per_thread_scratch)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   ASSERTED const struct intel_device_info *devinfo = screen->devinfo;

   assert(devinfo->verx10 >= 125);

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
   assert(per_thread_scratch == 1 << (encoded_size + 10));

   struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];

   if (ref->res)
      return ref;

   struct iris_bo *scratch_bo =
      iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);

   void *map = upload_state(ice->state.scratch_surface_uploader, ref,
                            screen->isl_dev.ss.size, 64);
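   /* Fill out a RAW-format buffer surface with is_scratch set; the
    * per-thread stride lets each hardware thread index its own slice of
    * the BO.
    */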
   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = scratch_bo->address,
                         .size_B = scratch_bo->size,
                         .format = ISL_FORMAT_RAW,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
                         .stride_B = per_thread_scratch,
                         .is_scratch = true);

   return ref;
}

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct iris_screen *screen,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   pipe_reference_init(&ish->ref, 1);
   list_inithead(&ish->variants);
   simple_mtx_init(&ish->lock, mtx_plain);
   util_queue_fence_init(&ish->ready);

   ish->uses_atomic_load_store = iris_uses_image_atomic(nir);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   /* Use lowest dword of source shader sha1 for shader hash. */
   ish->source_hash = *(uint32_t*)nir->info.source_sha1;

   if (screen->disk_cache) {
      /* Serialize the NIR to a binary blob that we can hash for the disk
       * cache.  Drop unnecessary information (like variable names)
       * so the serialized NIR is smaller, and also to let us detect more
       * isomorphic shaders when hashing, increasing cache hits.
       */
      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, nir, true);
      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
      blob_finish(&blob);
   }

   return ish;
}

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   const nir_shader_compiler_options *options =
      screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
                  : screen->elk->nir_options[MESA_SHADER_COMPUTE];

   nir_shader *nir;
   switch (state->ir_type) {
   case PIPE_SHADER_IR_NIR:
      nir = (void *)state->prog;
      break;

   case PIPE_SHADER_IR_NIR_SERIALIZED: {
      struct blob_reader reader;
      const struct pipe_binary_program_header *hdr = state->prog;
      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
      nir = nir_deserialize(NULL, options, &reader);
      break;
   }

   default:
      unreachable("Unsupported IR");
   }

   /* Most of iris doesn't really care about the difference between compute
    * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
    * it's way easier if we just normalize to COMPUTE here.
    */
   assert(nir->info.stage == MESA_SHADER_COMPUTE ||
          nir->info.stage == MESA_SHADER_KERNEL);
   nir->info.stage = MESA_SHADER_COMPUTE;

   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(screen, nir, NULL);
   ish->kernel_input_size = state->req_input_mem;
   ish->kernel_shared_size = state->static_shared_mem;

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      struct iris_cs_prog_key key = { KEY_INIT(base) };

      struct iris_compiled_shader *shader =
         iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
                                    IRIS_CACHE_CS, sizeof(key), &key);

      /* Append our new variant to the shader's variant list. */
      list_addtail(&shader->link, &ish->variants);

      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                    &key, sizeof(key))) {
         iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
      }
   }

   return ish;
}

static void
iris_get_compute_state_info(struct pipe_context *ctx, void *state,
                            struct pipe_compute_state_object_info *info)
{
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_uncompiled_shader *ish = state;

   info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
   info->private_memory = 0;
   info->preferred_simd_size = 32;
   info->simd_sizes = 8 | 16 | 32;
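   /* Scratch ("private memory") is a per-variant property; report the
    * largest requirement across all variants compiled so far.
    */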
   list_for_each_entry_safe(struct iris_compiled_shader, shader,
                            &ish->variants, link) {
      info->private_memory = MAX2(info->private_memory,
                                  shader->total_scratch);
   }
}

static uint32_t
iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
                                     const uint32_t block[3])
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish = state;

   struct iris_cs_prog_key key = { KEY_INIT(base) };
   screen->vtbl.populate_cs_key(ice, &key);

   bool added;
   struct iris_compiled_shader *shader =
      find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
                          sizeof(key), &added);

   if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                          &key, sizeof(key))) {
      iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
   }

   return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
}

static void
iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
{
   const struct iris_threaded_compile_job *job =
      (struct iris_threaded_compile_job *) _job;

   struct iris_screen *screen = job->screen;
   struct u_upload_mgr *uploader = job->uploader;
   struct util_debug_callback *dbg = job->dbg;
   struct iris_uncompiled_shader *ish = job->ish;
   struct iris_compiled_shader *shader = job->shader;

   switch (ish->nir->info.stage) {
   case MESA_SHADER_VERTEX:
      iris_compile_vs(screen, uploader, dbg, ish, shader);
      break;
   case MESA_SHADER_TESS_CTRL:
      iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
      break;
   case MESA_SHADER_TESS_EVAL:
      iris_compile_tes(screen, uploader, dbg, ish, shader);
      break;
   case MESA_SHADER_GEOMETRY:
      iris_compile_gs(screen, uploader, dbg, ish, shader);
      break;
   case MESA_SHADER_FRAGMENT:
      iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
      break;

   default:
      unreachable("Invalid shader stage.");
   }
}

static void *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct nir_shader *nir;

   if (state->type == PIPE_SHADER_IR_TGSI)
      nir = tgsi_to_nir(state->tokens, ctx->screen, false);
   else
      nir = state->ir.nir;

   const struct shader_info *const info = &nir->info;
   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(screen, nir, &state->stream_output);

   union iris_any_prog_key key;
   unsigned key_size = 0;

   memset(&key, 0, sizeof(key));

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      /* User clip planes */
      if (info->clip_distance_array_size == 0)
         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

      key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
      key_size = sizeof(key.vs);
      break;

   case MESA_SHADER_TESS_CTRL: {
      key.tcs = (struct iris_tcs_prog_key) {
         KEY_INIT(vue.base),
         // XXX: make sure the linker fills this out from the TES...
         ._tes_primitive_mode =
         info->tess._primitive_mode ? info->tess._primitive_mode
                                   : TESS_PRIMITIVE_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      /* MULTI_PATCH mode needs the key to contain the input patch
       * dimensionality.  We don't have that information, so we guess that
       * the input and output patches are the same size.  This is a bad
       * guess, but we can't do much better.
       */
      if (iris_use_tcs_multi_patch(screen))
         key.tcs.input_vertices = info->tess.tcs_vertices_out;

      key_size = sizeof(key.tcs);
      break;
   }

   case MESA_SHADER_TESS_EVAL:
      /* User clip planes */
      if (info->clip_distance_array_size == 0)
         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

      key.tes = (struct iris_tes_prog_key) {
         KEY_INIT(vue.base),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      key_size = sizeof(key.tes);
      break;

   case MESA_SHADER_GEOMETRY:
      /* User clip planes */
      if (info->clip_distance_array_size == 0)
         ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

      key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
      key_size = sizeof(key.gs);
      break;

   case MESA_SHADER_FRAGMENT:
      ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
                  (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
                  (1ull << IRIS_NOS_RASTERIZER) |
                  (1ull << IRIS_NOS_BLEND);

      STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);

      /* The program key needs the VUE map if there are > 16 inputs */
      if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
         ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
      }

      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct intel_device_info *devinfo = screen->devinfo;

      key.fs = (struct iris_fs_prog_key) {
         KEY_INIT(base),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = devinfo->ver >= 9,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      key_size = sizeof(key.fs);
      break;

   default:
      unreachable("Invalid shader stage.");
   }

   if (screen->precompile) {
      struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;

      struct iris_compiled_shader *shader =
         iris_create_shader_variant(screen, NULL, info->stage,
                                    (enum iris_program_cache_id) info->stage,
                                    key_size, &key);

      /* Append our new variant to the shader's variant list. */
      list_addtail(&shader->link, &ish->variants);

      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                    &key, key_size)) {
         assert(!util_queue_fence_is_signalled(&shader->ready));

         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));

         job->screen = screen;
         job->uploader = uploader;
         job->ish = ish;
         job->shader = shader;

         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
                               iris_compile_shader);
      }
   }

   return ish;
}

/**
 * Called when the refcount on the iris_uncompiled_shader reaches 0.
 *
 * Frees the iris_uncompiled_shader.
 *
 * \sa iris_delete_shader_state
 */
void
iris_destroy_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;

   /* No need to take ish->lock; we hold the last reference to ish */
   list_for_each_entry_safe(struct iris_compiled_shader, shader,
                            &ish->variants, link) {
      list_del(&shader->link);

      iris_shader_variant_reference(&shader, NULL);
   }

   simple_mtx_destroy(&ish->lock);
   util_queue_fence_destroy(&ish->ready);

   ralloc_free(ish->nir);
   free(ish);
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * \sa iris_destroy_shader_state
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   const gl_shader_stage stage = ish->nir->info.stage;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   }

   if (pipe_reference(&ish->ref, NULL))
      iris_destroy_shader_state(ctx, state);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_shader_state(struct iris_context *ice,
                  struct iris_uncompiled_shader *ish,
                  gl_shader_stage stage)
{
   uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
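   /* If the highest sampler binding in use changed, the sampler state
    * tables for this stage need to be re-uploaded.
    */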
   if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
       (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.stage_dirty |= stage_dirty_bit;

   /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
    * (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
      else
         ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
   }
}

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;
   struct iris_uncompiled_shader *ish = state;

   if (ish) {
      const struct shader_info *info = &ish->nir->info;
      if (ice->state.window_space_position != info->vs.window_space_position) {
         ice->state.window_space_position = info->vs.window_space_position;

         ice->state.dirty |= IRIS_DIRTY_CLIP |
                             IRIS_DIRTY_RASTER |
                             IRIS_DIRTY_CC_VIEWPORT;
      }
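      /* Draw parameters (gl_BaseVertex and friends) are sourced from
       * internal vertex buffers, so changing which ones a shader uses
       * requires re-emitting the vertex buffers and elements below.
       */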
      const bool uses_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
      const bool uses_derived_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
      const bool needs_sgvs_element = uses_draw_params ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
         BITSET_TEST(info->system_values_read,
                     SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
          ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }

      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
   }

   bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
                                            IRIS_DIRTY_VFG : 0);

   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   const struct intel_device_info *devinfo = screen->devinfo;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   if (devinfo->ver == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;

   bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

static char *
iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
{
   struct iris_screen *screen = (struct iris_screen *)_screen;
   struct nir_shader *nir = (struct nir_shader *) nirptr;
   const struct intel_device_info *devinfo = screen->devinfo;

   NIR_PASS_V(nir, iris_fix_edge_flags);

   if (screen->brw) {
      struct brw_nir_compiler_opts opts = {};
      brw_preprocess_nir(screen->brw, nir, &opts);

      NIR_PASS_V(nir, brw_nir_lower_storage_image,
                 &(struct brw_nir_lower_storage_image_opts) {
                    .devinfo      = devinfo,
                    .lower_loads  = true,
                    .lower_stores = true,
                 });
   } else {
      assert(screen->elk);

      struct elk_nir_compiler_opts opts = {};
      elk_preprocess_nir(screen->elk, nir, &opts);

      NIR_PASS_V(nir, elk_nir_lower_storage_image,
                 &(struct elk_nir_lower_storage_image_opts) {
                    .devinfo        = devinfo,
                    .lower_loads    = true,
                    .lower_stores   = true,

                    /* Iris uploads image params used by
                     * get_size lowering only in Gfx8.
                     */
                    .lower_get_size = true,
                 });
   }

   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   nir_sweep(nir);

   return NULL;
}

static void
iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
                                     unsigned max_threads)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;
   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
                                 false);
}

static bool
iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
                                             void *v_shader,
                                             enum pipe_shader_type p_stage)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;

   /* Threaded compilation is only used for the precompile.  If precompile is
    * disabled, threaded compilation is "done."
    */
   if (!screen->precompile)
      return true;

   struct iris_uncompiled_shader *ish = v_shader;

   /* When precompile is enabled, the first entry is the precompile variant.
    * Check the ready fence of the precompile variant.
    */
   struct iris_compiled_shader *first =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);

   return util_queue_fence_is_signalled(&first->ready);
}

void
iris_init_screen_program_functions(struct pipe_screen *pscreen)
{
   pscreen->is_parallel_shader_compilation_finished =
      iris_is_parallel_shader_compilation_finished;
   pscreen->set_max_shader_compiler_threads =
      iris_set_max_shader_compiler_threads;
   pscreen->finalize_nir = iris_finalize_nir;
}

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_shader_state;
   ctx->create_tcs_state = iris_create_shader_state;
   ctx->create_tes_state = iris_create_shader_state;
   ctx->create_gs_state  = iris_create_shader_state;
   ctx->create_fs_state  = iris_create_shader_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state  = iris_delete_shader_state;
   ctx->delete_fs_state  = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;

   ctx->get_compute_state_info = iris_get_compute_state_info;
   ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
}

struct intel_cs_dispatch_info
iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
                          const struct iris_compiled_shader *shader,
                          const uint32_t block[3])
{
   if (shader->brw_prog_data) {
      return brw_cs_get_dispatch_info(devinfo,
                                      brw_cs_prog_data(shader->brw_prog_data),
                                      block);
   } else {
      assert(shader->elk_prog_data);
      return elk_cs_get_dispatch_info(devinfo,
                                      elk_cs_prog_data(shader->elk_prog_data),
                                      block);
   }
}

unsigned
iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
                              unsigned threads)
{
   if (shader->brw_prog_data) {
      return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
                                          threads);
   } else {
      assert(shader->elk_prog_data);
      return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
                                          threads);
   }
}

uint32_t
iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
                          enum intel_msaa_flags pushed_msaa_flags)
{
   if (shader->brw_prog_data) {
      return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
                                            pushed_msaa_flags);
   } else {
      assert(shader->elk_prog_data);
      return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
                                                pushed_msaa_flags);
   }
}

bool
iris_use_tcs_multi_patch(struct iris_screen *screen)
{
   return screen->brw && screen->brw->use_tcs_multi_patch;
}

bool
iris_indirect_ubos_use_sampler(struct iris_screen *screen)
{
   return screen->devinfo->ver < 12;
}

static void
iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
{
   struct util_debug_callback *dbg = data;
   va_list args;

   if (!dbg->debug_message)
      return;

   va_start(args, fmt);
   dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
   va_end(args);
}

static void
iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
{
   struct util_debug_callback *dbg = data;
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF)) {
      va_list args_copy;
      va_copy(args_copy, args);
      vfprintf(stderr, fmt, args_copy);
      va_end(args_copy);
   }

   if (dbg->debug_message) {
      dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
   }

   va_end(args);
}

const void *
iris_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type pstage)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;
   gl_shader_stage stage = stage_from_pipe(pstage);
   assert(ir == PIPE_SHADER_IR_NIR);

   return screen->brw ? screen->brw->nir_options[stage]
                      : screen->elk->nir_options[stage];
}

void
iris_compiler_init(struct iris_screen *screen)
{
   STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
   STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
   STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
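   /* Gfx9+ (Skylake and later) uses the brw compiler; older generations
    * use the elk fork, which retains support for the legacy hardware.
    */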
   if (screen->devinfo->ver >= 9) {
      screen->brw = brw_compiler_create(screen, screen->devinfo);
      screen->brw->shader_debug_log = iris_shader_debug_log;
      screen->brw->shader_perf_log = iris_shader_perf_log;
      screen->brw->supports_shader_constants = true;
      screen->brw->indirect_ubos_use_sampler = iris_indirect_ubos_use_sampler(screen);
   } else {
      screen->elk = elk_compiler_create(screen, screen->devinfo);
      screen->elk->shader_debug_log = iris_shader_debug_log;
      screen->elk->shader_perf_log = iris_shader_perf_log;
      screen->elk->supports_shader_constants = true;
      screen->elk->indirect_ubos_use_sampler = iris_indirect_ubos_use_sampler(screen);
   }
}