1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See iris_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/u_debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/intel_nir.h"
48 #include "intel/compiler/brw_prim.h"
49 #include "intel/compiler/elk/elk_compiler.h"
50 #include "intel/compiler/elk/elk_nir.h"
51 #include "intel/compiler/elk/elk_prim.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "nir/tgsi_to_nir.h"
55 
56 #define KEY_INIT(prefix)                                                   \
57    .prefix.program_string_id = ish->program_id,                            \
58    .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
59 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
60    .base.program_string_id = prog_id,                      \
61    .base.limit_trig_input_range = limit_trig_input
62 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input)       \
63    .base.program_string_id = prog_id,                      \
64    .base.limit_trig_input_range = limit_trig_input
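/* Editor's note (illustrative, not part of the upstream source): these
 * macros are meant to be used inside designated initializers, so e.g.
 * KEY_INIT(vue.base) expands to
 *
 *    .vue.base.program_string_id = ish->program_id,
 *    .vue.base.limit_trig_input_range = screen->driconf.limit_trig_input_range
 *
 * giving every iris_*_prog_key the same screen/shader-wide fields before the
 * stage-specific ones are filled in.
 */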
65 
66 struct iris_threaded_compile_job {
67    struct iris_screen *screen;
68    struct u_upload_mgr *uploader;
69    struct util_debug_callback *dbg;
70    struct iris_uncompiled_shader *ish;
71    struct iris_compiled_shader *shader;
72 };
73 
74 static unsigned
75 get_new_program_id(struct iris_screen *screen)
76 {
77    return p_atomic_inc_return(&screen->program_id);
78 }
79 
80 static void
81 iris_apply_brw_wm_prog_data(struct iris_compiled_shader *shader,
82                             const struct brw_wm_prog_data *brw)
83 {
84    assert(shader->stage == MESA_SHADER_FRAGMENT);
85    struct iris_fs_data *iris = &shader->fs;
86 
87    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(brw->urb_setup));
88    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(brw->urb_setup_attribs));
89    memcpy(iris->urb_setup, brw->urb_setup, sizeof(iris->urb_setup));
90    memcpy(iris->urb_setup_attribs, brw->urb_setup_attribs, brw->urb_setup_attribs_count);
91    iris->urb_setup_attribs_count = brw->urb_setup_attribs_count;
92 
93    iris->num_varying_inputs   = brw->num_varying_inputs;
94    iris->msaa_flags_param     = brw->msaa_flags_param;
95    iris->flat_inputs          = brw->flat_inputs;
96    iris->inputs               = brw->inputs;
97    iris->computed_depth_mode  = brw->computed_depth_mode;
98    iris->max_polygons         = brw->max_polygons;
99    iris->dispatch_multi       = brw->dispatch_multi;
100    iris->computed_stencil     = brw->computed_stencil;
101    iris->early_fragment_tests = brw->early_fragment_tests;
102    iris->post_depth_coverage  = brw->post_depth_coverage;
103    iris->inner_coverage       = brw->inner_coverage;
104    iris->dispatch_8           = brw->dispatch_8;
105    iris->dispatch_16          = brw->dispatch_16;
106    iris->dispatch_32          = brw->dispatch_32;
107    iris->dual_src_blend       = brw->dual_src_blend;
108    iris->uses_pos_offset      = brw->uses_pos_offset;
109    iris->uses_omask           = brw->uses_omask;
110    iris->uses_kill            = brw->uses_kill;
111    iris->uses_src_depth       = brw->uses_src_depth;
112    iris->uses_src_w           = brw->uses_src_w;
113    iris->uses_sample_mask     = brw->uses_sample_mask;
114    iris->uses_vmask           = brw->uses_vmask;
115    iris->has_side_effects     = brw->has_side_effects;
116    iris->pulls_bary           = brw->pulls_bary;
117 
118    iris->uses_sample_offsets        = brw->uses_sample_offsets;
119    iris->uses_npc_bary_coefficients = brw->uses_npc_bary_coefficients;
120    iris->uses_pc_bary_coefficients  = brw->uses_pc_bary_coefficients;
121    iris->uses_depth_w_coefficients  = brw->uses_depth_w_coefficients;
122 
123    iris->uses_nonperspective_interp_modes = brw->uses_nonperspective_interp_modes;
124 
125    iris->is_per_sample = brw_wm_prog_data_is_persample(brw, 0);
126 }
127 
128 static void
129 iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
130                             const struct brw_cs_prog_data *brw)
131 {
132    assert(shader->stage == MESA_SHADER_COMPUTE);
133    struct iris_cs_data *iris = &shader->cs;
134 
135    iris->push.cross_thread.dwords = brw->push.cross_thread.dwords;
136    iris->push.cross_thread.regs   = brw->push.cross_thread.regs;
137    iris->push.cross_thread.size   = brw->push.cross_thread.size;
138 
139    iris->push.per_thread.dwords = brw->push.per_thread.dwords;
140    iris->push.per_thread.regs   = brw->push.per_thread.regs;
141    iris->push.per_thread.size   = brw->push.per_thread.size;
142 
143    iris->local_size[0]  = brw->local_size[0];
144    iris->local_size[1]  = brw->local_size[1];
145    iris->local_size[2]  = brw->local_size[2];
146    iris->prog_offset[0] = brw->prog_offset[0];
147    iris->prog_offset[1] = brw->prog_offset[1];
148    iris->prog_offset[2] = brw->prog_offset[2];
149 
150    iris->generate_local_id = brw->generate_local_id;
151    iris->walk_order        = brw->walk_order;
152    iris->uses_barrier      = brw->uses_barrier;
153    iris->uses_sampler      = brw->uses_sampler;
154    iris->prog_mask         = brw->prog_mask;
155 
156    iris->first_param_is_builtin_subgroup_id =
157       brw->base.nr_params > 0 &&
158       brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
159 }
160 
161 static void
162 iris_apply_brw_vue_prog_data(const struct brw_vue_prog_data *brw,
163                              struct iris_vue_data *iris)
164 {
165    memcpy(&iris->vue_map, &brw->vue_map, sizeof(struct intel_vue_map));
166 
167    iris->urb_read_length     = brw->urb_read_length;
168    iris->cull_distance_mask  = brw->cull_distance_mask;
169    iris->urb_entry_size      = brw->urb_entry_size;
170    iris->dispatch_mode       = brw->dispatch_mode;
171    iris->include_vue_handles = brw->include_vue_handles;
172 }
173 
174 static void
175 iris_apply_brw_vs_prog_data(struct iris_compiled_shader *shader,
176                             const struct brw_vs_prog_data *brw)
177 {
178    assert(shader->stage == MESA_SHADER_VERTEX);
179    struct iris_vs_data *iris = &shader->vs;
180 
181    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
182 
183    iris->uses_vertexid     = brw->uses_vertexid;
184    iris->uses_instanceid   = brw->uses_instanceid;
185    iris->uses_firstvertex  = brw->uses_firstvertex;
186    iris->uses_baseinstance = brw->uses_baseinstance;
187    iris->uses_drawid       = brw->uses_drawid;
188 }
189 
190 static void
191 iris_apply_brw_tcs_prog_data(struct iris_compiled_shader *shader,
192                              const struct brw_tcs_prog_data *brw)
193 {
194    assert(shader->stage == MESA_SHADER_TESS_CTRL);
195    struct iris_tcs_data *iris = &shader->tcs;
196 
197    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
198 
199    iris->instances             = brw->instances;
200    iris->patch_count_threshold = brw->patch_count_threshold;
201    iris->include_primitive_id  = brw->include_primitive_id;
202 }
203 
204 static void
205 iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
206                              const struct brw_tes_prog_data *brw)
207 {
208    assert(shader->stage == MESA_SHADER_TESS_EVAL);
209    struct iris_tes_data *iris = &shader->tes;
210 
211    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
212 
213    iris->partitioning         = brw->partitioning;
214    iris->output_topology      = brw->output_topology;
215    iris->domain               = brw->domain;
216    iris->include_primitive_id = brw->include_primitive_id;
217 }
218 
219 static void
220 iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,
221                             const struct brw_gs_prog_data *brw)
222 {
223    assert(shader->stage == MESA_SHADER_GEOMETRY);
224    struct iris_gs_data *iris = &shader->gs;
225 
226    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
227 
228    iris->vertices_in                     = brw->vertices_in;
229    iris->output_vertex_size_hwords       = brw->output_vertex_size_hwords;
230    iris->output_topology                 = brw->output_topology;
231    iris->control_data_header_size_hwords = brw->control_data_header_size_hwords;
232    iris->control_data_format             = brw->control_data_format;
233    iris->static_vertex_count             = brw->static_vertex_count;
234    iris->invocations                     = brw->invocations;
235    iris->include_primitive_id            = brw->include_primitive_id;
236 }
237 
238 void
239 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
240                          struct brw_stage_prog_data *brw)
241 {
242    STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
243    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
244       shader->ubo_ranges[i].block  = brw->ubo_ranges[i].block;
245       shader->ubo_ranges[i].start  = brw->ubo_ranges[i].start;
246       shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
247    }
248 
249    shader->nr_params              = brw->nr_params;
250    shader->total_scratch          = brw->total_scratch;
251    shader->total_shared           = brw->total_shared;
252    shader->program_size           = brw->program_size;
253    shader->const_data_offset      = brw->const_data_offset;
254    shader->dispatch_grf_start_reg = brw->dispatch_grf_start_reg;
255    shader->has_ubo_pull           = brw->has_ubo_pull;
256    shader->use_alt_mode           = brw->use_alt_mode;
257 
258    switch (shader->stage) {
259    case MESA_SHADER_FRAGMENT:
260       iris_apply_brw_wm_prog_data(shader, brw_wm_prog_data_const(brw));
261       break;
262    case MESA_SHADER_COMPUTE:
263       iris_apply_brw_cs_prog_data(shader, brw_cs_prog_data_const(brw));
264       break;
265    case MESA_SHADER_VERTEX:
266       iris_apply_brw_vs_prog_data(shader, brw_vs_prog_data_const(brw));
267       break;
268    case MESA_SHADER_TESS_CTRL:
269       iris_apply_brw_tcs_prog_data(shader, brw_tcs_prog_data_const(brw));
270       break;
271    case MESA_SHADER_TESS_EVAL:
272       iris_apply_brw_tes_prog_data(shader, brw_tes_prog_data_const(brw));
273       break;
274    case MESA_SHADER_GEOMETRY:
275       iris_apply_brw_gs_prog_data(shader, brw_gs_prog_data_const(brw));
276       break;
277    default:
278       unreachable("invalid shader stage");
279    }
280 
281    shader->brw_prog_data = brw;
282 
283    ralloc_steal(shader, shader->brw_prog_data);
284    ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
285    ralloc_steal(shader->brw_prog_data, brw->param);
286 }
287 
288 static void
289 iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
290                             const struct elk_wm_prog_data *elk)
291 {
292    assert(shader->stage == MESA_SHADER_FRAGMENT);
293    struct iris_fs_data *iris = &shader->fs;
294 
295    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup) == ARRAY_SIZE(elk->urb_setup));
296    STATIC_ASSERT(ARRAY_SIZE(iris->urb_setup_attribs) == ARRAY_SIZE(elk->urb_setup_attribs));
297    memcpy(iris->urb_setup, elk->urb_setup, sizeof(iris->urb_setup));
298    memcpy(iris->urb_setup_attribs, elk->urb_setup_attribs, elk->urb_setup_attribs_count);
299    iris->urb_setup_attribs_count = elk->urb_setup_attribs_count;
300 
301    iris->num_varying_inputs   = elk->num_varying_inputs;
302    iris->msaa_flags_param     = elk->msaa_flags_param;
303    iris->flat_inputs          = elk->flat_inputs;
304    iris->inputs               = elk->inputs;
305    iris->computed_depth_mode  = elk->computed_depth_mode;
306    iris->max_polygons         = 1;
307    iris->dispatch_multi       = 0;
308    iris->computed_stencil     = elk->computed_stencil;
309    iris->early_fragment_tests = elk->early_fragment_tests;
310    iris->post_depth_coverage  = elk->post_depth_coverage;
311    iris->inner_coverage       = elk->inner_coverage;
312    iris->dispatch_8           = elk->dispatch_8;
313    iris->dispatch_16          = elk->dispatch_16;
314    iris->dispatch_32          = elk->dispatch_32;
315    iris->dual_src_blend       = elk->dual_src_blend;
316    iris->uses_pos_offset      = elk->uses_pos_offset;
317    iris->uses_omask           = elk->uses_omask;
318    iris->uses_kill            = elk->uses_kill;
319    iris->uses_src_depth       = elk->uses_src_depth;
320    iris->uses_src_w           = elk->uses_src_w;
321    iris->uses_sample_mask     = elk->uses_sample_mask;
322    iris->uses_vmask           = elk->uses_vmask;
323    iris->pulls_bary           = elk->pulls_bary;
324    iris->has_side_effects     = elk->has_side_effects;
325 
326    iris->uses_nonperspective_interp_modes = elk->uses_nonperspective_interp_modes;
327 
328    iris->is_per_sample = elk_wm_prog_data_is_persample(elk, 0);
329 }
330 
331 static void
332 iris_apply_elk_cs_prog_data(struct iris_compiled_shader *shader,
333                             const struct elk_cs_prog_data *elk)
334 {
335    assert(shader->stage == MESA_SHADER_COMPUTE);
336    struct iris_cs_data *iris = &shader->cs;
337 
338    iris->push.cross_thread.dwords = elk->push.cross_thread.dwords;
339    iris->push.cross_thread.regs   = elk->push.cross_thread.regs;
340    iris->push.cross_thread.size   = elk->push.cross_thread.size;
341 
342    iris->push.per_thread.dwords = elk->push.per_thread.dwords;
343    iris->push.per_thread.regs   = elk->push.per_thread.regs;
344    iris->push.per_thread.size   = elk->push.per_thread.size;
345 
346    iris->local_size[0]  = elk->local_size[0];
347    iris->local_size[1]  = elk->local_size[1];
348    iris->local_size[2]  = elk->local_size[2];
349    iris->prog_offset[0] = elk->prog_offset[0];
350    iris->prog_offset[1] = elk->prog_offset[1];
351    iris->prog_offset[2] = elk->prog_offset[2];
352 
353    iris->uses_barrier      = elk->uses_barrier;
354    iris->prog_mask         = elk->prog_mask;
355 
356    iris->first_param_is_builtin_subgroup_id =
357       elk->base.nr_params > 0 &&
358       elk->base.param[0] == ELK_PARAM_BUILTIN_SUBGROUP_ID;
359 }
360 
361 static void
362 iris_apply_elk_vue_prog_data(const struct elk_vue_prog_data *elk,
363                              struct iris_vue_data *iris)
364 {
365    memcpy(&iris->vue_map, &elk->vue_map, sizeof(struct intel_vue_map));
366 
367    iris->urb_read_length     = elk->urb_read_length;
368    iris->cull_distance_mask  = elk->cull_distance_mask;
369    iris->urb_entry_size      = elk->urb_entry_size;
370    iris->dispatch_mode       = elk->dispatch_mode;
371    iris->include_vue_handles = elk->include_vue_handles;
372 }
373 
374 static void
375 iris_apply_elk_vs_prog_data(struct iris_compiled_shader *shader,
376                             const struct elk_vs_prog_data *elk)
377 {
378    assert(shader->stage == MESA_SHADER_VERTEX);
379    struct iris_vs_data *iris = &shader->vs;
380 
381    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
382 
383    iris->uses_vertexid     = elk->uses_vertexid;
384    iris->uses_instanceid   = elk->uses_instanceid;
385    iris->uses_firstvertex  = elk->uses_firstvertex;
386    iris->uses_baseinstance = elk->uses_baseinstance;
387    iris->uses_drawid       = elk->uses_drawid;
388 }
389 
390 static void
391 iris_apply_elk_tcs_prog_data(struct iris_compiled_shader *shader,
392                              const struct elk_tcs_prog_data *elk)
393 {
394    assert(shader->stage == MESA_SHADER_TESS_CTRL);
395    struct iris_tcs_data *iris = &shader->tcs;
396 
397    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
398 
399    iris->instances             = elk->instances;
400    iris->patch_count_threshold = elk->patch_count_threshold;
401    iris->include_primitive_id  = elk->include_primitive_id;
402 }
403 
404 static void
405 iris_apply_elk_tes_prog_data(struct iris_compiled_shader *shader,
406                              const struct elk_tes_prog_data *elk)
407 {
408    assert(shader->stage == MESA_SHADER_TESS_EVAL);
409    struct iris_tes_data *iris = &shader->tes;
410 
411    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
412 
413    iris->partitioning         = elk->partitioning;
414    iris->output_topology      = elk->output_topology;
415    iris->domain               = elk->domain;
416    iris->include_primitive_id = elk->include_primitive_id;
417 }
418 
419 static void
420 iris_apply_elk_gs_prog_data(struct iris_compiled_shader *shader,
421                             const struct elk_gs_prog_data *elk)
422 {
423    assert(shader->stage == MESA_SHADER_GEOMETRY);
424    struct iris_gs_data *iris = &shader->gs;
425 
426    iris_apply_elk_vue_prog_data(&elk->base, &iris->base);
427 
428    iris->vertices_in                     = elk->vertices_in;
429    iris->output_vertex_size_hwords       = elk->output_vertex_size_hwords;
430    iris->output_topology                 = elk->output_topology;
431    iris->control_data_header_size_hwords = elk->control_data_header_size_hwords;
432    iris->control_data_format             = elk->control_data_format;
433    iris->static_vertex_count             = elk->static_vertex_count;
434    iris->invocations                     = elk->invocations;
435    iris->include_primitive_id            = elk->include_primitive_id;
436 }
437 
438 void
439 iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
440                          struct elk_stage_prog_data *elk)
441 {
442    STATIC_ASSERT(ARRAY_SIZE(elk->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
443    for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
444       shader->ubo_ranges[i].block  = elk->ubo_ranges[i].block;
445       shader->ubo_ranges[i].start  = elk->ubo_ranges[i].start;
446       shader->ubo_ranges[i].length = elk->ubo_ranges[i].length;
447    }
448 
449    shader->nr_params              = elk->nr_params;
450    shader->total_scratch          = elk->total_scratch;
451    shader->total_shared           = elk->total_shared;
452    shader->program_size           = elk->program_size;
453    shader->const_data_offset      = elk->const_data_offset;
454    shader->dispatch_grf_start_reg = elk->dispatch_grf_start_reg;
455    shader->has_ubo_pull           = elk->has_ubo_pull;
456    shader->use_alt_mode           = elk->use_alt_mode;
457 
458    switch (shader->stage) {
459    case MESA_SHADER_FRAGMENT:
460       iris_apply_elk_wm_prog_data(shader, elk_wm_prog_data_const(elk));
461       break;
462    case MESA_SHADER_COMPUTE:
463       iris_apply_elk_cs_prog_data(shader, elk_cs_prog_data_const(elk));
464       break;
465    case MESA_SHADER_VERTEX:
466       iris_apply_elk_vs_prog_data(shader, elk_vs_prog_data_const(elk));
467       break;
468    case MESA_SHADER_TESS_CTRL:
469       iris_apply_elk_tcs_prog_data(shader, elk_tcs_prog_data_const(elk));
470       break;
471    case MESA_SHADER_TESS_EVAL:
472       iris_apply_elk_tes_prog_data(shader, elk_tes_prog_data_const(elk));
473       break;
474    case MESA_SHADER_GEOMETRY:
475       iris_apply_elk_gs_prog_data(shader, elk_gs_prog_data_const(elk));
476       break;
477    default:
478       unreachable("invalid shader stage");
479    }
480 
481    shader->elk_prog_data = elk;
482 
483    ralloc_steal(shader, shader->elk_prog_data);
484    ralloc_steal(shader->elk_prog_data, (void *)elk->relocs);
485    ralloc_steal(shader->elk_prog_data, elk->param);
486 }
487 
488 void
489 iris_finalize_program(struct iris_compiled_shader *shader,
490                       uint32_t *streamout,
491                       uint32_t *system_values,
492                       unsigned num_system_values,
493                       unsigned kernel_input_size,
494                       unsigned num_cbufs,
495                       const struct iris_binding_table *bt)
496 {
497    /* There can be only one: exactly one of brw_prog_data or elk_prog_data. */
498    assert((shader->brw_prog_data == NULL) != (shader->elk_prog_data == NULL));
499 
500    shader->streamout = streamout;
501    shader->system_values = system_values;
502    shader->num_system_values = num_system_values;
503    shader->kernel_input_size = kernel_input_size;
504    shader->num_cbufs = num_cbufs;
505    shader->bt = *bt;
506 
507    ralloc_steal(shader, shader->streamout);
508    ralloc_steal(shader, shader->system_values);
509 }
510 
511 static struct brw_vs_prog_key
512 iris_to_brw_vs_key(const struct iris_screen *screen,
513                    const struct iris_vs_prog_key *key)
514 {
515    return (struct brw_vs_prog_key) {
516       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
517                    key->vue.base.limit_trig_input_range),
518    };
519 }
520 
521 static struct brw_tcs_prog_key
522 iris_to_brw_tcs_key(const struct iris_screen *screen,
523                     const struct iris_tcs_prog_key *key)
524 {
525    return (struct brw_tcs_prog_key) {
526       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
527                    key->vue.base.limit_trig_input_range),
528       ._tes_primitive_mode = key->_tes_primitive_mode,
529       .input_vertices = key->input_vertices,
530       .patch_outputs_written = key->patch_outputs_written,
531       .outputs_written = key->outputs_written,
532    };
533 }
534 
535 static struct brw_tes_prog_key
536 iris_to_brw_tes_key(const struct iris_screen *screen,
537                     const struct iris_tes_prog_key *key)
538 {
539    return (struct brw_tes_prog_key) {
540       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
541                    key->vue.base.limit_trig_input_range),
542       .patch_inputs_read = key->patch_inputs_read,
543       .inputs_read = key->inputs_read,
544    };
545 }
546 
547 static struct brw_gs_prog_key
548 iris_to_brw_gs_key(const struct iris_screen *screen,
549                    const struct iris_gs_prog_key *key)
550 {
551    return (struct brw_gs_prog_key) {
552       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
553                    key->vue.base.limit_trig_input_range),
554    };
555 }
556 
557 static struct brw_wm_prog_key
558 iris_to_brw_fs_key(const struct iris_screen *screen,
559                    const struct iris_fs_prog_key *key)
560 {
561    return (struct brw_wm_prog_key) {
562       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
563                    key->base.limit_trig_input_range),
564       .nr_color_regions = key->nr_color_regions,
565       .flat_shade = key->flat_shade,
566       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
567       .alpha_to_coverage = key->alpha_to_coverage ? INTEL_ALWAYS : INTEL_NEVER,
568       .clamp_fragment_color = key->clamp_fragment_color,
569       .persample_interp = key->persample_interp ? INTEL_ALWAYS : INTEL_NEVER,
570       .multisample_fbo = key->multisample_fbo ? INTEL_ALWAYS : INTEL_NEVER,
571       .force_dual_color_blend = key->force_dual_color_blend,
572       .coherent_fb_fetch = key->coherent_fb_fetch,
573       .color_outputs_valid = key->color_outputs_valid,
574       .input_slots_valid = key->input_slots_valid,
575       .ignore_sample_mask_out = !key->multisample_fbo,
576       .null_push_constant_tbimr_workaround =
577          screen->devinfo->needs_null_push_constant_tbimr_workaround,
578    };
579 }
580 
581 static struct brw_cs_prog_key
582 iris_to_brw_cs_key(const struct iris_screen *screen,
583                    const struct iris_cs_prog_key *key)
584 {
585    return (struct brw_cs_prog_key) {
586       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
587                    key->base.limit_trig_input_range),
588    };
589 }
590 
591 static struct elk_vs_prog_key
592 iris_to_elk_vs_key(const struct iris_screen *screen,
593                    const struct iris_vs_prog_key *key)
594 {
595    return (struct elk_vs_prog_key) {
596       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
597                    key->vue.base.limit_trig_input_range),
598 
599       /* Don't tell the backend about our clip plane constants, we've
600        * already lowered them in NIR and don't want it doing it again.
601        */
602       .nr_userclip_plane_consts = 0,
603    };
604 }
605 
606 static struct elk_tcs_prog_key
607 iris_to_elk_tcs_key(const struct iris_screen *screen,
608                     const struct iris_tcs_prog_key *key)
609 {
610    return (struct elk_tcs_prog_key) {
611       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
612                    key->vue.base.limit_trig_input_range),
613       ._tes_primitive_mode = key->_tes_primitive_mode,
614       .input_vertices = key->input_vertices,
615       .patch_outputs_written = key->patch_outputs_written,
616       .outputs_written = key->outputs_written,
617       .quads_workaround = key->quads_workaround,
618    };
619 }
620 
621 static struct elk_tes_prog_key
622 iris_to_elk_tes_key(const struct iris_screen *screen,
623                     const struct iris_tes_prog_key *key)
624 {
625    return (struct elk_tes_prog_key) {
626       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
627                    key->vue.base.limit_trig_input_range),
628       .patch_inputs_read = key->patch_inputs_read,
629       .inputs_read = key->inputs_read,
630    };
631 }
632 
633 static struct elk_gs_prog_key
634 iris_to_elk_gs_key(const struct iris_screen *screen,
635                    const struct iris_gs_prog_key *key)
636 {
637    return (struct elk_gs_prog_key) {
638       ELK_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
639                    key->vue.base.limit_trig_input_range),
640    };
641 }
642 
643 static struct elk_wm_prog_key
644 iris_to_elk_fs_key(const struct iris_screen *screen,
645                    const struct iris_fs_prog_key *key)
646 {
647    return (struct elk_wm_prog_key) {
648       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
649                    key->base.limit_trig_input_range),
650       .nr_color_regions = key->nr_color_regions,
651       .flat_shade = key->flat_shade,
652       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
653       .alpha_to_coverage = key->alpha_to_coverage ? ELK_ALWAYS : ELK_NEVER,
654       .clamp_fragment_color = key->clamp_fragment_color,
655       .persample_interp = key->persample_interp ? ELK_ALWAYS : ELK_NEVER,
656       .multisample_fbo = key->multisample_fbo ? ELK_ALWAYS : ELK_NEVER,
657       .force_dual_color_blend = key->force_dual_color_blend,
658       .coherent_fb_fetch = key->coherent_fb_fetch,
659       .color_outputs_valid = key->color_outputs_valid,
660       .input_slots_valid = key->input_slots_valid,
661       .ignore_sample_mask_out = !key->multisample_fbo,
662    };
663 }
664 
665 static struct elk_cs_prog_key
666 iris_to_elk_cs_key(const struct iris_screen *screen,
667                    const struct iris_cs_prog_key *key)
668 {
669    return (struct elk_cs_prog_key) {
670       ELK_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
671                    key->base.limit_trig_input_range),
672    };
673 }
674 
675 static void *
676 upload_state(struct u_upload_mgr *uploader,
677              struct iris_state_ref *ref,
678              unsigned size,
679              unsigned alignment)
680 {
681    void *p = NULL;
682    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
683    return p;
684 }
685 
686 void
687 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
688                                 struct pipe_shader_buffer *buf,
689                                 struct iris_state_ref *surf_state,
690                                 isl_surf_usage_flags_t usage)
691 {
692    struct pipe_context *ctx = &ice->ctx;
693    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
694    bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
695 
696    void *map =
697       upload_state(ice->state.surface_uploader, surf_state,
698                    screen->isl_dev.ss.size, 64);
699    if (unlikely(!map)) {
700       surf_state->res = NULL;
701       return;
702    }
703 
704    struct iris_resource *res = (void *) buf->buffer;
705    struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
706    surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
707 
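   /* Editor's note: SSBOs, and UBOs that are read through the dataport, use a
    * raw (untyped) buffer surface; UBOs pulled through the sampler instead
    * need a typed RGBA32_FLOAT format.
    */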
708    const bool dataport = ssbo || !iris_indirect_ubos_use_sampler(screen);
709 
710    isl_buffer_fill_state(&screen->isl_dev, map,
711                          .address = res->bo->address + res->offset +
712                                     buf->buffer_offset,
713                          .size_B = buf->buffer_size - res->offset,
714                          .format = dataport ? ISL_FORMAT_RAW
715                                             : ISL_FORMAT_R32G32B32A32_FLOAT,
716                          .swizzle = ISL_SWIZZLE_IDENTITY,
717                          .stride_B = 1,
718                          .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
719 }
720 
721 static nir_def *
722 get_aoa_deref_offset(nir_builder *b,
723                      nir_deref_instr *deref,
724                      unsigned elem_size)
725 {
726    unsigned array_size = elem_size;
727    nir_def *offset = nir_imm_int(b, 0);
728 
729    while (deref->deref_type != nir_deref_type_var) {
730       assert(deref->deref_type == nir_deref_type_array);
731 
732       /* This level's element size is the previous level's array size */
733       nir_def *index = deref->arr.index.ssa;
734       assert(deref->arr.index.ssa);
735       offset = nir_iadd(b, offset,
736                            nir_imul_imm(b, index, array_size));
737 
738       deref = nir_deref_instr_parent(deref);
739       assert(glsl_type_is_array(deref->type));
740       array_size *= glsl_get_length(deref->type);
741    }
742 
743    /* Accessing an invalid surface index with the dataport can result in a
744     * hang.  According to the spec "if the index used to select an individual
745     * element is negative or greater than or equal to the size of the array,
746     * the results of the operation are undefined but may not lead to
747     * termination" -- which is one of the possible outcomes of the hang.
748     * Clamp the index to prevent access outside of the array bounds.
749     */
750    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
751 }
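/* Worked example (editor's sketch, not upstream text): for a declaration such
 * as "uniform image2D img[4][2]" and an access img[i][j] with elem_size == 1,
 * the loop above first adds j * 1, then i * 2, giving offset = i * 2 + j with
 * array_size == 8, and the result is clamped to at most 8 - 1 = 7 so the
 * surface index can never run past the end of the array.
 */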
752 
753 static void
754 iris_lower_storage_image_derefs(nir_shader *nir)
755 {
756    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
757 
758    nir_builder b = nir_builder_create(impl);
759 
760    nir_foreach_block(block, impl) {
761       nir_foreach_instr_safe(instr, block) {
762          if (instr->type != nir_instr_type_intrinsic)
763             continue;
764 
765          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
766          switch (intrin->intrinsic) {
767          case nir_intrinsic_image_deref_load:
768          case nir_intrinsic_image_deref_store:
769          case nir_intrinsic_image_deref_atomic:
770          case nir_intrinsic_image_deref_atomic_swap:
771          case nir_intrinsic_image_deref_size:
772          case nir_intrinsic_image_deref_samples:
773          case nir_intrinsic_image_deref_load_raw_intel:
774          case nir_intrinsic_image_deref_store_raw_intel: {
775             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
776             nir_variable *var = nir_deref_instr_get_variable(deref);
777 
778             b.cursor = nir_before_instr(&intrin->instr);
779             nir_def *index =
780                nir_iadd_imm(&b, get_aoa_deref_offset(&b, deref, 1),
781                                 var->data.driver_location);
782             nir_rewrite_image_intrinsic(intrin, index, false);
783             break;
784          }
785 
786          default:
787             break;
788          }
789       }
790    }
791 }
792 
793 static bool
794 iris_uses_image_atomic(const nir_shader *shader)
795 {
796    nir_foreach_function_impl(impl, shader) {
797       nir_foreach_block(block, impl) {
798          nir_foreach_instr(instr, block) {
799             if (instr->type != nir_instr_type_intrinsic)
800                continue;
801 
802             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
803             switch (intrin->intrinsic) {
804             case nir_intrinsic_image_deref_atomic:
805             case nir_intrinsic_image_deref_atomic_swap:
806                unreachable("Should have been lowered in "
807                            "iris_lower_storage_image_derefs");
808 
809             case nir_intrinsic_image_atomic:
810             case nir_intrinsic_image_atomic_swap:
811                return true;
812 
813             default:
814                break;
815             }
816          }
817       }
818    }
819 
820    return false;
821 }
822 
823 /**
824  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
825  */
826 static bool
827 iris_fix_edge_flags(nir_shader *nir)
828 {
829    if (nir->info.stage != MESA_SHADER_VERTEX) {
830       nir_shader_preserve_all_metadata(nir);
831       return false;
832    }
833 
834    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
835                                                        VARYING_SLOT_EDGE);
836    if (!var) {
837       nir_shader_preserve_all_metadata(nir);
838       return false;
839    }
840 
841    var->data.mode = nir_var_shader_temp;
842    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
843    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
844    nir_fixup_deref_modes(nir);
845 
846    nir_foreach_function_impl(impl, nir) {
847       nir_metadata_preserve(impl, nir_metadata_control_flow |
848                                   nir_metadata_live_defs |
849                                   nir_metadata_loop_analysis);
850    }
851 
852    return true;
853 }
854 
855 /**
856  * Fix an uncompiled shader's stream output info.
857  *
858  * Core Gallium stores output->register_index as a "slot" number, where
859  * slots are assigned consecutively to all outputs in info->outputs_written.
860  * This naive packing of outputs doesn't work for us - we too have slots,
861  * but the layout is defined by the VUE map, which we won't have until we
862  * compile a specific shader variant.  So, we remap these and simply store
863  * VARYING_SLOT_* in our copy's output->register_index fields.
864  *
865  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
866  * components of our VUE header.  See brw_vue_map.c for the layout.
867  */
868 static void
869 update_so_info(struct pipe_stream_output_info *so_info,
870                uint64_t outputs_written)
871 {
872    uint8_t reverse_map[64] = {};
873    unsigned slot = 0;
874    while (outputs_written) {
875       reverse_map[slot++] = u_bit_scan64(&outputs_written);
876    }
877 
878    for (unsigned i = 0; i < so_info->num_outputs; i++) {
879       struct pipe_stream_output *output = &so_info->output[i];
880 
881       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
882       output->register_index = reverse_map[output->register_index];
883 
884       /* The VUE header contains three scalar fields packed together:
885        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
886        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
887        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
888        */
889       switch (output->register_index) {
890       case VARYING_SLOT_LAYER:
891          assert(output->num_components == 1);
892          output->register_index = VARYING_SLOT_PSIZ;
893          output->start_component = 1;
894          break;
895       case VARYING_SLOT_VIEWPORT:
896          assert(output->num_components == 1);
897          output->register_index = VARYING_SLOT_PSIZ;
898          output->start_component = 2;
899          break;
900       case VARYING_SLOT_PSIZ:
901          assert(output->num_components == 1);
902          output->start_component = 3;
903          break;
904       }
905 
906       //info->outputs_written |= 1ull << output->register_index;
907    }
908 }
909 
910 static void
911 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
912                         unsigned offset, unsigned n)
913 {
914    assert(offset % sizeof(uint32_t) == 0);
915 
916    for (unsigned i = 0; i < n; ++i)
917       sysvals[i] = ELK_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
918 
919    for (unsigned i = n; i < 4; ++i)
920       sysvals[i] = ELK_PARAM_BUILTIN_ZERO;
921 }
922 
923 /**
924  * Associate NIR uniform variables with the prog_data->param[] mechanism
925  * used by the backend.  Also, decide which UBOs we'd like to push in an
926  * ideal situation (though the backend can reduce this).
927  */
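/* Rough sketch of the rewrite performed below (editor's note, NIR syntax is
 * approximate): a system value intrinsic such as
 *
 *    vec4 ssa_5 = intrinsic load_user_clip_plane (ucp=0)
 *
 * is replaced by a UBO load from a driver-internal constant buffer appended
 * after the application's UBOs:
 *
 *    vec4 ssa_5 = intrinsic load_ubo (<sysval cbuf index>, <offset>)
 *
 * while system_values[] records BRW_PARAM_BUILTIN_CLIP_PLANE(0, 0..3) so the
 * driver knows what data to upload at that offset at draw time.
 */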
928 static void
929 iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
930                     void *mem_ctx,
931                     nir_shader *nir,
932                     unsigned kernel_input_size,
933                     uint32_t **out_system_values,
934                     unsigned *out_num_system_values,
935                     unsigned *out_num_cbufs)
936 {
937    unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
938 
939    const unsigned IRIS_MAX_SYSTEM_VALUES =
940       PIPE_MAX_SHADER_IMAGES * ISL_IMAGE_PARAM_SIZE;
941    unsigned *system_values =
942       rzalloc_array(mem_ctx, unsigned, IRIS_MAX_SYSTEM_VALUES);
943    unsigned num_system_values = 0;
944 
945    unsigned patch_vert_idx = -1;
946    unsigned tess_outer_default_idx = -1;
947    unsigned tess_inner_default_idx = -1;
948    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
949    unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
950    unsigned variable_group_size_idx = -1;
951    unsigned work_dim_idx = -1;
952    memset(ucp_idx, -1, sizeof(ucp_idx));
953    memset(img_idx, -1, sizeof(img_idx));
954 
955    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
956 
957    nir_builder b = nir_builder_at(nir_before_impl(impl));
958 
959    nir_def *temp_ubo_name = nir_undef(&b, 1, 32);
960 
961    /* Turn system value intrinsics into uniforms */
962    nir_foreach_block(block, impl) {
963       nir_foreach_instr_safe(instr, block) {
964          if (instr->type != nir_instr_type_intrinsic)
965             continue;
966 
967          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
968          nir_def *offset;
969 
970          switch (intrin->intrinsic) {
971          case nir_intrinsic_load_base_workgroup_id: {
972             /* GL doesn't have a concept of base workgroup */
973             b.cursor = nir_instr_remove(&intrin->instr);
974             nir_def_rewrite_uses(&intrin->def,
975                                      nir_imm_zero(&b, 3, 32));
976             continue;
977          }
978          case nir_intrinsic_load_constant: {
979             unsigned load_size = intrin->def.num_components *
980                                  intrin->def.bit_size / 8;
981             unsigned load_align = intrin->def.bit_size / 8;
982 
983             /* This one is special because it reads from the shader constant
984              * data and not cbuf0 which gallium uploads for us.
985              */
986             b.cursor = nir_instr_remove(&intrin->instr);
987 
988             nir_def *offset =
989                nir_iadd_imm(&b, intrin->src[0].ssa,
990                                 nir_intrinsic_base(intrin));
991 
992             assert(load_size < b.shader->constant_data_size);
993             unsigned max_offset = b.shader->constant_data_size - load_size;
994             offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
995 
996             /* Constant data lives in buffers within IRIS_MEMZONE_SHADER
997              * and cannot cross that 4GB boundary, so we can do the address
998              * calculation with 32-bit adds.  Also, we can ignore the high
999              * bits because IRIS_MEMZONE_SHADER is in the [0, 4GB) range.
1000              */
1001             assert(IRIS_MEMZONE_SHADER_START >> 32 == 0ull);
1002 
1003             nir_def *const_data_addr =
1004                nir_iadd(&b, nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), offset);
1005 
1006             nir_def *data =
1007                nir_load_global_constant(&b, nir_u2u64(&b, const_data_addr),
1008                                         load_align,
1009                                         intrin->def.num_components,
1010                                         intrin->def.bit_size);
1011 
1012             nir_def_rewrite_uses(&intrin->def,
1013                                      data);
1014             continue;
1015          }
1016          case nir_intrinsic_load_user_clip_plane: {
1017             unsigned ucp = nir_intrinsic_ucp_id(intrin);
1018 
1019             if (ucp_idx[ucp] == -1) {
1020                ucp_idx[ucp] = num_system_values;
1021                num_system_values += 4;
1022             }
1023 
1024             for (int i = 0; i < 4; i++) {
1025                system_values[ucp_idx[ucp] + i] =
1026                   BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
1027             }
1028 
1029             b.cursor = nir_before_instr(instr);
1030             offset = nir_imm_int(&b, system_values_start +
1031                                      ucp_idx[ucp] * sizeof(uint32_t));
1032             break;
1033          }
1034          case nir_intrinsic_load_patch_vertices_in:
1035             if (patch_vert_idx == -1)
1036                patch_vert_idx = num_system_values++;
1037 
1038             system_values[patch_vert_idx] =
1039                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
1040 
1041             b.cursor = nir_before_instr(instr);
1042             offset = nir_imm_int(&b, system_values_start +
1043                                      patch_vert_idx * sizeof(uint32_t));
1044             break;
1045          case nir_intrinsic_load_tess_level_outer_default:
1046             if (tess_outer_default_idx == -1) {
1047                tess_outer_default_idx = num_system_values;
1048                num_system_values += 4;
1049             }
1050 
1051             for (int i = 0; i < 4; i++) {
1052                system_values[tess_outer_default_idx + i] =
1053                   BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1054             }
1055 
1056             b.cursor = nir_before_instr(instr);
1057             offset = nir_imm_int(&b, system_values_start +
1058                                  tess_outer_default_idx * sizeof(uint32_t));
1059             break;
1060          case nir_intrinsic_load_tess_level_inner_default:
1061             if (tess_inner_default_idx == -1) {
1062                tess_inner_default_idx = num_system_values;
1063                num_system_values += 2;
1064             }
1065 
1066             for (int i = 0; i < 2; i++) {
1067                system_values[tess_inner_default_idx + i] =
1068                   BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X + i;
1069             }
1070 
1071             b.cursor = nir_before_instr(instr);
1072             offset = nir_imm_int(&b, system_values_start +
1073                                  tess_inner_default_idx * sizeof(uint32_t));
1074             break;
1075          case nir_intrinsic_image_deref_load_param_intel: {
1076             assert(devinfo->ver < 9);
1077             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1078             nir_variable *var = nir_deref_instr_get_variable(deref);
1079 
1080             if (img_idx[var->data.binding] == -1) {
1081                /* GL only allows arrays of arrays of images. */
1082                assert(glsl_type_is_image(glsl_without_array(var->type)));
1083                unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
1084 
1085                for (int i = 0; i < num_images; i++) {
1086                   const unsigned img = var->data.binding + i;
1087 
1088                   img_idx[img] = num_system_values;
1089                   num_system_values += ISL_IMAGE_PARAM_SIZE;
1090 
1091                   uint32_t *img_sv = &system_values[img_idx[img]];
1092 
1093                   setup_vec4_image_sysval(
1094                      img_sv + ISL_IMAGE_PARAM_OFFSET_OFFSET, img,
1095                      offsetof(struct isl_image_param, offset), 2);
1096                   setup_vec4_image_sysval(
1097                      img_sv + ISL_IMAGE_PARAM_SIZE_OFFSET, img,
1098                      offsetof(struct isl_image_param, size), 3);
1099                   setup_vec4_image_sysval(
1100                      img_sv + ISL_IMAGE_PARAM_STRIDE_OFFSET, img,
1101                      offsetof(struct isl_image_param, stride), 4);
1102                   setup_vec4_image_sysval(
1103                      img_sv + ISL_IMAGE_PARAM_TILING_OFFSET, img,
1104                      offsetof(struct isl_image_param, tiling), 3);
1105                   setup_vec4_image_sysval(
1106                      img_sv + ISL_IMAGE_PARAM_SWIZZLING_OFFSET, img,
1107                      offsetof(struct isl_image_param, swizzling), 2);
1108                }
1109             }
1110 
1111             b.cursor = nir_before_instr(instr);
1112             offset = nir_iadd_imm(&b,
1113                get_aoa_deref_offset(&b, deref, ISL_IMAGE_PARAM_SIZE * 4),
1114                system_values_start +
1115                img_idx[var->data.binding] * 4 +
1116                nir_intrinsic_base(intrin) * 16);
1117             break;
1118          }
1119          case nir_intrinsic_load_workgroup_size: {
1120             assert(nir->info.workgroup_size_variable);
1121             if (variable_group_size_idx == -1) {
1122                variable_group_size_idx = num_system_values;
1123                num_system_values += 3;
1124                for (int i = 0; i < 3; i++) {
1125                   system_values[variable_group_size_idx + i] =
1126                      BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
1127                }
1128             }
1129 
1130             b.cursor = nir_before_instr(instr);
1131             offset = nir_imm_int(&b, system_values_start +
1132                                      variable_group_size_idx * sizeof(uint32_t));
1133             break;
1134          }
1135          case nir_intrinsic_load_work_dim: {
1136             if (work_dim_idx == -1) {
1137                work_dim_idx = num_system_values++;
1138                system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
1139             }
1140             b.cursor = nir_before_instr(instr);
1141             offset = nir_imm_int(&b, system_values_start +
1142                                      work_dim_idx * sizeof(uint32_t));
1143             break;
1144          }
1145          case nir_intrinsic_load_kernel_input: {
1146             assert(nir_intrinsic_base(intrin) +
1147                    nir_intrinsic_range(intrin) <= kernel_input_size);
1148             b.cursor = nir_before_instr(instr);
1149             offset = nir_iadd_imm(&b, intrin->src[0].ssa,
1150                                       nir_intrinsic_base(intrin));
1151             break;
1152          }
1153          default:
1154             continue;
1155          }
1156 
1157          nir_def *load =
1158             nir_load_ubo(&b, intrin->def.num_components, intrin->def.bit_size,
1159                          temp_ubo_name, offset,
1160                          .align_mul = 4,
1161                          .align_offset = 0,
1162                          .range_base = 0,
1163                          .range = ~0);
1164 
1165          nir_def_rewrite_uses(&intrin->def,
1166                                   load);
1167          nir_instr_remove(instr);
1168       }
1169    }
1170 
1171    nir_validate_shader(nir, "before remapping");
1172 
1173    /* Uniforms are stored in constant buffer 0, so the user-facing UBOs
1174     * are shifted up by one.  If any constant buffer is needed, constant
1175     * buffer 0 will be needed as well, so account for it.
1176     */
1177    unsigned num_cbufs = nir->info.num_ubos;
1178    if (num_cbufs || nir->num_uniforms)
1179       num_cbufs++;
1180 
1181    /* Place the new params in a new cbuf. */
1182    if (num_system_values > 0 || kernel_input_size > 0) {
1183       unsigned sysval_cbuf_index = num_cbufs;
1184       num_cbufs++;
1185 
1186       system_values = reralloc(mem_ctx, system_values, unsigned,
1187                                num_system_values);
1188 
1189       nir_foreach_block(block, impl) {
1190          nir_foreach_instr_safe(instr, block) {
1191             if (instr->type != nir_instr_type_intrinsic)
1192                continue;
1193 
1194             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
1195 
1196             if (load->intrinsic != nir_intrinsic_load_ubo)
1197                continue;
1198 
1199             b.cursor = nir_before_instr(instr);
1200 
1201             if (load->src[0].ssa == temp_ubo_name) {
1202                nir_def *imm = nir_imm_int(&b, sysval_cbuf_index);
1203                nir_src_rewrite(&load->src[0], imm);
1204             }
1205          }
1206       }
1207 
1208       /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
1209       nir_opt_constant_folding(nir);
1210    } else {
1211       ralloc_free(system_values);
1212       system_values = NULL;
1213    }
1214 
1215    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
1216    nir_validate_shader(nir, "after remap");
1217 
1218    /* We don't use params[], but Gallium leaves num_uniforms set.  We use it
1219     * to detect when cbuf0 exists but is no longer needed by the time we get
1220     * here.  Zero it out so that the back-end doesn't get confused when
1221     * nr_params * 4 != num_uniforms.
1222     */
1223    nir->num_uniforms = 0;
1224 
1225    *out_system_values = system_values;
1226    *out_num_system_values = num_system_values;
1227    *out_num_cbufs = num_cbufs;
1228 }
1229 
1230 static const char *surface_group_names[] = {
1231    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
1232    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
1233    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
1234    [IRIS_SURFACE_GROUP_TEXTURE_LOW64]      = "texture",
1235    [IRIS_SURFACE_GROUP_TEXTURE_HIGH64]     = "texture",
1236    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
1237    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
1238    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
1239 };
1240 
1241 static void
1242 iris_print_binding_table(FILE *fp, const char *name,
1243                          const struct iris_binding_table *bt)
1244 {
1245    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
1246 
1247    uint32_t total = 0;
1248    uint32_t compacted = 0;
1249 
1250    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1251       uint32_t size = bt->sizes[i];
1252       total += size;
1253       if (size)
1254          compacted += util_bitcount64(bt->used_mask[i]);
1255    }
1256 
1257    if (total == 0) {
1258       fprintf(fp, "Binding table for %s is empty\n\n", name);
1259       return;
1260    }
1261 
1262    if (total != compacted) {
1263       fprintf(fp, "Binding table for %s "
1264               "(compacted to %u entries from %u entries)\n",
1265               name, compacted, total);
1266    } else {
1267       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
1268    }
1269 
1270    uint32_t entry = 0;
1271    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1272       uint64_t mask = bt->used_mask[i];
1273       while (mask) {
1274          int index = u_bit_scan64(&mask);
1275          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
1276       }
1277    }
1278    fprintf(fp, "\n");
1279 }
1280 
1281 enum {
1282    /* Max elements in a surface group. */
1283    SURFACE_GROUP_MAX_ELEMENTS = 64,
1284 };
1285 
1286 /**
1287  * Map a <group, index> pair to a binding table index.
1288  *
1289  * For example: <UBO, 5> => binding table index 12
1290  */
1291 uint32_t
1292 iris_group_index_to_bti(const struct iris_binding_table *bt,
1293                         enum iris_surface_group group, uint32_t index)
1294 {
1295    assert(index < bt->sizes[group]);
1296    uint64_t mask = bt->used_mask[group];
1297    uint64_t bit = 1ull << index;
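   /* Counting the used bits below `index` gives the surface's position within
    * the compacted group; adding the group's base offset yields the BTI.
    */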
1298    if (bit & mask) {
1299       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
1300    } else {
1301       return IRIS_SURFACE_NOT_USED;
1302    }
1303 }
1304 
1305 /**
1306  * Map a binding table index back to a <group, index> pair.
1307  *
1308  * For example: binding table index 12 => <UBO, 5>
1309  */
1310 uint32_t
1311 iris_bti_to_group_index(const struct iris_binding_table *bt,
1312                         enum iris_surface_group group, uint32_t bti)
1313 {
1314    uint64_t used_mask = bt->used_mask[group];
1315    assert(bti >= bt->offsets[group]);
1316 
1317    uint32_t c = bti - bt->offsets[group];
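   /* The BTI corresponds to the c-th used surface of the group (counting from
    * the lowest set bit), so walk the used bits in ascending order until we
    * reach it.
    */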
1318    while (used_mask) {
1319       int i = u_bit_scan64(&used_mask);
1320       if (c == 0)
1321          return i;
1322       c--;
1323    }
1324 
1325    return IRIS_SURFACE_NOT_USED;
1326 }
1327 
1328 static void
1329 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
1330                      nir_instr *instr, nir_src *src,
1331                      enum iris_surface_group group)
1332 {
1333    assert(bt->sizes[group] > 0);
1334 
1335    b->cursor = nir_before_instr(instr);
1336    nir_def *bti;
1337    if (nir_src_is_const(*src)) {
1338       uint32_t index = nir_src_as_uint(*src);
1339       bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
1340                            src->ssa->bit_size);
1341    } else {
1342       /* Indirect usage makes all the surfaces of the group available,
1343        * so we can just add the base.
1344        */
1345       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
1346       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
1347    }
1348    nir_src_rewrite(src, bti);
1349 }
1350 
1351 static void
1352 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
1353                    enum iris_surface_group group)
1354 {
1355    assert(bt->sizes[group] > 0);
1356 
1357    if (nir_src_is_const(*src)) {
1358       uint64_t index = nir_src_as_uint(*src);
1359       assert(index < bt->sizes[group]);
1360       bt->used_mask[group] |= 1ull << index;
1361    } else {
1362       /* There's an indirect usage, we need all the surfaces. */
1363       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
1364    }
1365 }
1366 
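/* Setting e.g. INTEL_DISABLE_COMPACT_BINDING_TABLE=true (or =1) in the
 * environment skips binding table compaction, so every declared surface keeps
 * its slot; this can make binding table dumps easier to correlate with the
 * API-level bindings.
 */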
1367 static bool
1368 skip_compacting_binding_tables(void)
1369 {
1370    static int skip = -1;
1371    if (skip < 0)
1372       skip = debug_get_bool_option("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
1373    return skip;
1374 }
1375 
1376 /**
1377  * Set up the binding table indices and apply to the shader.
1378  */
1379 static void
1380 iris_setup_binding_table(const struct intel_device_info *devinfo,
1381                          struct nir_shader *nir,
1382                          struct iris_binding_table *bt,
1383                          unsigned num_render_targets,
1384                          unsigned num_system_values,
1385                          unsigned num_cbufs,
1386                          bool use_null_rt)
1387 {
1388    const struct shader_info *info = &nir->info;
1389 
1390    memset(bt, 0, sizeof(*bt));
1391 
1392    /* Set the sizes for each surface group.  For some groups, we already know
1393     * upfront how many will be used, so mark them.
1394     */
1395    if (info->stage == MESA_SHADER_FRAGMENT) {
1396       bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
1397       /* All render targets used. */
1398       bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
1399          BITFIELD64_MASK(num_render_targets);
1400 
1401       /* Set up the render target read surface group to support non-coherent
1402        * framebuffer fetch on Gfx8.
1403        */
1404       if (devinfo->ver == 8 && info->outputs_read) {
1405          bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
1406          bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
1407             BITFIELD64_MASK(num_render_targets);
1408       }
1409 
1410       bt->use_null_rt = use_null_rt;
1411    } else if (info->stage == MESA_SHADER_COMPUTE) {
1412       bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1413    }
1414 
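   /* The used_mask for each group is a single 64-bit bitfield, so textures are
    * split across two 64-entry groups: a shader using, say, 80 textures gets
    * 64 entries in TEXTURE_LOW64 and 16 in TEXTURE_HIGH64.
    */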
1415    assert(ARRAY_SIZE(info->textures_used) >= 4);
1416    int max_tex = BITSET_LAST_BIT(info->textures_used);
1417    assert(max_tex <= 128);
1418    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_LOW64] = MIN2(64, max_tex);
1419    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] = MAX2(0, max_tex - 64);
1420    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_LOW64] =
1421       info->textures_used[0] | ((uint64_t)info->textures_used[1]) << 32;
1422    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE_HIGH64] =
1423       info->textures_used[2] | ((uint64_t)info->textures_used[3]) << 32;
1424    bt->samplers_used_mask = info->samplers_used[0];
1425 
1426    bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = BITSET_LAST_BIT(info->images_used);
1427 
1428    /* Allocate an extra slot in the UBO section for NIR constants.
1429     * Binding table compaction will remove it if unnecessary.
1430     *
1431     * We don't include them in iris_compiled_shader::num_cbufs because
1432     * they are uploaded separately from shs->constbuf[], but from a shader
1433     * point of view, they're another UBO (at the end of the section).
1434     */
1435    bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
1436 
1437    bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
1438 
1439    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1440       assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
1441 
1442    /* Mark surfaces used for the cases where we don't have the information
1443     * available upfront.
1444     */
1445    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1446    nir_foreach_block (block, impl) {
1447       nir_foreach_instr (instr, block) {
1448          if (instr->type != nir_instr_type_intrinsic)
1449             continue;
1450 
1451          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1452          switch (intrin->intrinsic) {
1453          case nir_intrinsic_load_num_workgroups:
1454             bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
1455             break;
1456 
1457          case nir_intrinsic_load_output:
1458             if (devinfo->ver == 8) {
1459                mark_used_with_src(bt, &intrin->src[0],
1460                                   IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1461             }
1462             break;
1463 
1464          case nir_intrinsic_image_size:
1465          case nir_intrinsic_image_load:
1466          case nir_intrinsic_image_store:
1467          case nir_intrinsic_image_atomic:
1468          case nir_intrinsic_image_atomic_swap:
1469          case nir_intrinsic_image_load_raw_intel:
1470          case nir_intrinsic_image_store_raw_intel:
1471             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
1472             break;
1473 
1474          case nir_intrinsic_load_ubo:
1475             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
1476             break;
1477 
1478          case nir_intrinsic_store_ssbo:
1479             mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
1480             break;
1481 
1482          case nir_intrinsic_get_ssbo_size:
1483          case nir_intrinsic_ssbo_atomic:
1484          case nir_intrinsic_ssbo_atomic_swap:
1485          case nir_intrinsic_load_ssbo:
1486             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1487             break;
1488 
1489          default:
1490             break;
1491          }
1492       }
1493    }
1494 
1495    /* When compaction is disabled, just mark everything as used. */
1496    if (unlikely(skip_compacting_binding_tables())) {
1497       for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1498          bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1499    }
1500 
1501    /* Calculate the offsets and the binding table size based on the used
1502     * surfaces.  After this point, the functions to go between "group indices"
1503     * and binding table indices can be used.
1504     */
1505    uint32_t next = 0;
1506    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1507       if (bt->used_mask[i] != 0) {
1508          bt->offsets[i] = next;
1509          next += util_bitcount64(bt->used_mask[i]);
1510       }
1511    }
1512    bt->size_bytes = next * 4;
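   /* For example (illustrative): a fragment shader using two render targets
    * and three UBO slots, and nothing else, ends up with
    * offsets[RENDER_TARGET] == 0, offsets[UBO] == 2, and a 5-entry table
    * (size_bytes == 20); groups with an empty used_mask consume no entries.
    */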
1513 
1514    if (INTEL_DEBUG(DEBUG_BT)) {
1515       iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1516    }
1517 
1518    /* Apply the binding table indices.  The backend compiler is not expected
1519     * to change those, as we haven't set any of the *_start entries in brw
1520     * binding_table.
1521     */
1522    nir_builder b = nir_builder_create(impl);
1523 
1524    nir_foreach_block (block, impl) {
1525       nir_foreach_instr (instr, block) {
1526          if (instr->type == nir_instr_type_tex) {
1527             nir_tex_instr *tex = nir_instr_as_tex(instr);
1528             if (tex->texture_index < 64) {
1529                tex->texture_index =
1530                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_LOW64,
1531                                           tex->texture_index);
1532             } else {
1533                tex->texture_index =
1534                   iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE_HIGH64,
1535                                           tex->texture_index - 64);
1536             }
1537             continue;
1538          }
1539 
1540          if (instr->type != nir_instr_type_intrinsic)
1541             continue;
1542 
1543          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1544          switch (intrin->intrinsic) {
1545          case nir_intrinsic_image_size:
1546          case nir_intrinsic_image_load:
1547          case nir_intrinsic_image_store:
1548          case nir_intrinsic_image_atomic:
1549          case nir_intrinsic_image_atomic_swap:
1550          case nir_intrinsic_image_load_raw_intel:
1551          case nir_intrinsic_image_store_raw_intel:
1552             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1553                                  IRIS_SURFACE_GROUP_IMAGE);
1554             break;
1555 
1556          case nir_intrinsic_load_ubo:
1557             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1558                                  IRIS_SURFACE_GROUP_UBO);
1559             break;
1560 
1561          case nir_intrinsic_store_ssbo:
1562             rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1563                                  IRIS_SURFACE_GROUP_SSBO);
1564             break;
1565 
1566          case nir_intrinsic_load_output:
1567             if (devinfo->ver == 8) {
1568                rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1569                                     IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1570             }
1571             break;
1572 
1573          case nir_intrinsic_get_ssbo_size:
1574          case nir_intrinsic_ssbo_atomic:
1575          case nir_intrinsic_ssbo_atomic_swap:
1576          case nir_intrinsic_load_ssbo:
1577             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1578                                  IRIS_SURFACE_GROUP_SSBO);
1579             break;
1580 
1581          default:
1582             break;
1583          }
1584       }
1585    }
1586 }
1587 
1588 static void
1589 iris_debug_recompile_brw(struct iris_screen *screen,
1590                          struct util_debug_callback *dbg,
1591                          struct iris_uncompiled_shader *ish,
1592                          const struct brw_base_prog_key *key)
1593 {
1594    if (!ish || list_is_empty(&ish->variants)
1595             || list_is_singular(&ish->variants))
1596       return;
1597 
1598    const struct brw_compiler *c = screen->brw;
1599    const struct shader_info *info = &ish->nir->info;
1600 
1601    brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1602                        _mesa_shader_stage_to_string(info->stage),
1603                        info->name ? info->name : "(no identifier)",
1604                        info->label ? info->label : "");
1605 
1606    struct iris_compiled_shader *shader =
1607       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1608    const void *old_iris_key = &shader->key;
1609 
1610    union brw_any_prog_key old_key;
1611 
1612    switch (info->stage) {
1613    case MESA_SHADER_VERTEX:
1614       old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1615       break;
1616    case MESA_SHADER_TESS_CTRL:
1617       old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1618       break;
1619    case MESA_SHADER_TESS_EVAL:
1620       old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1621       break;
1622    case MESA_SHADER_GEOMETRY:
1623       old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1624       break;
1625    case MESA_SHADER_FRAGMENT:
1626       old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1627       break;
1628    case MESA_SHADER_COMPUTE:
1629       old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1630       break;
1631    default:
1632       unreachable("invalid shader stage");
1633    }
1634 
1635    brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1636 }
1637 
1638 static void
1639 iris_debug_recompile_elk(struct iris_screen *screen,
1640                          struct util_debug_callback *dbg,
1641                          struct iris_uncompiled_shader *ish,
1642                          const struct elk_base_prog_key *key)
1643 {
1644    if (!ish || list_is_empty(&ish->variants)
1645             || list_is_singular(&ish->variants))
1646       return;
1647 
1648    const struct elk_compiler *c = screen->elk;
1649    const struct shader_info *info = &ish->nir->info;
1650 
1651    elk_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1652                        _mesa_shader_stage_to_string(info->stage),
1653                        info->name ? info->name : "(no identifier)",
1654                        info->label ? info->label : "");
1655 
1656    struct iris_compiled_shader *shader =
1657       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1658    const void *old_iris_key = &shader->key;
1659 
1660    union elk_any_prog_key old_key;
1661 
1662    switch (info->stage) {
1663    case MESA_SHADER_VERTEX:
1664       old_key.vs = iris_to_elk_vs_key(screen, old_iris_key);
1665       break;
1666    case MESA_SHADER_TESS_CTRL:
1667       old_key.tcs = iris_to_elk_tcs_key(screen, old_iris_key);
1668       break;
1669    case MESA_SHADER_TESS_EVAL:
1670       old_key.tes = iris_to_elk_tes_key(screen, old_iris_key);
1671       break;
1672    case MESA_SHADER_GEOMETRY:
1673       old_key.gs = iris_to_elk_gs_key(screen, old_iris_key);
1674       break;
1675    case MESA_SHADER_FRAGMENT:
1676       old_key.wm = iris_to_elk_fs_key(screen, old_iris_key);
1677       break;
1678    case MESA_SHADER_COMPUTE:
1679       old_key.cs = iris_to_elk_cs_key(screen, old_iris_key);
1680       break;
1681    default:
1682       unreachable("invalid shader stage");
1683    }
1684 
1685    elk_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1686 }
1687 
1688 
1689 static void
1690 check_urb_size(struct iris_context *ice,
1691                unsigned needed_size,
1692                gl_shader_stage stage)
1693 {
1694    unsigned last_allocated_size = ice->shaders.urb.cfg.size[stage];
1695 
1696    /* If the last URB allocation wasn't large enough for our needs,
1697     * flag it as needing to be reconfigured.  Otherwise, we can use
1698     * the existing config.  However, if the URB is constrained, and
1699     * we can shrink our size for this stage, we may be able to gain
1700     * extra concurrency by reconfiguring it to be smaller.  Do so.
1701     */
1702    if (last_allocated_size < needed_size ||
1703        (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1704       ice->state.dirty |= IRIS_DIRTY_URB;
1705    }
1706 }
1707 
1708 /**
1709  * Get the shader for the last enabled geometry stage.
1710  *
1711  * This stage is the one which will feed stream output and the rasterizer.
1712  */
1713 static gl_shader_stage
1714 last_vue_stage(struct iris_context *ice)
1715 {
1716    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1717       return MESA_SHADER_GEOMETRY;
1718 
1719    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1720       return MESA_SHADER_TESS_EVAL;
1721 
1722    return MESA_SHADER_VERTEX;
1723 }
1724 
1725 /**
1726  * \param added  Set to \c true if the variant was added to the list (i.e., a
1727  *               variant matching \c key was not found).  Set to \c false
1728  *               otherwise.
1729  */
1730 static inline struct iris_compiled_shader *
1731 find_or_add_variant(const struct iris_screen *screen,
1732                     struct iris_uncompiled_shader *ish,
1733                     enum iris_program_cache_id cache_id,
1734                     const void *key, unsigned key_size,
1735                     bool *added)
1736 {
1737    struct list_head *start = ish->variants.next;
1738 
1739    *added = false;
1740 
1741    if (screen->precompile) {
1742       /* Check the first list entry.  There will always be at least one
1743        * variant in the list (most likely the precompile variant), and
1744        * other contexts only append new variants, so we can safely check
1745        * it without locking, saving that cost in the common case.
1746        */
1747       struct iris_compiled_shader *first =
1748          list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1749 
1750       if (memcmp(&first->key, key, key_size) == 0) {
1751          util_queue_fence_wait(&first->ready);
1752          return first;
1753       }
1754 
1755       /* Skip this one in the loop below */
1756       start = first->link.next;
1757    }
1758 
1759    struct iris_compiled_shader *variant = NULL;
1760 
1761    /* If it doesn't match, we have to walk the list; other contexts may be
1762     * concurrently appending shaders to it, so we need to lock here.
1763     */
1764    simple_mtx_lock(&ish->lock);
1765 
1766    list_for_each_entry_from(struct iris_compiled_shader, v, start,
1767                             &ish->variants, link) {
1768       if (memcmp(&v->key, key, key_size) == 0) {
1769          variant = v;
1770          break;
1771       }
1772    }
1773 
1774    gl_shader_stage stage = ish->nir->info.stage;
1775 
1776    if (variant == NULL) {
1777       variant = iris_create_shader_variant(screen, NULL, stage, cache_id,
1778                                            key_size, key);
1779 
1780       /* Append our new variant to the shader's variant list. */
1781       list_addtail(&variant->link, &ish->variants);
1782       *added = true;
1783 
1784       simple_mtx_unlock(&ish->lock);
1785    } else {
1786       simple_mtx_unlock(&ish->lock);
1787 
1788       util_queue_fence_wait(&variant->ready);
1789    }
1790 
1791    assert(stage == variant->stage);
1792    return variant;
1793 }
1794 
1795 static void
1796 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1797                                  UNUSED int thread_index)
1798 {
1799    free(_job);
1800 }
1801 
1802 static void
1803 iris_schedule_compile(struct iris_screen *screen,
1804                       struct util_queue_fence *ready_fence,
1805                       struct util_debug_callback *dbg,
1806                       struct iris_threaded_compile_job *job,
1807                       util_queue_execute_func execute)
1808 
1809 {
1810    struct util_async_debug_callback async_debug;
1811 
1812    if (dbg) {
1813       u_async_debug_init(&async_debug);
1814       job->dbg = &async_debug.base;
1815    }
1816 
1817    util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1818                       iris_threaded_compile_job_delete, 0);
1819 
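   /* When a debug callback is present, job->dbg points at the stack-allocated
    * async_debug above, so we must wait for the job and drain its messages
    * before returning.
    */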
1820    if (screen->driconf.sync_compile || dbg)
1821       util_queue_fence_wait(ready_fence);
1822 
1823    if (dbg) {
1824       u_async_debug_drain(&async_debug, dbg);
1825       u_async_debug_cleanup(&async_debug);
1826    }
1827 }
1828 
1829 /**
1830  * Compile a vertex shader, and upload the assembly.
1831  */
1832 static void
1833 iris_compile_vs(struct iris_screen *screen,
1834                 struct u_upload_mgr *uploader,
1835                 struct util_debug_callback *dbg,
1836                 struct iris_uncompiled_shader *ish,
1837                 struct iris_compiled_shader *shader)
1838 {
1839    const struct intel_device_info *devinfo = screen->devinfo;
1840    void *mem_ctx = ralloc_context(NULL);
1841    uint32_t *system_values;
1842    unsigned num_system_values;
1843    unsigned num_cbufs;
1844 
1845    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1846    const struct iris_vs_prog_key *const key = &shader->key.vs;
1847 
1848    if (key->vue.nr_userclip_plane_consts) {
1849       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1850       /* Check if variables were found. */
1851       if (nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1852                             true, false, NULL)) {
1853          nir_lower_io_to_temporaries(nir, impl, true, false);
1854          nir_lower_global_vars_to_local(nir);
1855          nir_lower_vars_to_ssa(nir);
1856          nir_shader_gather_info(nir, impl);
1857       }
1858    }
1859 
1860    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
1861                        &num_system_values, &num_cbufs);
1862 
1863    struct iris_binding_table bt;
1864    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1865                             num_system_values, num_cbufs, false);
1866 
1867    const char *error;
1868    const unsigned *program;
1869    if (screen->brw) {
1870       struct brw_vs_prog_data *brw_prog_data =
1871          rzalloc(mem_ctx, struct brw_vs_prog_data);
1872 
1873       brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1874 
1875       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
1876 
1877       brw_compute_vue_map(devinfo,
1878                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
1879                           nir->info.separate_shader, /* pos_slots */ 1);
1880 
1881       struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1882 
1883       struct brw_compile_vs_params params = {
1884          .base = {
1885             .mem_ctx = mem_ctx,
1886             .nir = nir,
1887             .log_data = dbg,
1888             .source_hash = ish->source_hash,
1889          },
1890          .key = &brw_key,
1891          .prog_data = brw_prog_data,
1892       };
1893 
1894       program = brw_compile_vs(screen->brw, &params);
1895       error = params.base.error_str;
1896       if (program) {
1897          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
1898          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
1899       }
1900    } else {
1901       struct elk_vs_prog_data *elk_prog_data =
1902          rzalloc(mem_ctx, struct elk_vs_prog_data);
1903 
1904       elk_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;
1905 
1906       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
1907 
1908       elk_compute_vue_map(devinfo,
1909                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
1910                           nir->info.separate_shader, /* pos_slots */ 1);
1911 
1912       struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
1913 
1914       struct elk_compile_vs_params params = {
1915          .base = {
1916             .mem_ctx = mem_ctx,
1917             .nir = nir,
1918             .log_data = dbg,
1919             .source_hash = ish->source_hash,
1920          },
1921          .key = &elk_key,
1922          .prog_data = elk_prog_data,
1923       };
1924 
1925       program = elk_compile_vs(screen->elk, &params);
1926       error = params.base.error_str;
1927       if (program) {
1928          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
1929          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
1930       }
1931    }
1932 
1933    if (program == NULL) {
1934       dbg_printf("Failed to compile vertex shader: %s\n", error);
1935       ralloc_free(mem_ctx);
1936 
1937       shader->compilation_failed = true;
1938       util_queue_fence_signal(&shader->ready);
1939 
1940       return;
1941    }
1942 
1943    shader->compilation_failed = false;
1944 
1945    uint32_t *so_decls =
1946       screen->vtbl.create_so_decl_list(&ish->stream_output,
1947                                        &iris_vue_data(shader)->vue_map);
1948 
1949    iris_finalize_program(shader, so_decls,
1950                          system_values, num_system_values, 0, num_cbufs, &bt);
1951 
1952    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1953                       sizeof(*key), key, program);
1954 
1955    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1956 
1957    ralloc_free(mem_ctx);
1958 }
1959 
1960 /**
1961  * Update the current vertex shader variant.
1962  *
1963  * Fill out the key, look in the cache, compile and bind if needed.
1964  */
1965 static void
1966 iris_update_compiled_vs(struct iris_context *ice)
1967 {
1968    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1969    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1970    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1971    struct iris_uncompiled_shader *ish =
1972       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1973 
1974    struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1975    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1976 
1977    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1978    bool added;
1979    struct iris_compiled_shader *shader =
1980       find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1981 
1982    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1983                                           &key, sizeof(key))) {
1984       iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1985    }
1986 
1987    if (shader->compilation_failed)
1988       shader = NULL;
1989 
1990    if (old != shader) {
1991       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1992                                     shader);
1993       ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1994       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1995                                 IRIS_STAGE_DIRTY_BINDINGS_VS |
1996                                 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1997       shs->sysvals_need_upload = true;
1998 
1999       unsigned urb_entry_size = shader ?
2000          iris_vue_data(shader)->urb_entry_size : 0;
2001       check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
2002    }
2003 }
2004 
2005 /**
2006  * Get the shader_info for a given stage, or NULL if the stage is disabled.
2007  */
2008 const struct shader_info *
2009 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
2010 {
2011    const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
2012 
2013    if (!ish)
2014       return NULL;
2015 
2016    const nir_shader *nir = ish->nir;
2017    return &nir->info;
2018 }
2019 
2020 /**
2021  * Get the union of TCS output and TES input slots.
2022  *
2023  * TCS and TES need to agree on a common URB entry layout.  In particular,
2024  * the data for all patch vertices is stored in a single URB entry (unlike
2025  * GS which has one entry per input vertex).  This means that per-vertex
2026  * array indexing needs a stride.
2027  *
2028  * SSO requires locations to match, but doesn't require the number of
2029  * outputs/inputs to match (in fact, the TCS often has extra outputs).
2030  * So, we need to take the extra step of unifying these on the fly.
2031  */
2032 static void
2033 get_unified_tess_slots(const struct iris_context *ice,
2034                        uint64_t *per_vertex_slots,
2035                        uint32_t *per_patch_slots)
2036 {
2037    const struct shader_info *tcs =
2038       iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
2039    const struct shader_info *tes =
2040       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2041 
2042    *per_vertex_slots = tes->inputs_read;
2043    *per_patch_slots = tes->patch_inputs_read;
2044 
2045    if (tcs) {
2046       *per_vertex_slots |= tcs->outputs_written;
2047       *per_patch_slots |= tcs->patch_outputs_written;
2048    }
2049 }
2050 
2051 /**
2052  * Compile a tessellation control shader, and upload the assembly.
2053  */
2054 static void
2055 iris_compile_tcs(struct iris_screen *screen,
2056                  struct hash_table *passthrough_ht,
2057                  struct u_upload_mgr *uploader,
2058                  struct util_debug_callback *dbg,
2059                  struct iris_uncompiled_shader *ish,
2060                  struct iris_compiled_shader *shader)
2061 {
2062    void *mem_ctx = ralloc_context(NULL);
2063    const struct intel_device_info *devinfo = screen->devinfo;
2064    uint32_t *system_values = NULL;
2065    unsigned num_system_values = 0;
2066    unsigned num_cbufs = 0;
2067 
2068    nir_shader *nir;
2069 
2070    struct iris_binding_table bt;
2071 
2072    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
2073    struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
2074    struct elk_tcs_prog_key elk_key = iris_to_elk_tcs_key(screen, key);
2075    uint32_t source_hash;
2076 
2077    if (ish) {
2078       nir = nir_shader_clone(mem_ctx, ish->nir);
2079       source_hash = ish->source_hash;
2080    } else {
2081       if (screen->brw) {
2082          nir = brw_nir_create_passthrough_tcs(mem_ctx, screen->brw, &brw_key);
2083       } else {
2084          assert(screen->elk);
2085          nir = elk_nir_create_passthrough_tcs(mem_ctx, screen->elk, &elk_key);
2086       }
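      /* There is no uncompiled shader for a passthrough TCS, so derive a
       * stable source hash from the generated NIR's blake3 instead.
       */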
2087       source_hash = *(uint32_t*)nir->info.source_blake3;
2088    }
2089 
2090    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2091                        &num_system_values, &num_cbufs);
2092    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2093                             num_system_values, num_cbufs, false);
2094 
2095    const char *error = NULL;
2096    const unsigned *program;
2097    if (screen->brw) {
2098       struct brw_tcs_prog_data *brw_prog_data =
2099          rzalloc(mem_ctx, struct brw_tcs_prog_data);
2100       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2101 
2102       struct brw_compile_tcs_params params = {
2103          .base = {
2104             .mem_ctx = mem_ctx,
2105             .nir = nir,
2106             .log_data = dbg,
2107             .source_hash = source_hash,
2108          },
2109          .key = &brw_key,
2110          .prog_data = brw_prog_data,
2111       };
2112 
2113       program = brw_compile_tcs(screen->brw, &params);
2114       error = params.base.error_str;
2115 
2116       if (program) {
2117          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2118          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2119       }
2120    } else {
2121       assert(screen->elk);
2122       struct elk_tcs_prog_data *elk_prog_data =
2123          rzalloc(mem_ctx, struct elk_tcs_prog_data);
2124       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2125 
2126       struct elk_compile_tcs_params params = {
2127          .base = {
2128             .mem_ctx = mem_ctx,
2129             .nir = nir,
2130             .log_data = dbg,
2131             .source_hash = source_hash,
2132          },
2133          .key = &elk_key,
2134          .prog_data = elk_prog_data,
2135       };
2136 
2137       program = elk_compile_tcs(screen->elk, &params);
2138       error = params.base.error_str;
2139 
2140       if (program) {
2141          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2142          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2143       }
2144    }
2145 
2146    if (program == NULL) {
2147       dbg_printf("Failed to compile control shader: %s\n", error);
2148       ralloc_free(mem_ctx);
2149 
2150       shader->compilation_failed = true;
2151       util_queue_fence_signal(&shader->ready);
2152 
2153       return;
2154    }
2155 
2156    shader->compilation_failed = false;
2157 
2158    iris_finalize_program(shader, NULL, system_values,
2159                          num_system_values, 0, num_cbufs, &bt);
2160 
2161    iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
2162                       IRIS_CACHE_TCS, sizeof(*key), key, program);
2163 
2164    if (ish)
2165       iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2166 
2167    ralloc_free(mem_ctx);
2168 }
2169 
2170 /**
2171  * Update the current tessellation control shader variant.
2172  *
2173  * Fill out the key, look in the cache, compile and bind if needed.
2174  */
2175 static void
2176 iris_update_compiled_tcs(struct iris_context *ice)
2177 {
2178    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
2179    struct iris_uncompiled_shader *tcs =
2180       ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
2181    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2182    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2183    const struct intel_device_info *devinfo = screen->devinfo;
2184 
2185    const struct shader_info *tes_info =
2186       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
2187    struct iris_tcs_prog_key key = {
2188       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
2189       ._tes_primitive_mode = tes_info->tess._primitive_mode,
2190       .input_vertices =
2191          !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
2192       .quads_workaround = devinfo->ver < 9 &&
2193                           tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
2194                           tes_info->tess.spacing == TESS_SPACING_EQUAL,
2195    };
2196    get_unified_tess_slots(ice, &key.outputs_written,
2197                           &key.patch_outputs_written);
2198    screen->vtbl.populate_tcs_key(ice, &key);
2199 
2200    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
2201    struct iris_compiled_shader *shader;
2202    bool added = false;
2203 
2204    if (tcs != NULL) {
2205       shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
2206                                    sizeof(key), &added);
2207    } else {
2208       /* Look for and possibly create a passthrough TCS */
2209       shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
2210 
2211 
2212       if (shader == NULL) {
2213          shader = iris_create_shader_variant(screen, ice->shaders.cache,
2214                                              MESA_SHADER_TESS_CTRL,
2215                                              IRIS_CACHE_TCS, sizeof(key), &key);
2216          added = true;
2217       }
2218 
2219    }
2220 
2221    /* If a new variant was added to (whichever cache), compile it now,
2222     * unless we have an uncompiled shader and the variant could be
2223     * retrieved from the disk cache.
2224     */
2225    if (added &&
2226        (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
2227                                                  &key, sizeof(key)))) {
2228       iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
2229                        shader);
2230    }
2231 
2232    if (shader->compilation_failed)
2233       shader = NULL;
2234 
2235    if (old != shader) {
2236       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
2237                                     shader);
2238       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
2239                                 IRIS_STAGE_DIRTY_BINDINGS_TCS |
2240                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
2241       shs->sysvals_need_upload = true;
2242 
2243       unsigned urb_entry_size = shader ?
2244          iris_vue_data(shader)->urb_entry_size : 0;
2245       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
2246    }
2247 }
2248 
2249 /**
2250  * Compile a tessellation evaluation shader, and upload the assembly.
2251  */
2252 static void
2253 iris_compile_tes(struct iris_screen *screen,
2254                  struct u_upload_mgr *uploader,
2255                  struct util_debug_callback *dbg,
2256                  struct iris_uncompiled_shader *ish,
2257                  struct iris_compiled_shader *shader)
2258 {
2259    void *mem_ctx = ralloc_context(NULL);
2260    uint32_t *system_values;
2261    const struct intel_device_info *devinfo = screen->devinfo;
2262    unsigned num_system_values;
2263    unsigned num_cbufs;
2264 
2265    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2266    const struct iris_tes_prog_key *const key = &shader->key.tes;
2267 
2268    if (key->vue.nr_userclip_plane_consts) {
2269       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2270       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2271                         true, false, NULL);
2272       nir_lower_io_to_temporaries(nir, impl, true, false);
2273       nir_lower_global_vars_to_local(nir);
2274       nir_lower_vars_to_ssa(nir);
2275       nir_shader_gather_info(nir, impl);
2276    }
2277 
2278    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2279                        &num_system_values, &num_cbufs);
2280 
2281    struct iris_binding_table bt;
2282    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2283                             num_system_values, num_cbufs, false);
2284 
2285    const char *error;
2286    const unsigned *program;
2287 
2288    if (screen->brw) {
2289       struct brw_tes_prog_data *brw_prog_data =
2290          rzalloc(mem_ctx, struct brw_tes_prog_data);
2291 
2292       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2293 
2294       struct intel_vue_map input_vue_map;
2295       brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2296                                key->patch_inputs_read);
2297 
2298       struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
2299 
2300       struct brw_compile_tes_params params = {
2301          .base = {
2302             .mem_ctx = mem_ctx,
2303             .nir = nir,
2304             .log_data = dbg,
2305             .source_hash = ish->source_hash,
2306          },
2307          .key = &brw_key,
2308          .prog_data = brw_prog_data,
2309          .input_vue_map = &input_vue_map,
2310       };
2311 
2312       program = brw_compile_tes(screen->brw, &params);
2313       error = params.base.error_str;
2314 
2315       if (program) {
2316          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2317          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2318       }
2319    } else {
2320       struct elk_tes_prog_data *elk_prog_data =
2321          rzalloc(mem_ctx, struct elk_tes_prog_data);
2322 
2323       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2324 
2325       struct intel_vue_map input_vue_map;
2326       elk_compute_tess_vue_map(&input_vue_map, key->inputs_read,
2327                                key->patch_inputs_read);
2328 
2329       struct elk_tes_prog_key elk_key = iris_to_elk_tes_key(screen, key);
2330 
2331       struct elk_compile_tes_params params = {
2332          .base = {
2333             .mem_ctx = mem_ctx,
2334             .nir = nir,
2335             .log_data = dbg,
2336             .source_hash = ish->source_hash,
2337          },
2338          .key = &elk_key,
2339          .prog_data = elk_prog_data,
2340          .input_vue_map = &input_vue_map,
2341       };
2342 
2343       program = elk_compile_tes(screen->elk, &params);
2344       error = params.base.error_str;
2345 
2346       if (program) {
2347          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2348          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2349       }
2350    }
2351 
2352    if (program == NULL) {
2353       dbg_printf("Failed to compile evaluation shader: %s\n", error);
2354       ralloc_free(mem_ctx);
2355 
2356       shader->compilation_failed = true;
2357       util_queue_fence_signal(&shader->ready);
2358 
2359       return;
2360    }
2361 
2362    shader->compilation_failed = false;
2363 
2364    uint32_t *so_decls =
2365       screen->vtbl.create_so_decl_list(&ish->stream_output,
2366                                        &iris_vue_data(shader)->vue_map);
2367 
2368    iris_finalize_program(shader, so_decls, system_values,
2369                          num_system_values, 0, num_cbufs, &bt);
2370 
2371    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
2372                       sizeof(*key), key, program);
2373 
2374    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2375 
2376    ralloc_free(mem_ctx);
2377 }
2378 
2379 /**
2380  * Update the current tessellation evaluation shader variant.
2381  *
2382  * Fill out the key, look in the cache, compile and bind if needed.
2383  */
2384 static void
2385 iris_update_compiled_tes(struct iris_context *ice)
2386 {
2387    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2388    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2389    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
2390    struct iris_uncompiled_shader *ish =
2391       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2392 
2393    struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
2394    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
2395    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2396 
2397    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
2398    bool added;
2399    struct iris_compiled_shader *shader =
2400       find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
2401 
2402    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2403                                           &key, sizeof(key))) {
2404       iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
2405    }
2406 
2407    if (shader->compilation_failed)
2408       shader = NULL;
2409 
2410    if (old != shader) {
2411       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
2412                                     shader);
2413       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
2414                                 IRIS_STAGE_DIRTY_BINDINGS_TES |
2415                                 IRIS_STAGE_DIRTY_CONSTANTS_TES;
2416       shs->sysvals_need_upload = true;
2417 
2418       unsigned urb_entry_size = shader ?
2419          iris_vue_data(shader)->urb_entry_size : 0;
2420       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
2421    }
2422 
2423    /* TODO: Could compare and avoid flagging this. */
2424    const struct shader_info *tes_info = &ish->nir->info;
2425    if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
2426       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
2427       ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
2428    }
2429 }
2430 
2431 /**
2432  * Compile a geometry shader, and upload the assembly.
2433  */
2434 static void
2435 iris_compile_gs(struct iris_screen *screen,
2436                 struct u_upload_mgr *uploader,
2437                 struct util_debug_callback *dbg,
2438                 struct iris_uncompiled_shader *ish,
2439                 struct iris_compiled_shader *shader)
2440 {
2441    const struct intel_device_info *devinfo = screen->devinfo;
2442    void *mem_ctx = ralloc_context(NULL);
2443    uint32_t *system_values;
2444    unsigned num_system_values;
2445    unsigned num_cbufs;
2446 
2447    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2448    const struct iris_gs_prog_key *const key = &shader->key.gs;
2449 
2450    if (key->vue.nr_userclip_plane_consts) {
2451       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2452       nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
2453                         false, NULL);
2454       nir_lower_io_to_temporaries(nir, impl, true, false);
2455       nir_lower_global_vars_to_local(nir);
2456       nir_lower_vars_to_ssa(nir);
2457       nir_shader_gather_info(nir, impl);
2458    }
2459 
2460    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2461                        &num_system_values, &num_cbufs);
2462 
2463    struct iris_binding_table bt;
2464    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2465                             num_system_values, num_cbufs, false);
2466 
2467    const char *error;
2468    const unsigned *program;
2469    if (screen->brw) {
2470       struct brw_gs_prog_data *brw_prog_data =
2471          rzalloc(mem_ctx, struct brw_gs_prog_data);
2472 
2473       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
2474 
2475       brw_compute_vue_map(devinfo,
2476                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
2477                           nir->info.separate_shader, /* pos_slots */ 1);
2478 
2479       struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
2480 
2481       struct brw_compile_gs_params params = {
2482          .base = {
2483             .mem_ctx = mem_ctx,
2484             .nir = nir,
2485             .log_data = dbg,
2486             .source_hash = ish->source_hash,
2487          },
2488          .key = &brw_key,
2489          .prog_data = brw_prog_data,
2490       };
2491 
2492       program = brw_compile_gs(screen->brw, &params);
2493       error = params.base.error_str;
2494       if (program) {
2495          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2496          iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
2497       }
2498    } else {
2499       struct elk_gs_prog_data *elk_prog_data =
2500          rzalloc(mem_ctx, struct elk_gs_prog_data);
2501 
2502       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.base.ubo_ranges);
2503 
2504       elk_compute_vue_map(devinfo,
2505                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
2506                           nir->info.separate_shader, /* pos_slots */ 1);
2507 
2508       struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
2509 
2510       struct elk_compile_gs_params params = {
2511          .base = {
2512             .mem_ctx = mem_ctx,
2513             .nir = nir,
2514             .log_data = dbg,
2515             .source_hash = ish->source_hash,
2516          },
2517          .key = &elk_key,
2518          .prog_data = elk_prog_data,
2519       };
2520 
2521       program = elk_compile_gs(screen->elk, &params);
2522       error = params.base.error_str;
2523       if (program) {
2524          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2525          iris_apply_elk_prog_data(shader, &elk_prog_data->base.base);
2526       }
2527    }
2528 
2529    if (program == NULL) {
2530       dbg_printf("Failed to compile geometry shader: %s\n", error);
2531       ralloc_free(mem_ctx);
2532 
2533       shader->compilation_failed = true;
2534       util_queue_fence_signal(&shader->ready);
2535 
2536       return;
2537    }
2538 
2539    shader->compilation_failed = false;
2540 
2541    uint32_t *so_decls =
2542       screen->vtbl.create_so_decl_list(&ish->stream_output,
2543                                        &iris_vue_data(shader)->vue_map);
2544 
2545    iris_finalize_program(shader, so_decls, system_values,
2546                          num_system_values, 0, num_cbufs, &bt);
2547 
2548    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
2549                       sizeof(*key), key, program);
2550 
2551    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2552 
2553    ralloc_free(mem_ctx);
2554 }
2555 
2556 /**
2557  * Update the current geometry shader variant.
2558  *
2559  * Fill out the key, look in the cache, compile and bind if needed.
2560  */
2561 static void
2562 iris_update_compiled_gs(struct iris_context *ice)
2563 {
2564    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
2565    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2566    struct iris_uncompiled_shader *ish =
2567       ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
2568    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
2569    struct iris_compiled_shader *shader = NULL;
2570    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2571 
2572    if (ish) {
2573       struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
2574       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
2575 
2576       bool added;
2577 
2578       shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
2579                                    sizeof(key), &added);
2580 
2581       if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2582                                              &key, sizeof(key))) {
2583          iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
2584       }
2585 
2586       if (shader->compilation_failed)
2587          shader = NULL;
2588    }
2589 
2590    if (old != shader) {
2591       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
2592                                     shader);
2593       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
2594                                 IRIS_STAGE_DIRTY_BINDINGS_GS |
2595                                 IRIS_STAGE_DIRTY_CONSTANTS_GS;
2596       shs->sysvals_need_upload = true;
2597 
2598       unsigned urb_entry_size = shader ?
2599          iris_vue_data(shader)->urb_entry_size : 0;
2600       check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
2601    }
2602 }
2603 
2604 /**
2605  * Compile a fragment (pixel) shader, and upload the assembly.
2606  */
2607 static void
2608 iris_compile_fs(struct iris_screen *screen,
2609                 struct u_upload_mgr *uploader,
2610                 struct util_debug_callback *dbg,
2611                 struct iris_uncompiled_shader *ish,
2612                 struct iris_compiled_shader *shader,
2613                 struct intel_vue_map *vue_map)
2614 {
2615    void *mem_ctx = ralloc_context(NULL);
2616    uint32_t *system_values;
2617    const struct intel_device_info *devinfo = screen->devinfo;
2618    unsigned num_system_values;
2619    unsigned num_cbufs;
2620 
2621    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2622    const struct iris_fs_prog_key *const key = &shader->key.fs;
2623 
2624    iris_setup_uniforms(devinfo, mem_ctx, nir, 0, &system_values,
2625                        &num_system_values, &num_cbufs);
2626 
2627    /* Lower output variables to load_output intrinsics before setting up
2628     * binding tables, so iris_setup_binding_table can map any load_output
2629     * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
2630     * non-coherent framebuffer fetches.
2631     */
2632    brw_nir_lower_fs_outputs(nir);
2633 
2634    int null_rts = brw_nir_fs_needs_null_rt(devinfo, nir,
2635                                            key->multisample_fbo,
2636                                            key->alpha_to_coverage) ? 1 : 0;
2637 
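   /* If the shader needs a null render target (e.g. no color outputs are
    * written but render target writes must still be sent), reserve one
    * surface slot for it and tell the binding table setup to bind a null
    * surface there.
    */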
2638    struct iris_binding_table bt;
2639    iris_setup_binding_table(devinfo, nir, &bt,
2640                             MAX2(key->nr_color_regions, null_rts),
2641                             num_system_values, num_cbufs, null_rts != 0);
2642 
2643    const char *error;
2644    const unsigned *program;
2645 
2646    if (screen->brw) {
2647       struct brw_wm_prog_data *brw_prog_data =
2648          rzalloc(mem_ctx, struct brw_wm_prog_data);
2649 
2650       brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2651 
2652       brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
2653 
2654       struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
2655 
2656       struct brw_compile_fs_params params = {
2657          .base = {
2658             .mem_ctx = mem_ctx,
2659             .nir = nir,
2660             .log_data = dbg,
2661             .source_hash = ish->source_hash,
2662          },
2663          .key = &brw_key,
2664          .prog_data = brw_prog_data,
2665 
2666          .allow_spilling = true,
2667          .max_polygons = UCHAR_MAX,
2668          .vue_map = vue_map,
2669       };
2670 
2671       program = brw_compile_fs(screen->brw, &params);
2672       error = params.base.error_str;
2673       if (program) {
2674          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2675          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2676       }
2677    } else {
2678       struct elk_wm_prog_data *elk_prog_data =
2679          rzalloc(mem_ctx, struct elk_wm_prog_data);
2680 
2681       elk_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;
2682 
2683       elk_nir_analyze_ubo_ranges(screen->elk, nir, elk_prog_data->base.ubo_ranges);
2684 
2685       struct elk_wm_prog_key elk_key = iris_to_elk_fs_key(screen, key);
2686 
2687       struct elk_compile_fs_params params = {
2688          .base = {
2689             .mem_ctx = mem_ctx,
2690             .nir = nir,
2691             .log_data = dbg,
2692             .source_hash = ish->source_hash,
2693          },
2694          .key = &elk_key,
2695          .prog_data = elk_prog_data,
2696 
2697          .allow_spilling = true,
2698          .max_polygons = UCHAR_MAX,
2699          .vue_map = vue_map,
2700       };
2701 
2702       program = elk_compile_fs(screen->elk, &params);
2703       error = params.base.error_str;
2704       if (program) {
2705          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
2706          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
2707       }
2708    }
2709 
2710    if (program == NULL) {
2711       dbg_printf("Failed to compile fragment shader: %s\n", error);
2712       ralloc_free(mem_ctx);
2713 
2714       shader->compilation_failed = true;
2715       util_queue_fence_signal(&shader->ready);
2716 
2717       return;
2718    }
2719 
2720    shader->compilation_failed = false;
2721 
2722    iris_finalize_program(shader, NULL, system_values,
2723                          num_system_values, 0, num_cbufs, &bt);
2724 
2725    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2726                       sizeof(*key), key, program);
2727 
2728    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2729 
2730    ralloc_free(mem_ctx);
2731 }
2732 
2733 /**
2734  * Update the current fragment shader variant.
2735  *
2736  * Fill out the key, look in the cache, compile and bind if needed.
2737  */
2738 static void
2739 iris_update_compiled_fs(struct iris_context *ice)
2740 {
2741    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2742    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2743    struct iris_uncompiled_shader *ish =
2744       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2745    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2746    struct iris_fs_prog_key key = { KEY_INIT(base) };
2747    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2748 
2749    struct intel_vue_map *last_vue_map =
2750       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
2751 
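   /* If this shader depends on the last VUE map (set for fragment shaders
    * reading more than 16 varyings; see iris_create_shader_state), bake the
    * previous stage's valid-slot mask into the key so we compile one
    * variant per interface layout.
    */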
2752    if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2753       key.input_slots_valid = last_vue_map->slots_valid;
2754 
2755    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2756    bool added;
2757    struct iris_compiled_shader *shader =
2758       find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2759                           sizeof(key), &added);
2760 
2761    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2762                                           &key, sizeof(key))) {
2763       iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2764    }
2765 
2766    if (shader->compilation_failed)
2767       shader = NULL;
2768 
2769    if (old != shader) {
2770       // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2771       // toggles.  might be able to avoid flagging SBE too.
2772       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2773                                     shader);
2774       ice->state.dirty |= IRIS_DIRTY_WM |
2775                           IRIS_DIRTY_CLIP |
2776                           IRIS_DIRTY_SBE;
2777       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2778                                 IRIS_STAGE_DIRTY_BINDINGS_FS |
2779                                 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2780       shs->sysvals_need_upload = true;
2781    }
2782 }
2783 
2784 /**
2785  * Update the last enabled stage's VUE map.
2786  *
2787  * When the shader feeding the rasterizer's output interface changes, we
2788  * need to re-emit various packets.
2789  */
2790 static void
2791 update_last_vue_map(struct iris_context *ice,
2792                     struct iris_compiled_shader *shader)
2793 {
2794    const struct intel_vue_map *vue_map = &iris_vue_data(shader)->vue_map;
2795    const struct intel_vue_map *old_map =
2796       !ice->shaders.last_vue_shader ? NULL :
2797       &iris_vue_data(ice->shaders.last_vue_shader)->vue_map;
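   /* XOR the old and new slot masks: a set bit means that varying slot was
    * either added or removed by the shader swap.
    */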
2798    const uint64_t changed_slots =
2799       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2800 
2801    if (changed_slots & VARYING_BIT_VIEWPORT) {
2802       ice->state.num_viewports =
2803          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2804       ice->state.dirty |= IRIS_DIRTY_CLIP |
2805                           IRIS_DIRTY_SF_CL_VIEWPORT |
2806                           IRIS_DIRTY_CC_VIEWPORT |
2807                           IRIS_DIRTY_SCISSOR_RECT;
2808       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2809          ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2810    }
2811 
2812    if (changed_slots & VARYING_BIT_LAYER) {
2813       ice->state.dirty |= IRIS_DIRTY_CLIP;
2814    }
2815 
2816    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2817       ice->state.dirty |= IRIS_DIRTY_SBE;
2818    }
2819 
2820    iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2821 }
2822 
2823 static void
2824 iris_update_pull_constant_descriptors(struct iris_context *ice,
2825                                       gl_shader_stage stage)
2826 {
2827    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2828 
2829    if (!shader || !shader->has_ubo_pull)
2830       return;
2831 
2832    struct iris_shader_state *shs = &ice->state.shaders[stage];
2833    bool any_new_descriptors =
2834       shader->num_system_values > 0 && shs->sysvals_need_upload;
2835 
2836    unsigned bound_cbufs = shs->bound_cbufs;
2837 
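   /* Walk the bitmask of bound constant buffers, lazily creating a surface
    * state for any buffer that doesn't have one yet; pull-constant loads
    * read through these descriptors.
    */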
2838    while (bound_cbufs) {
2839       const int i = u_bit_scan(&bound_cbufs);
2840       struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2841       struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2842       if (!surf_state->res && cbuf->buffer) {
2843          iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2844                                          ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2845          any_new_descriptors = true;
2846       }
2847    }
2848 
2849    if (any_new_descriptors)
2850       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2851 }
2852 
2853 /**
2854  * Update the current shader variants for the given state.
2855  *
2856  * This should be called on every draw call to ensure that the correct
2857  * shaders are bound.  It will also flag any dirty state triggered by
2858  * swapping out those shaders.
2859  */
2860 void
2861 iris_update_compiled_shaders(struct iris_context *ice)
2862 {
2863    const uint64_t stage_dirty = ice->state.stage_dirty;
2864 
2865    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2866                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2867       struct iris_uncompiled_shader *tes =
2868          ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2869       if (tes) {
2870          iris_update_compiled_tcs(ice);
2871          iris_update_compiled_tes(ice);
2872       } else {
2873          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2874          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2875          ice->state.stage_dirty |=
2876             IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2877             IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2878             IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2879 
2880          if (ice->shaders.urb.constrained)
2881             ice->state.dirty |= IRIS_DIRTY_URB;
2882       }
2883    }
2884 
2885    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2886       iris_update_compiled_vs(ice);
2887    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2888       iris_update_compiled_gs(ice);
2889 
2890    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2891                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2892       const struct iris_compiled_shader *gs =
2893          ice->shaders.prog[MESA_SHADER_GEOMETRY];
2894       const struct iris_compiled_shader *tes =
2895          ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2896 
2897       bool points_or_lines = false;
2898 
2899       if (gs) {
2900          const struct iris_gs_data *gs_data = iris_gs_data_const(gs);
2901          points_or_lines =
2902             gs_data->output_topology == _3DPRIM_POINTLIST ||
2903             gs_data->output_topology == _3DPRIM_LINESTRIP;
2904       } else if (tes) {
2905          const struct iris_tes_data *tes_data = iris_tes_data_const(tes);
2906          points_or_lines =
2907             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_LINE ||
2908             tes_data->output_topology == INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
2909       }
2910 
2911       if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2912          /* Outbound to XY Clip enables */
2913          ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2914          ice->state.dirty |= IRIS_DIRTY_CLIP;
2915       }
2916    }
2917 
2918    gl_shader_stage last_stage = last_vue_stage(ice);
2919    struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2920    struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2921    update_last_vue_map(ice, shader);
2922    if (ice->state.streamout != shader->streamout) {
2923       ice->state.streamout = shader->streamout;
2924       ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2925    }
2926 
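   /* pipe_stream_output_info strides are in dwords; store them on the
    * stream output targets in bytes.
    */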
2927    if (ice->state.streamout_active) {
2928       for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2929          struct iris_stream_output_target *so =
2930             (void *) ice->state.so_target[i];
2931          if (so)
2932             so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2933       }
2934    }
2935 
2936    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2937       iris_update_compiled_fs(ice);
2938 
2939    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2940       if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2941          iris_update_pull_constant_descriptors(ice, i);
2942    }
2943 }
2944 
2945 static void
2946 iris_compile_cs(struct iris_screen *screen,
2947                 struct u_upload_mgr *uploader,
2948                 struct util_debug_callback *dbg,
2949                 struct iris_uncompiled_shader *ish,
2950                 struct iris_compiled_shader *shader)
2951 {
2952    void *mem_ctx = ralloc_context(NULL);
2953    uint32_t *system_values;
2954    const struct intel_device_info *devinfo = screen->devinfo;
2955    unsigned num_system_values;
2956    unsigned num_cbufs;
2957 
2958    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2959    const struct iris_cs_prog_key *const key = &shader->key.cs;
2960 
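   /* Lower compute system-value intrinsics (local invocation IDs, subgroup
    * ID, and friends) into the forms the backend expects before setting up
    * uniforms.
    */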
2961    if (screen->brw)
2962       NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
2963    else
2964       NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, devinfo, NULL);
2965 
2966    iris_setup_uniforms(devinfo, mem_ctx, nir, ish->kernel_input_size,
2967                        &system_values, &num_system_values, &num_cbufs);
2968 
2969    struct iris_binding_table bt;
2970    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2971                             num_system_values, num_cbufs, false);
2972 
2973    const char *error;
2974    const unsigned *program;
2975 
2976    if (screen->brw) {
2977       struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2978 
2979       struct brw_cs_prog_data *brw_prog_data =
2980          rzalloc(mem_ctx, struct brw_cs_prog_data);
2981 
2982       struct brw_compile_cs_params params = {
2983          .base = {
2984             .mem_ctx = mem_ctx,
2985             .nir = nir,
2986             .log_data = dbg,
2987             .source_hash = ish->source_hash,
2988          },
2989          .key = &brw_key,
2990          .prog_data = brw_prog_data,
2991       };
2992 
2993       program = brw_compile_cs(screen->brw, &params);
2994       error = params.base.error_str;
2995       if (program) {
2996          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
2997          iris_apply_brw_prog_data(shader, &brw_prog_data->base);
2998       }
2999    } else {
3000       struct elk_cs_prog_key elk_key = iris_to_elk_cs_key(screen, key);
3001 
3002       struct elk_cs_prog_data *elk_prog_data =
3003          rzalloc(mem_ctx, struct elk_cs_prog_data);
3004 
3005       struct elk_compile_cs_params params = {
3006          .base = {
3007             .mem_ctx = mem_ctx,
3008             .nir = nir,
3009             .log_data = dbg,
3010             .source_hash = ish->source_hash,
3011          },
3012          .key = &elk_key,
3013          .prog_data = elk_prog_data,
3014       };
3015 
3016       program = elk_compile_cs(screen->elk, &params);
3017       error = params.base.error_str;
3018       if (program) {
3019          iris_debug_recompile_elk(screen, dbg, ish, &elk_key.base);
3020          iris_apply_elk_prog_data(shader, &elk_prog_data->base);
3021       }
3022    }
3023 
3024    if (program == NULL) {
3025       dbg_printf("Failed to compile compute shader: %s\n", error);
      ralloc_free(mem_ctx); /* don't leak the compile context on failure */
3026 
3027       shader->compilation_failed = true;
3028       util_queue_fence_signal(&shader->ready);
3029 
3030       return;
3031    }
3032 
3033    shader->compilation_failed = false;
3034 
3035    iris_finalize_program(shader, NULL, system_values,
3036                          num_system_values, ish->kernel_input_size, num_cbufs,
3037                          &bt);
3038 
3039    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
3040                       sizeof(*key), key, program);
3041 
3042    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
3043 
3044    ralloc_free(mem_ctx);
3045 }
3046 
3047 static void
3048 iris_update_compiled_cs(struct iris_context *ice)
3049 {
3050    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
3051    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3052    struct iris_uncompiled_shader *ish =
3053       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
3054    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3055    struct iris_cs_prog_key key = { KEY_INIT(base) };
3056    screen->vtbl.populate_cs_key(ice, &key);
3057 
3058    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
3059    bool added;
3060    struct iris_compiled_shader *shader =
3061       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3062                           sizeof(key), &added);
3063 
3064    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3065                                           &key, sizeof(key))) {
3066       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3067    }
3068 
3069    if (shader->compilation_failed)
3070       shader = NULL;
3071 
3072    if (old != shader) {
3073       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
3074                                     shader);
3075       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
3076                                 IRIS_STAGE_DIRTY_BINDINGS_CS |
3077                                 IRIS_STAGE_DIRTY_CONSTANTS_CS;
3078       shs->sysvals_need_upload = true;
3079    }
3080 }
3081 
3082 void
3083 iris_update_compiled_compute_shader(struct iris_context *ice)
3084 {
3085    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
3086       iris_update_compiled_cs(ice);
3087 
3088    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
3089       iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
3090 }
3091 
3092 void
3093 iris_fill_cs_push_const_buffer(struct iris_screen *screen,
3094                                struct iris_compiled_shader *shader,
3095                                unsigned threads,
3096                                uint32_t *dst)
3097 {
3098    struct iris_cs_data *cs_data = iris_cs_data(shader);
3099    assert(iris_cs_push_const_total_size(shader, threads) > 0);
3100    assert(cs_data->push.cross_thread.size == 0);
3101    assert(cs_data->push.per_thread.dwords == 1);
3102    assert(cs_data->first_param_is_builtin_subgroup_id);
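   /* Per-thread push constant data is laid out in 32-byte (8-dword) units.
    * The only per-thread value here is the built-in subgroup ID, so thread
    * t simply gets the value t in its first dword.
    */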
3103    for (unsigned t = 0; t < threads; t++)
3104       dst[8 * t] = t;
3105 }
3106 
3107 /**
3108  * Allocate scratch BOs as needed for the given per-thread size and stage.
3109  */
3110 struct iris_bo *
3111 iris_get_scratch_space(struct iris_context *ice,
3112                        unsigned per_thread_scratch,
3113                        gl_shader_stage stage)
3114 {
3115    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3116    struct iris_bufmgr *bufmgr = screen->bufmgr;
3117    const struct intel_device_info *devinfo = screen->devinfo;
3118 
3119    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3120    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
3121    assert(per_thread_scratch == 1 << (encoded_size + 10));
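   /* Per-thread scratch sizes are powers of two, at least 1KB, so
    * encoded_size is log2(per_thread_scratch) - 10 (1KB -> 0, 2KB -> 1,
    * ...), which indexes the per-size bucket arrays below.
    */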
3122 
3123    /* On GFX version 12.5, scratch access changed to a surface-based model.
3124     * Instead of each shader type having its own layout based on IDs passed
3125     * from the relevant fixed-function unit, all scratch access is based on
3126     * thread IDs like it always has been for compute.
3127     */
3128    if (devinfo->verx10 >= 125)
3129       stage = MESA_SHADER_COMPUTE;
3130 
3131    struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
3132 
3133    if (!*bop) {
3134       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
3135       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
3136       *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
3137                            IRIS_MEMZONE_SHADER, BO_ALLOC_PLAIN);
3138    }
3139 
3140    return *bop;
3141 }
3142 
3143 const struct iris_state_ref *
3144 iris_get_scratch_surf(struct iris_context *ice,
3145                       unsigned per_thread_scratch)
3146 {
3147    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
3148    ASSERTED const struct intel_device_info *devinfo = screen->devinfo;
3149 
3150    assert(devinfo->verx10 >= 125);
3151 
3152    unsigned encoded_size = ffs(per_thread_scratch) - 11;
3153    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
3154    assert(per_thread_scratch == 1 << (encoded_size + 10));
3155 
3156    struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
3157 
3158    if (ref->res)
3159       return ref;
3160 
3161    struct iris_bo *scratch_bo =
3162       iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
3163 
3164    void *map = upload_state(ice->state.scratch_surface_uploader, ref,
3165                             screen->isl_dev.ss.size, 64);
3166 
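   /* Describe the scratch BO as a RAW buffer surface.  With the Gfx12.5+
    * surface-based scratch model, the pitch is the per-thread scratch size,
    * so each hardware thread addresses its own slice by thread ID.
    */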
3167    isl_buffer_fill_state(&screen->isl_dev, map,
3168                          .address = scratch_bo->address,
3169                          .size_B = scratch_bo->size,
3170                          .format = ISL_FORMAT_RAW,
3171                          .swizzle = ISL_SWIZZLE_IDENTITY,
3172                          .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
3173                          .stride_B = per_thread_scratch,
3174                          .is_scratch = true);
3175 
3176    return ref;
3177 }
3178 
3179 /* ------------------------------------------------------------------- */
3180 
3181 /**
3182  * The pipe->create_[stage]_state() driver hooks.
3183  *
3184  * Performs basic NIR preprocessing, records any state dependencies, and
3185  * returns an iris_uncompiled_shader as the Gallium CSO.
3186  *
3187  * Actual shader compilation to assembly happens later, at first use.
3188  */
3189 static void *
3190 iris_create_uncompiled_shader(struct iris_screen *screen,
3191                               nir_shader *nir,
3192                               const struct pipe_stream_output_info *so_info)
3193 {
3194    struct iris_uncompiled_shader *ish =
3195       calloc(1, sizeof(struct iris_uncompiled_shader));
3196    if (!ish)
3197       return NULL;
3198 
3199    pipe_reference_init(&ish->ref, 1);
3200    list_inithead(&ish->variants);
3201    simple_mtx_init(&ish->lock, mtx_plain);
3202    util_queue_fence_init(&ish->ready);
3203 
3204    ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
3205 
3206    ish->program_id = get_new_program_id(screen);
3207    ish->nir = nir;
3208    if (so_info) {
3209       memcpy(&ish->stream_output, so_info, sizeof(*so_info));
3210       update_so_info(&ish->stream_output, nir->info.outputs_written);
3211    }
3212 
3213    /* Use lowest dword of source shader blake3 for shader hash. */
3214    ish->source_hash = *(uint32_t*)nir->info.source_blake3;
3215 
3216    if (screen->disk_cache) {
3217       /* Serialize the NIR to a binary blob that we can hash for the disk
3218        * cache.  Drop unnecessary information (like variable names)
3219        * so the serialized NIR is smaller, and also to let us detect more
3220        * isomorphic shaders when hashing, increasing cache hits.
3221        */
3222       struct blob blob;
3223       blob_init(&blob);
3224       nir_serialize(&blob, nir, true);
3225       _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
3226       blob_finish(&blob);
3227    }
3228 
3229    return ish;
3230 }
3231 
3232 static void *
3233 iris_create_compute_state(struct pipe_context *ctx,
3234                           const struct pipe_compute_state *state)
3235 {
3236    struct iris_context *ice = (void *) ctx;
3237    struct iris_screen *screen = (void *) ctx->screen;
3238    struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3239 
3240    nir_shader *nir;
3241    switch (state->ir_type) {
3242    case PIPE_SHADER_IR_NIR:
3243       nir = (void *)state->prog;
3244       break;
3245 
3246    default:
3247       unreachable("Unsupported IR");
3248    }
3249 
3250    /* Most of iris doesn't really care about the difference between compute
3251     * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
3252     * it's way easier if we just normalize to COMPUTE here.
3253     */
3254    assert(nir->info.stage == MESA_SHADER_COMPUTE ||
3255           nir->info.stage == MESA_SHADER_KERNEL);
3256    nir->info.stage = MESA_SHADER_COMPUTE;
3257 
3258    struct iris_uncompiled_shader *ish =
3259       iris_create_uncompiled_shader(screen, nir, NULL);
3260    ish->kernel_input_size = state->req_input_mem;
3261    ish->kernel_shared_size = state->static_shared_mem;
3262 
3263    // XXX: disallow more than 64KB of shared variables
3264 
3265    if (screen->precompile) {
3266       struct iris_cs_prog_key key = { KEY_INIT(base) };
3267 
3268       struct iris_compiled_shader *shader =
3269          iris_create_shader_variant(screen, NULL, MESA_SHADER_COMPUTE,
3270                                     IRIS_CACHE_CS, sizeof(key), &key);
3271 
3272       /* Append our new variant to the shader's variant list. */
3273       list_addtail(&shader->link, &ish->variants);
3274 
3275       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3276                                     &key, sizeof(key))) {
3277          iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3278       }
3279    }
3280 
3281    return ish;
3282 }
3283 
3284 static void
3285 iris_get_compute_state_info(struct pipe_context *ctx, void *state,
3286                             struct pipe_compute_state_object_info *info)
3287 {
3288    struct iris_screen *screen = (void *) ctx->screen;
3289    struct iris_uncompiled_shader *ish = state;
3290 
3291    info->max_threads = MIN2(1024, 32 * screen->devinfo->max_cs_workgroup_threads);
3292    info->private_memory = 0;
3293    info->preferred_simd_size = 32;
3294    info->simd_sizes = 8 | 16 | 32;
3295 
3296    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3297                             &ish->variants, link) {
3298       info->private_memory = MAX2(info->private_memory,
3299                                   shader->total_scratch);
3300    }
3301 }
3302 
3303 static uint32_t
3304 iris_get_compute_state_subgroup_size(struct pipe_context *ctx, void *state,
3305                                      const uint32_t block[3])
3306 {
3307    struct iris_context *ice = (void *) ctx;
3308    struct iris_screen *screen = (void *) ctx->screen;
3309    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
3310    struct iris_uncompiled_shader *ish = state;
3311 
3312    struct iris_cs_prog_key key = { KEY_INIT(base) };
3313    screen->vtbl.populate_cs_key(ice, &key);
3314 
3315    bool added;
3316    struct iris_compiled_shader *shader =
3317       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
3318                           sizeof(key), &added);
3319 
3320    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
3321                                           &key, sizeof(key))) {
3322       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
3323    }
3324 
3325    return iris_get_cs_dispatch_info(screen->devinfo, shader, block).simd_size;
3326 }
3327 
3328 static void
3329 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
3330 {
3331    const struct iris_threaded_compile_job *job =
3332       (struct iris_threaded_compile_job *) _job;
3333 
3334    struct iris_screen *screen = job->screen;
3335    struct u_upload_mgr *uploader = job->uploader;
3336    struct util_debug_callback *dbg = job->dbg;
3337    struct iris_uncompiled_shader *ish = job->ish;
3338    struct iris_compiled_shader *shader = job->shader;
3339 
3340    switch (ish->nir->info.stage) {
3341    case MESA_SHADER_VERTEX:
3342       iris_compile_vs(screen, uploader, dbg, ish, shader);
3343       break;
3344    case MESA_SHADER_TESS_CTRL:
3345       iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
3346       break;
3347    case MESA_SHADER_TESS_EVAL:
3348       iris_compile_tes(screen, uploader, dbg, ish, shader);
3349       break;
3350    case MESA_SHADER_GEOMETRY:
3351       iris_compile_gs(screen, uploader, dbg, ish, shader);
3352       break;
3353    case MESA_SHADER_FRAGMENT:
3354       iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
3355       break;
3356 
3357    default:
3358       unreachable("Invalid shader stage.");
3359    }
3360 }
3361 
3362 static void *
3363 iris_create_shader_state(struct pipe_context *ctx,
3364                          const struct pipe_shader_state *state)
3365 {
3366    struct iris_context *ice = (void *) ctx;
3367    struct iris_screen *screen = (void *) ctx->screen;
3368    struct nir_shader *nir;
3369 
3370    if (state->type == PIPE_SHADER_IR_TGSI)
3371       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
3372    else
3373       nir = state->ir.nir;
3374 
3375    const struct shader_info *const info = &nir->info;
3376    struct iris_uncompiled_shader *ish =
3377       iris_create_uncompiled_shader(screen, nir, &state->stream_output);
3378 
3379    union iris_any_prog_key key;
3380    unsigned key_size = 0;
3381 
3382    memset(&key, 0, sizeof(key));
3383 
3384    switch (info->stage) {
3385    case MESA_SHADER_VERTEX:
3386       /* User clip planes */
3387       if (info->clip_distance_array_size == 0)
3388          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3389 
3390       key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
3391       key_size = sizeof(key.vs);
3392       break;
3393 
3394    case MESA_SHADER_TESS_CTRL: {
3395       key.tcs = (struct iris_tcs_prog_key) {
3396          KEY_INIT(vue.base),
3397          // XXX: make sure the linker fills this out from the TES...
3398          ._tes_primitive_mode =
3399          info->tess._primitive_mode ? info->tess._primitive_mode
3400                                    : TESS_PRIMITIVE_TRIANGLES,
3401          .outputs_written = info->outputs_written,
3402          .patch_outputs_written = info->patch_outputs_written,
3403       };
3404 
3405       /* MULTI_PATCH mode needs the key to contain the input patch dimensionality.
3406        * We don't have that information, so we randomly guess that the input
3407        * and output patches are the same size.  This is a bad guess, but we
3408        * can't do much better.
3409        */
3410       if (iris_use_tcs_multi_patch(screen))
3411          key.tcs.input_vertices = info->tess.tcs_vertices_out;
3412 
3413       key_size = sizeof(key.tcs);
3414       break;
3415    }
3416 
3417    case MESA_SHADER_TESS_EVAL:
3418       /* User clip planes */
3419       if (info->clip_distance_array_size == 0)
3420          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3421 
3422       key.tes = (struct iris_tes_prog_key) {
3423          KEY_INIT(vue.base),
3424          // XXX: not ideal, need TCS output/TES input unification
3425          .inputs_read = info->inputs_read,
3426          .patch_inputs_read = info->patch_inputs_read,
3427       };
3428 
3429       key_size = sizeof(key.tes);
3430       break;
3431 
3432    case MESA_SHADER_GEOMETRY:
3433       /* User clip planes */
3434       if (info->clip_distance_array_size == 0)
3435          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
3436 
3437       key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
3438       key_size = sizeof(key.gs);
3439       break;
3440 
3441    case MESA_SHADER_FRAGMENT:
3442       ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
3443                   (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
3444                   (1ull << IRIS_NOS_RASTERIZER) |
3445                   (1ull << IRIS_NOS_BLEND);
3446 
3447       STATIC_ASSERT(BRW_FS_VARYING_INPUT_MASK == ELK_FS_VARYING_INPUT_MASK);
3448 
3449       /* The program key needs the VUE map if there are > 16 inputs */
3450       if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
3451          ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
3452       }
3453 
3454       const uint64_t color_outputs = info->outputs_written &
3455          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
3456            BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
3457            BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
3458 
3459       bool can_rearrange_varyings =
3460          util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
3461 
3462       const struct intel_device_info *devinfo = screen->devinfo;
3463 
3464       key.fs = (struct iris_fs_prog_key) {
3465          KEY_INIT(base),
3466          .nr_color_regions = util_bitcount(color_outputs),
3467          .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
3468          .input_slots_valid =
3469             can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
3470       };
3471 
3472       key_size = sizeof(key.fs);
3473       break;
3474 
3475    default:
3476       unreachable("Invalid shader stage.");
3477    }
3478 
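   /* Precompile a variant with a default key.  If the disk cache doesn't
    * already have it, the compile runs asynchronously on the screen's
    * shader compiler queue; the variant's ready fence is signalled once
    * the compile finishes.
    */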
3479    if (screen->precompile) {
3480       struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
3481 
3482       struct iris_compiled_shader *shader =
3483          iris_create_shader_variant(screen, NULL, info->stage,
3484                                     (enum iris_program_cache_id) info->stage,
3485                                     key_size, &key);
3486 
3487       /* Append our new variant to the shader's variant list. */
3488       list_addtail(&shader->link, &ish->variants);
3489 
3490       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
3491                                     &key, key_size)) {
3492          assert(!util_queue_fence_is_signalled(&shader->ready));
3493 
3494          struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
3495 
3496          job->screen = screen;
3497          job->uploader = uploader;
3498          job->ish = ish;
3499          job->shader = shader;
3500 
3501          iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
3502                                iris_compile_shader);
3503       }
3504    }
3505 
3506    return ish;
3507 }
3508 
3509 /**
3510  * Called when the refcount on the iris_uncompiled_shader reaches 0.
3511  *
3512  * Frees the iris_uncompiled_shader.
3513  *
3514  * \sa iris_delete_shader_state
3515  */
3516 void
3517 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
3518 {
3519    struct iris_uncompiled_shader *ish = state;
3520 
3521    /* No need to take ish->lock; we hold the last reference to ish */
3522    list_for_each_entry_safe(struct iris_compiled_shader, shader,
3523                             &ish->variants, link) {
3524       list_del(&shader->link);
3525 
3526       iris_shader_variant_reference(&shader, NULL);
3527    }
3528 
3529    simple_mtx_destroy(&ish->lock);
3530    util_queue_fence_destroy(&ish->ready);
3531 
3532    ralloc_free(ish->nir);
3533    free(ish);
3534 }
3535 
3536 /**
3537  * The pipe->delete_[stage]_state() driver hooks.
3538  *
3539  * \sa iris_destroy_shader_state
3540  */
3541 static void
3542 iris_delete_shader_state(struct pipe_context *ctx, void *state)
3543 {
3544    struct iris_uncompiled_shader *ish = state;
3545    struct iris_context *ice = (void *) ctx;
3546 
3547    const gl_shader_stage stage = ish->nir->info.stage;
3548 
3549    if (ice->shaders.uncompiled[stage] == ish) {
3550       ice->shaders.uncompiled[stage] = NULL;
3551       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3552    }
3553 
3554    if (pipe_reference(&ish->ref, NULL))
3555       iris_destroy_shader_state(ctx, state);
3556 }
3557 
3558 /**
3559  * The pipe->bind_[stage]_state() driver hook.
3560  *
3561  * Binds an uncompiled shader as the current one for a particular stage.
3562  * Updates dirty tracking to account for the shader's NOS.
3563  */
3564 static void
3565 bind_shader_state(struct iris_context *ice,
3566                   struct iris_uncompiled_shader *ish,
3567                   gl_shader_stage stage)
3568 {
3569    uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3570    const uint64_t nos = ish ? ish->nos : 0;
3571 
3572    const struct shader_info *old_info = iris_get_shader_info(ice, stage);
3573    const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3574 
3575    if ((old_info ? BITSET_LAST_BIT(old_info->samplers_used) : 0) !=
3576        (new_info ? BITSET_LAST_BIT(new_info->samplers_used) : 0)) {
3577       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3578    }
3579 
3580    ice->shaders.uncompiled[stage] = ish;
3581    ice->state.stage_dirty |= stage_dirty_bit;
3582 
3583    /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change
3584     * (or that they no longer need to do so).
3585     */
3586    for (int i = 0; i < IRIS_NOS_COUNT; i++) {
3587       if (nos & (1 << i))
3588          ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
3589       else
3590          ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
3591    }
3592 }
3593 
3594 static void
3595 iris_bind_vs_state(struct pipe_context *ctx, void *state)
3596 {
3597    struct iris_context *ice = (struct iris_context *)ctx;
3598    struct iris_uncompiled_shader *ish = state;
3599 
3600    if (ish) {
3601       const struct shader_info *info = &ish->nir->info;
3602       if (ice->state.window_space_position != info->vs.window_space_position) {
3603          ice->state.window_space_position = info->vs.window_space_position;
3604 
3605          ice->state.dirty |= IRIS_DIRTY_CLIP |
3606                              IRIS_DIRTY_RASTER |
3607                              IRIS_DIRTY_CC_VIEWPORT;
3608       }
3609 
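      /* gl_BaseVertex/gl_BaseInstance/gl_DrawID and related system values
       * are sourced from extra vertex elements / SGVS, so a change in which
       * of them the VS reads means the vertex element and vertex buffer
       * setup must be re-emitted.
       */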
3610       const bool uses_draw_params =
3611          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
3612          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3613       const bool uses_derived_draw_params =
3614          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
3615          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
3616       const bool needs_sgvs_element = uses_draw_params ||
3617          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
3618          BITSET_TEST(info->system_values_read,
3619                      SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
3620 
3621       if (ice->state.vs_uses_draw_params != uses_draw_params ||
3622           ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
3623           ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
3624           ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
3625          ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
3626                              IRIS_DIRTY_VERTEX_ELEMENTS;
3627       }
3628 
3629       ice->state.vs_uses_draw_params = uses_draw_params;
3630       ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
3631       ice->state.vs_needs_sgvs_element = needs_sgvs_element;
3632       ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
3633    }
3634 
3635    bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3636 }
3637 
3638 static void
3639 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
3640 {
3641    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3642 }
3643 
3644 static void
3645 iris_bind_tes_state(struct pipe_context *ctx, void *state)
3646 {
3647    struct iris_context *ice = (struct iris_context *)ctx;
3648    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3649    const struct intel_device_info *devinfo = screen->devinfo;
3650 
3651    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3652    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3653       ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
3654                                             IRIS_DIRTY_VFG : 0);
3655 
3656    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3657 }
3658 
3659 static void
3660 iris_bind_gs_state(struct pipe_context *ctx, void *state)
3661 {
3662    struct iris_context *ice = (struct iris_context *)ctx;
3663 
3664    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3665    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3666       ice->state.dirty |= IRIS_DIRTY_URB;
3667 
3668    bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3669 }
3670 
3671 static void
3672 iris_bind_fs_state(struct pipe_context *ctx, void *state)
3673 {
3674    struct iris_context *ice = (struct iris_context *) ctx;
3675    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
3676    const struct intel_device_info *devinfo = screen->devinfo;
3677    struct iris_uncompiled_shader *old_ish =
3678       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3679    struct iris_uncompiled_shader *new_ish = state;
3680 
3681    const unsigned color_bits =
3682       BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3683       BITFIELD64_RANGE(FRAG_RESULT_DATA0, IRIS_MAX_DRAW_BUFFERS);
3684 
3685    /* Fragment shader outputs influence HasWriteableRT */
3686    if (!old_ish || !new_ish ||
3687        (old_ish->nir->info.outputs_written & color_bits) !=
3688        (new_ish->nir->info.outputs_written & color_bits))
3689       ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
3690 
3691    if (devinfo->ver == 8)
3692       ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
3693 
3694    bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3695 }
3696 
3697 static void
3698 iris_bind_cs_state(struct pipe_context *ctx, void *state)
3699 {
3700    bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3701 }
3702 
3703 static char *
3704 iris_finalize_nir(struct pipe_screen *_screen, struct nir_shader *nir)
3705 {
3706    struct iris_screen *screen = (struct iris_screen *)_screen;
3707    const struct intel_device_info *devinfo = screen->devinfo;
3708 
3709    NIR_PASS_V(nir, iris_fix_edge_flags);
3710 
3711    if (screen->brw) {
3712       struct brw_nir_compiler_opts opts = {};
3713       brw_preprocess_nir(screen->brw, nir, &opts);
3714 
3715       NIR_PASS_V(nir, brw_nir_lower_storage_image,
3716                  &(struct brw_nir_lower_storage_image_opts) {
3717                     .devinfo      = devinfo,
3718                     .lower_loads  = true,
3719                     .lower_stores = true,
3720                  });
3721    } else {
3722       assert(screen->elk);
3723 
3724       struct elk_nir_compiler_opts opts = {};
3725       elk_preprocess_nir(screen->elk, nir, &opts);
3726 
3727       NIR_PASS_V(nir, elk_nir_lower_storage_image,
3728                  &(struct elk_nir_lower_storage_image_opts) {
3729                     .devinfo        = devinfo,
3730                     .lower_loads    = true,
3731                     .lower_stores   = true,
3732 
3733                     /* Iris uploads image params used by
3734                      * get_size lowering only in Gfx8.
3735                      */
3736                     .lower_get_size = true,
3737                  });
3738    }
3739 
3740    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
3741 
3742    nir_sweep(nir);
3743 
3744    return NULL;
3745 }
3746 
3747 static void
3748 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
3749                                      unsigned max_threads)
3750 {
3751    struct iris_screen *screen = (struct iris_screen *) pscreen;
3752    util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
3753                                  false);
3754 }
3755 
3756 static bool
3757 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
3758                                              void *v_shader,
3759                                              enum pipe_shader_type p_stage)
3760 {
3761    struct iris_screen *screen = (struct iris_screen *) pscreen;
3762 
3763    /* Threaded compilation is only used for the precompile.  If precompile is
3764     * disabled, threaded compilation is "done."
3765     */
3766    if (!screen->precompile)
3767       return true;
3768 
3769    struct iris_uncompiled_shader *ish = v_shader;
3770 
3771    /* When precompile is enabled, the first entry is the precompile variant.
3772     * Check the ready fence of the precompile variant.
3773     */
3774    struct iris_compiled_shader *first =
3775       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
3776 
3777    return util_queue_fence_is_signalled(&first->ready);
3778 }
3779 
3780 void
3781 iris_init_screen_program_functions(struct pipe_screen *pscreen)
3782 {
3783    pscreen->is_parallel_shader_compilation_finished =
3784       iris_is_parallel_shader_compilation_finished;
3785    pscreen->set_max_shader_compiler_threads =
3786       iris_set_max_shader_compiler_threads;
3787    pscreen->finalize_nir = iris_finalize_nir;
3788 }
3789 
3790 void
3791 iris_init_program_functions(struct pipe_context *ctx)
3792 {
3793    ctx->create_vs_state  = iris_create_shader_state;
3794    ctx->create_tcs_state = iris_create_shader_state;
3795    ctx->create_tes_state = iris_create_shader_state;
3796    ctx->create_gs_state  = iris_create_shader_state;
3797    ctx->create_fs_state  = iris_create_shader_state;
3798    ctx->create_compute_state = iris_create_compute_state;
3799 
3800    ctx->delete_vs_state  = iris_delete_shader_state;
3801    ctx->delete_tcs_state = iris_delete_shader_state;
3802    ctx->delete_tes_state = iris_delete_shader_state;
3803    ctx->delete_gs_state  = iris_delete_shader_state;
3804    ctx->delete_fs_state  = iris_delete_shader_state;
3805    ctx->delete_compute_state = iris_delete_shader_state;
3806 
3807    ctx->bind_vs_state  = iris_bind_vs_state;
3808    ctx->bind_tcs_state = iris_bind_tcs_state;
3809    ctx->bind_tes_state = iris_bind_tes_state;
3810    ctx->bind_gs_state  = iris_bind_gs_state;
3811    ctx->bind_fs_state  = iris_bind_fs_state;
3812    ctx->bind_compute_state = iris_bind_cs_state;
3813 
3814    ctx->get_compute_state_info = iris_get_compute_state_info;
3815    ctx->get_compute_state_subgroup_size = iris_get_compute_state_subgroup_size;
3816 }
3817 
3818 struct intel_cs_dispatch_info
3819 iris_get_cs_dispatch_info(const struct intel_device_info *devinfo,
3820                           const struct iris_compiled_shader *shader,
3821                           const uint32_t block[3])
3822 {
3823    if (shader->brw_prog_data) {
3824       return brw_cs_get_dispatch_info(devinfo,
3825                                       brw_cs_prog_data(shader->brw_prog_data),
3826                                       block);
3827    } else {
3828       assert(shader->elk_prog_data);
3829       return elk_cs_get_dispatch_info(devinfo,
3830                                       elk_cs_prog_data(shader->elk_prog_data),
3831                                       block);
3832    }
3833 }
3834 
3835 unsigned
3836 iris_cs_push_const_total_size(const struct iris_compiled_shader *shader,
3837                               unsigned threads)
3838 {
3839    if (shader->brw_prog_data) {
3840       return brw_cs_push_const_total_size(brw_cs_prog_data(shader->brw_prog_data),
3841                                           threads);
3842    } else {
3843       assert(shader->elk_prog_data);
3844       return elk_cs_push_const_total_size(elk_cs_prog_data(shader->elk_prog_data),
3845                                           threads);
3846    }
3847 }
3848 
3849 uint32_t
3850 iris_fs_barycentric_modes(const struct iris_compiled_shader *shader,
3851                           enum intel_msaa_flags pushed_msaa_flags)
3852 {
3853    if (shader->brw_prog_data) {
3854       return wm_prog_data_barycentric_modes(brw_wm_prog_data(shader->brw_prog_data),
3855                                             pushed_msaa_flags);
3856    } else {
3857       assert(shader->elk_prog_data);
3858       return elk_wm_prog_data_barycentric_modes(elk_wm_prog_data(shader->elk_prog_data),
3859                                                 pushed_msaa_flags);
3860    }
3861 }
3862 
3863 bool
3864 iris_use_tcs_multi_patch(struct iris_screen *screen)
3865 {
3866    return screen->brw && screen->brw->use_tcs_multi_patch;
3867 }
3868 
3869 bool
3870 iris_indirect_ubos_use_sampler(struct iris_screen *screen)
3871 {
3872    if (screen->brw) {
3873       return screen->brw->indirect_ubos_use_sampler;
3874    } else {
3875       assert(screen->elk);
3876       return screen->elk->indirect_ubos_use_sampler;
3877    }
3878 }
3879 
3880 static void
3881 iris_shader_debug_log(void *data, unsigned *id, const char *fmt, ...)
3882 {
3883    struct util_debug_callback *dbg = data;
3884    va_list args;
3885 
3886    if (!dbg->debug_message)
3887       return;
3888 
3889    va_start(args, fmt);
3890    dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_SHADER_INFO, fmt, args);
3891    va_end(args);
3892 }
3893 
3894 static void
3895 iris_shader_perf_log(void *data, unsigned *id, const char *fmt, ...)
3896 {
3897    struct util_debug_callback *dbg = data;
3898    va_list args;
3899    va_start(args, fmt);
3900 
3901    if (INTEL_DEBUG(DEBUG_PERF)) {
3902       va_list args_copy;
3903       va_copy(args_copy, args);
3904       vfprintf(stderr, fmt, args_copy);
3905       va_end(args_copy);
3906    }
3907 
3908    if (dbg->debug_message) {
3909       dbg->debug_message(dbg->data, id, UTIL_DEBUG_TYPE_PERF_INFO, fmt, args);
3910    }
3911 
3912    va_end(args);
3913 }
3914 
3915 const void *
3916 iris_get_compiler_options(struct pipe_screen *pscreen,
3917                           enum pipe_shader_ir ir,
3918                           enum pipe_shader_type pstage)
3919 {
3920    struct iris_screen *screen = (struct iris_screen *) pscreen;
3921    gl_shader_stage stage = stage_from_pipe(pstage);
3922    assert(ir == PIPE_SHADER_IR_NIR);
3923 
3924    return screen->brw ? screen->brw->nir_options[stage]
3925                       : screen->elk->nir_options[stage];
3926 }
3927 
3928 void
3929 iris_compiler_init(struct iris_screen *screen)
3930 {
3931    STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == BRW_MAX_DRAW_BUFFERS);
3932    STATIC_ASSERT(IRIS_MAX_DRAW_BUFFERS == ELK_MAX_DRAW_BUFFERS);
3933    STATIC_ASSERT(IRIS_MAX_SOL_BINDINGS == ELK_MAX_SOL_BINDINGS);
3934 
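   /* Gfx9+ (Skylake and newer) use the brw compiler; older platforms are
    * handled by elk, the fork of the legacy compiler.
    */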
3935    if (screen->devinfo->ver >= 9) {
3936       screen->brw = brw_compiler_create(screen, screen->devinfo);
3937       screen->brw->shader_debug_log = iris_shader_debug_log;
3938       screen->brw->shader_perf_log = iris_shader_perf_log;
3939    } else {
3940       screen->elk = elk_compiler_create(screen, screen->devinfo);
3941       screen->elk->shader_debug_log = iris_shader_debug_log;
3942       screen->elk->shader_perf_log = iris_shader_perf_log;
3943       screen->elk->supports_shader_constants = true;
3944    }
3945 }
3946