1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See iris_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/brw_prim.h"
48 #include "iris_context.h"
49 #include "nir/tgsi_to_nir.h"
50 
51 #define KEY_INIT(prefix)                                                   \
52    .prefix.program_string_id = ish->program_id,                            \
53    .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
54 #define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
55    .base.program_string_id = prog_id,                      \
56    .base.limit_trig_input_range = limit_trig_input,        \
57    .base.tex.swizzles[0 ... BRW_MAX_SAMPLERS - 1] = 0x688, \
58    .base.tex.compressed_multisample_layout_mask = ~0,      \
59    .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
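/* Illustrative example (hypothetical expansion, for reference only): inside a
 * function where `ish` and `screen` are in scope, a declaration such as
 *
 *    struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
 *
 * expands to
 *
 *    struct iris_vs_prog_key key = {
 *       .vue.base.program_string_id = ish->program_id,
 *       .vue.base.limit_trig_input_range =
 *          screen->driconf.limit_trig_input_range,
 *    };
 *
 * In BRW_KEY_INIT, 0x688 is the packed identity texture swizzle (X/Y/Z/W).
 */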
60 
61 struct iris_threaded_compile_job {
62    struct iris_screen *screen;
63    struct u_upload_mgr *uploader;
64    struct util_debug_callback *dbg;
65    struct iris_uncompiled_shader *ish;
66    struct iris_compiled_shader *shader;
67 };
68 
69 static unsigned
70 get_new_program_id(struct iris_screen *screen)
71 {
72    return p_atomic_inc_return(&screen->program_id);
73 }
74 
75 void
76 iris_finalize_program(struct iris_compiled_shader *shader,
77                       struct brw_stage_prog_data *prog_data,
78                       uint32_t *streamout,
79                       enum brw_param_builtin *system_values,
80                       unsigned num_system_values,
81                       unsigned kernel_input_size,
82                       unsigned num_cbufs,
83                       const struct iris_binding_table *bt)
84 {
85    shader->prog_data = prog_data;
86    shader->streamout = streamout;
87    shader->system_values = system_values;
88    shader->num_system_values = num_system_values;
89    shader->kernel_input_size = kernel_input_size;
90    shader->num_cbufs = num_cbufs;
91    shader->bt = *bt;
92 
93    ralloc_steal(shader, shader->prog_data);
94    ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
95    ralloc_steal(shader->prog_data, prog_data->param);
96    ralloc_steal(shader, shader->streamout);
97    ralloc_steal(shader, shader->system_values);
98 }
99 
100 static struct brw_vs_prog_key
101 iris_to_brw_vs_key(const struct iris_screen *screen,
102                    const struct iris_vs_prog_key *key)
103 {
104    return (struct brw_vs_prog_key) {
105       BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
106                    key->vue.base.limit_trig_input_range),
107 
108       /* Don't tell the backend about our clip plane constants, we've
109        * already lowered them in NIR and don't want it doing it again.
110        */
111       .nr_userclip_plane_consts = 0,
112    };
113 }
114 
115 static struct brw_tcs_prog_key
116 iris_to_brw_tcs_key(const struct iris_screen *screen,
117                     const struct iris_tcs_prog_key *key)
118 {
119    return (struct brw_tcs_prog_key) {
120       BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
121                    key->vue.base.limit_trig_input_range),
122       ._tes_primitive_mode = key->_tes_primitive_mode,
123       .input_vertices = key->input_vertices,
124       .patch_outputs_written = key->patch_outputs_written,
125       .outputs_written = key->outputs_written,
126       .quads_workaround = key->quads_workaround,
127    };
128 }
129 
130 static struct brw_tes_prog_key
131 iris_to_brw_tes_key(const struct iris_screen *screen,
132                     const struct iris_tes_prog_key *key)
133 {
134    return (struct brw_tes_prog_key) {
135       BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
136                    key->vue.base.limit_trig_input_range),
137       .patch_inputs_read = key->patch_inputs_read,
138       .inputs_read = key->inputs_read,
139    };
140 }
141 
142 static struct brw_gs_prog_key
143 iris_to_brw_gs_key(const struct iris_screen *screen,
144                    const struct iris_gs_prog_key *key)
145 {
146    return (struct brw_gs_prog_key) {
147       BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
148                    key->vue.base.limit_trig_input_range),
149    };
150 }
151 
152 static struct brw_wm_prog_key
153 iris_to_brw_fs_key(const struct iris_screen *screen,
154                    const struct iris_fs_prog_key *key)
155 {
156    return (struct brw_wm_prog_key) {
157       BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
158                    key->base.limit_trig_input_range),
159       .nr_color_regions = key->nr_color_regions,
160       .flat_shade = key->flat_shade,
161       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
162       .alpha_to_coverage = key->alpha_to_coverage,
163       .clamp_fragment_color = key->clamp_fragment_color,
164       .persample_interp = key->persample_interp,
165       .multisample_fbo = key->multisample_fbo,
166       .force_dual_color_blend = key->force_dual_color_blend,
167       .coherent_fb_fetch = key->coherent_fb_fetch,
168       .color_outputs_valid = key->color_outputs_valid,
169       .input_slots_valid = key->input_slots_valid,
170       .ignore_sample_mask_out = !key->multisample_fbo,
171    };
172 }
173 
174 static struct brw_cs_prog_key
175 iris_to_brw_cs_key(const struct iris_screen *screen,
176                    const struct iris_cs_prog_key *key)
177 {
178    return (struct brw_cs_prog_key) {
179       BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
180                    key->base.limit_trig_input_range),
181    };
182 }
183 
184 static void *
185 upload_state(struct u_upload_mgr *uploader,
186              struct iris_state_ref *ref,
187              unsigned size,
188              unsigned alignment)
189 {
190    void *p = NULL;
191    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
192    return p;
193 }
194 
195 void
196 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
197                                 struct pipe_shader_buffer *buf,
198                                 struct iris_state_ref *surf_state,
199                                 isl_surf_usage_flags_t usage)
200 {
201    struct pipe_context *ctx = &ice->ctx;
202    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
203    bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
204 
205    void *map =
206       upload_state(ice->state.surface_uploader, surf_state,
207                    screen->isl_dev.ss.size, 64);
208    if (!unlikely(map)) {
209       surf_state->res = NULL;
210       return;
211    }
212 
213    struct iris_resource *res = (void *) buf->buffer;
214    struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
215    surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
216 
217    const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
218 
219    isl_buffer_fill_state(&screen->isl_dev, map,
220                          .address = res->bo->address + res->offset +
221                                     buf->buffer_offset,
222                          .size_B = buf->buffer_size - res->offset,
223                          .format = dataport ? ISL_FORMAT_RAW
224                                             : ISL_FORMAT_R32G32B32A32_FLOAT,
225                          .swizzle = ISL_SWIZZLE_IDENTITY,
226                          .stride_B = 1,
227                          .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
228 }
229 
230 static nir_ssa_def *
231 get_aoa_deref_offset(nir_builder *b,
232                      nir_deref_instr *deref,
233                      unsigned elem_size)
234 {
235    unsigned array_size = elem_size;
236    nir_ssa_def *offset = nir_imm_int(b, 0);
237 
238    while (deref->deref_type != nir_deref_type_var) {
239       assert(deref->deref_type == nir_deref_type_array);
240 
241       /* This level's element size is the previous level's array size */
242       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
243       assert(deref->arr.index.ssa);
244       offset = nir_iadd(b, offset,
245                            nir_imul(b, index, nir_imm_int(b, array_size)));
246 
247       deref = nir_deref_instr_parent(deref);
248       assert(glsl_type_is_array(deref->type));
249       array_size *= glsl_get_length(deref->type);
250    }
251 
252    /* Accessing an invalid surface index with the dataport can result in a
253     * hang.  According to the spec "if the index used to select an individual
254     * element is negative or greater than or equal to the size of the array,
255     * the results of the operation are undefined but may not lead to
256     * termination" -- which is one of the possible outcomes of the hang.
257     * Clamp the index to prevent access outside of the array bounds.
258     */
259    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
260 }
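/* Worked example (illustrative, hypothetical declaration): for
 * `image2D img[3][4]` accessed as `img[2][1]` with elem_size == 1, the loop
 * first adds 1 * 1 for the inner index, then 2 * 4 for the outer index (the
 * inner array length becomes the element size of the outer level), giving an
 * offset of 9.  The final array_size is 3 * 4 = 12, so the result is clamped
 * to umin(9, 12 - 1) = 9.
 */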
261 
262 static void
263 iris_lower_storage_image_derefs(nir_shader *nir)
264 {
265    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
266 
267    nir_builder b;
268    nir_builder_init(&b, impl);
269 
270    nir_foreach_block(block, impl) {
271       nir_foreach_instr_safe(instr, block) {
272          if (instr->type != nir_instr_type_intrinsic)
273             continue;
274 
275          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
276          switch (intrin->intrinsic) {
277          case nir_intrinsic_image_deref_load:
278          case nir_intrinsic_image_deref_store:
279          case nir_intrinsic_image_deref_atomic_add:
280          case nir_intrinsic_image_deref_atomic_imin:
281          case nir_intrinsic_image_deref_atomic_umin:
282          case nir_intrinsic_image_deref_atomic_imax:
283          case nir_intrinsic_image_deref_atomic_umax:
284          case nir_intrinsic_image_deref_atomic_and:
285          case nir_intrinsic_image_deref_atomic_or:
286          case nir_intrinsic_image_deref_atomic_xor:
287          case nir_intrinsic_image_deref_atomic_exchange:
288          case nir_intrinsic_image_deref_atomic_comp_swap:
289          case nir_intrinsic_image_deref_size:
290          case nir_intrinsic_image_deref_samples:
291          case nir_intrinsic_image_deref_load_raw_intel:
292          case nir_intrinsic_image_deref_store_raw_intel: {
293             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
294             nir_variable *var = nir_deref_instr_get_variable(deref);
295 
296             b.cursor = nir_before_instr(&intrin->instr);
297             nir_ssa_def *index =
298                nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
299                             get_aoa_deref_offset(&b, deref, 1));
300             nir_rewrite_image_intrinsic(intrin, index, false);
301             break;
302          }
303 
304          default:
305             break;
306          }
307       }
308    }
309 }
310 
311 static bool
312 iris_uses_image_atomic(const nir_shader *shader)
313 {
314    nir_foreach_function(function, shader) {
315       if (function->impl == NULL)
316          continue;
317 
318       nir_foreach_block(block, function->impl) {
319          nir_foreach_instr(instr, block) {
320             if (instr->type != nir_instr_type_intrinsic)
321                continue;
322 
323             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
324             switch (intrin->intrinsic) {
325             case nir_intrinsic_image_deref_atomic_add:
326             case nir_intrinsic_image_deref_atomic_imin:
327             case nir_intrinsic_image_deref_atomic_umin:
328             case nir_intrinsic_image_deref_atomic_imax:
329             case nir_intrinsic_image_deref_atomic_umax:
330             case nir_intrinsic_image_deref_atomic_and:
331             case nir_intrinsic_image_deref_atomic_or:
332             case nir_intrinsic_image_deref_atomic_xor:
333             case nir_intrinsic_image_deref_atomic_exchange:
334             case nir_intrinsic_image_deref_atomic_comp_swap:
335                unreachable("Should have been lowered in "
336                            "iris_lower_storage_image_derefs");
337 
338             case nir_intrinsic_image_atomic_add:
339             case nir_intrinsic_image_atomic_imin:
340             case nir_intrinsic_image_atomic_umin:
341             case nir_intrinsic_image_atomic_imax:
342             case nir_intrinsic_image_atomic_umax:
343             case nir_intrinsic_image_atomic_and:
344             case nir_intrinsic_image_atomic_or:
345             case nir_intrinsic_image_atomic_xor:
346             case nir_intrinsic_image_atomic_exchange:
347             case nir_intrinsic_image_atomic_comp_swap:
348                return true;
349 
350             default:
351                break;
352             }
353          }
354       }
355    }
356 
357    return false;
358 }
359 
360 /**
361  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
362  */
363 static bool
364 iris_fix_edge_flags(nir_shader *nir)
365 {
366    if (nir->info.stage != MESA_SHADER_VERTEX) {
367       nir_shader_preserve_all_metadata(nir);
368       return false;
369    }
370 
371    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
372                                                        VARYING_SLOT_EDGE);
373    if (!var) {
374       nir_shader_preserve_all_metadata(nir);
375       return false;
376    }
377 
378    var->data.mode = nir_var_shader_temp;
379    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
380    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
381    nir_fixup_deref_modes(nir);
382 
383    nir_foreach_function(f, nir) {
384       if (f->impl) {
385          nir_metadata_preserve(f->impl, nir_metadata_block_index |
386                                         nir_metadata_dominance |
387                                         nir_metadata_live_ssa_defs |
388                                         nir_metadata_loop_analysis);
389       } else {
390          nir_metadata_preserve(f->impl, nir_metadata_all);
391       }
392    }
393 
394    return true;
395 }
396 
397 /**
398  * Fix an uncompiled shader's stream output info.
399  *
400  * Core Gallium stores output->register_index as a "slot" number, where
401  * slots are assigned consecutively to all outputs in info->outputs_written.
402  * This naive packing of outputs doesn't work for us - we too have slots,
403  * but the layout is defined by the VUE map, which we won't have until we
404  * compile a specific shader variant.  So, we remap these and simply store
405  * VARYING_SLOT_* in our copy's output->register_index fields.
406  *
407  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
408  * components of our VUE header.  See brw_vue_map.c for the layout.
409  */
410 static void
411 update_so_info(struct pipe_stream_output_info *so_info,
412                uint64_t outputs_written)
413 {
414    uint8_t reverse_map[64] = {};
415    unsigned slot = 0;
416    while (outputs_written) {
417       reverse_map[slot++] = u_bit_scan64(&outputs_written);
418    }
419 
420    for (unsigned i = 0; i < so_info->num_outputs; i++) {
421       struct pipe_stream_output *output = &so_info->output[i];
422 
423       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
424       output->register_index = reverse_map[output->register_index];
425 
426       /* The VUE header contains three scalar fields packed together:
427        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
428        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
429        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
430        */
431       switch (output->register_index) {
432       case VARYING_SLOT_LAYER:
433          assert(output->num_components == 1);
434          output->register_index = VARYING_SLOT_PSIZ;
435          output->start_component = 1;
436          break;
437       case VARYING_SLOT_VIEWPORT:
438          assert(output->num_components == 1);
439          output->register_index = VARYING_SLOT_PSIZ;
440          output->start_component = 2;
441          break;
442       case VARYING_SLOT_PSIZ:
443          assert(output->num_components == 1);
444          output->start_component = 3;
445          break;
446       }
447 
448       //info->outputs_written |= 1ull << output->register_index;
449    }
450 }
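/* Worked example (illustrative, hypothetical outputs): if outputs_written has
 * only VARYING_SLOT_POS, VARYING_SLOT_PSIZ, and VARYING_SLOT_VAR0 set, then
 * reverse_map = { POS, PSIZ, VAR0 }.  A Gallium register_index of 2 is
 * remapped to VARYING_SLOT_VAR0, and a gl_PointSize output (slot 1) becomes
 * VARYING_SLOT_PSIZ with start_component = 3 (the .w channel of the VUE
 * header).
 */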
451 
452 static void
453 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
454                         unsigned offset, unsigned n)
455 {
456    assert(offset % sizeof(uint32_t) == 0);
457 
458    for (unsigned i = 0; i < n; ++i)
459       sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
460 
461    for (unsigned i = n; i < 4; ++i)
462       sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
463 }
464 
465 /**
466  * Associate NIR uniform variables with the prog_data->param[] mechanism
467  * used by the backend.  Also, decide which UBOs we'd like to push in an
468  * ideal situation (though the backend can reduce this).
469  */
470 static void
471 iris_setup_uniforms(const struct brw_compiler *compiler,
472                     void *mem_ctx,
473                     nir_shader *nir,
474                     struct brw_stage_prog_data *prog_data,
475                     unsigned kernel_input_size,
476                     enum brw_param_builtin **out_system_values,
477                     unsigned *out_num_system_values,
478                     unsigned *out_num_cbufs)
479 {
480    UNUSED const struct intel_device_info *devinfo = compiler->devinfo;
481 
482    unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
483 
484    const unsigned IRIS_MAX_SYSTEM_VALUES =
485       PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
486    enum brw_param_builtin *system_values =
487       rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
488    unsigned num_system_values = 0;
489 
490    unsigned patch_vert_idx = -1;
491    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
492    unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
493    unsigned variable_group_size_idx = -1;
494    unsigned work_dim_idx = -1;
495    memset(ucp_idx, -1, sizeof(ucp_idx));
496    memset(img_idx, -1, sizeof(img_idx));
497 
498    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
499 
500    nir_builder b;
501    nir_builder_init(&b, impl);
502 
503    b.cursor = nir_before_block(nir_start_block(impl));
504    nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
505 
506    /* Turn system value intrinsics into uniforms */
507    nir_foreach_block(block, impl) {
508       nir_foreach_instr_safe(instr, block) {
509          if (instr->type != nir_instr_type_intrinsic)
510             continue;
511 
512          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
513          nir_ssa_def *offset;
514 
515          switch (intrin->intrinsic) {
516          case nir_intrinsic_load_constant: {
517             unsigned load_size = intrin->dest.ssa.num_components *
518                                  intrin->dest.ssa.bit_size / 8;
519             unsigned load_align = intrin->dest.ssa.bit_size / 8;
520 
521             /* This one is special because it reads from the shader constant
522              * data and not cbuf0 which gallium uploads for us.
523              */
524             b.cursor = nir_instr_remove(&intrin->instr);
525 
526             nir_ssa_def *offset =
527                nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
528                                 nir_intrinsic_base(intrin));
529 
530             assert(load_size < b.shader->constant_data_size);
531             unsigned max_offset = b.shader->constant_data_size - load_size;
532             offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
533 
534             nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
535                nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
536                nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
537 
538             nir_ssa_def *data =
539                nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
540                                                 nir_u2u64(&b, offset)),
541                                load_align,
542                                intrin->dest.ssa.num_components,
543                                intrin->dest.ssa.bit_size);
544 
545             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
546                                      data);
547             continue;
548          }
549          case nir_intrinsic_load_user_clip_plane: {
550             unsigned ucp = nir_intrinsic_ucp_id(intrin);
551 
552             if (ucp_idx[ucp] == -1) {
553                ucp_idx[ucp] = num_system_values;
554                num_system_values += 4;
555             }
556 
557             for (int i = 0; i < 4; i++) {
558                system_values[ucp_idx[ucp] + i] =
559                   BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
560             }
561 
562             b.cursor = nir_before_instr(instr);
563             offset = nir_imm_int(&b, system_values_start +
564                                      ucp_idx[ucp] * sizeof(uint32_t));
565             break;
566          }
567          case nir_intrinsic_load_patch_vertices_in:
568             if (patch_vert_idx == -1)
569                patch_vert_idx = num_system_values++;
570 
571             system_values[patch_vert_idx] =
572                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
573 
574             b.cursor = nir_before_instr(instr);
575             offset = nir_imm_int(&b, system_values_start +
576                                      patch_vert_idx * sizeof(uint32_t));
577             break;
578          case nir_intrinsic_image_deref_load_param_intel: {
579             assert(devinfo->ver < 9);
580             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
581             nir_variable *var = nir_deref_instr_get_variable(deref);
582 
583             if (img_idx[var->data.binding] == -1) {
584                /* GL only allows arrays of arrays of images. */
585                assert(glsl_type_is_image(glsl_without_array(var->type)));
586                unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
587 
588                for (int i = 0; i < num_images; i++) {
589                   const unsigned img = var->data.binding + i;
590 
591                   img_idx[img] = num_system_values;
592                   num_system_values += BRW_IMAGE_PARAM_SIZE;
593 
594                   uint32_t *img_sv = &system_values[img_idx[img]];
595 
596                   setup_vec4_image_sysval(
597                      img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
598                      offsetof(struct brw_image_param, offset), 2);
599                   setup_vec4_image_sysval(
600                      img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
601                      offsetof(struct brw_image_param, size), 3);
602                   setup_vec4_image_sysval(
603                      img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
604                      offsetof(struct brw_image_param, stride), 4);
605                   setup_vec4_image_sysval(
606                      img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
607                      offsetof(struct brw_image_param, tiling), 3);
608                   setup_vec4_image_sysval(
609                      img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
610                      offsetof(struct brw_image_param, swizzling), 2);
611                }
612             }
613 
614             b.cursor = nir_before_instr(instr);
615             offset = nir_iadd(&b,
616                get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
617                nir_imm_int(&b, system_values_start +
618                                img_idx[var->data.binding] * 4 +
619                                nir_intrinsic_base(intrin) * 16));
620             break;
621          }
622          case nir_intrinsic_load_workgroup_size: {
623             assert(nir->info.workgroup_size_variable);
624             if (variable_group_size_idx == -1) {
625                variable_group_size_idx = num_system_values;
626                num_system_values += 3;
627                for (int i = 0; i < 3; i++) {
628                   system_values[variable_group_size_idx + i] =
629                      BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
630                }
631             }
632 
633             b.cursor = nir_before_instr(instr);
634             offset = nir_imm_int(&b, system_values_start +
635                                      variable_group_size_idx * sizeof(uint32_t));
636             break;
637          }
638          case nir_intrinsic_load_work_dim: {
639             if (work_dim_idx == -1) {
640                work_dim_idx = num_system_values++;
641                system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
642             }
643             b.cursor = nir_before_instr(instr);
644             offset = nir_imm_int(&b, system_values_start +
645                                      work_dim_idx * sizeof(uint32_t));
646             break;
647          }
648          case nir_intrinsic_load_kernel_input: {
649             assert(nir_intrinsic_base(intrin) +
650                    nir_intrinsic_range(intrin) <= kernel_input_size);
651             b.cursor = nir_before_instr(instr);
652             offset = nir_iadd_imm(&b, intrin->src[0].ssa,
653                                       nir_intrinsic_base(intrin));
654             break;
655          }
656          default:
657             continue;
658          }
659 
660          nir_ssa_def *load =
661             nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
662                          temp_ubo_name, offset,
663                          .align_mul = 4,
664                          .align_offset = 0,
665                          .range_base = 0,
666                          .range = ~0);
667 
668          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
669                                   load);
670          nir_instr_remove(instr);
671       }
672    }
673 
674    nir_validate_shader(nir, "before remapping");
675 
676    /* Uniforms are stored in constant buffer 0, so the user-facing UBOs
677     * start at index one.  If any constant buffer is needed at all,
678     * constant buffer 0 will be needed, so account for it.
679     */
680    unsigned num_cbufs = nir->info.num_ubos;
681    if (num_cbufs || nir->num_uniforms)
682       num_cbufs++;
683 
684    /* Place the new params in a new cbuf. */
685    if (num_system_values > 0 || kernel_input_size > 0) {
686       unsigned sysval_cbuf_index = num_cbufs;
687       num_cbufs++;
688 
689       system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
690                                num_system_values);
691 
692       nir_foreach_block(block, impl) {
693          nir_foreach_instr_safe(instr, block) {
694             if (instr->type != nir_instr_type_intrinsic)
695                continue;
696 
697             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
698 
699             if (load->intrinsic != nir_intrinsic_load_ubo)
700                continue;
701 
702             b.cursor = nir_before_instr(instr);
703 
704             assert(load->src[0].is_ssa);
705 
706             if (load->src[0].ssa == temp_ubo_name) {
707                nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
708                nir_instr_rewrite_src(instr, &load->src[0],
709                                      nir_src_for_ssa(imm));
710             }
711          }
712       }
713 
714       /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
715       nir_opt_constant_folding(nir);
716    } else {
717       ralloc_free(system_values);
718       system_values = NULL;
719    }
720 
721    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
722    nir_validate_shader(nir, "after remap");
723 
724    /* We don't use params[] but gallium leaves num_uniforms set.  We use this
725     * to detect when cbuf0 exists but we don't need it anymore when we get
726     * here.  Instead, zero it out so that the back-end doesn't get confused
727     * when nr_params * 4 != num_uniforms.
728     */
729    nir->num_uniforms = 0;
730 
731    *out_system_values = system_values;
732    *out_num_system_values = num_system_values;
733    *out_num_cbufs = num_cbufs;
734 }
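/* Resulting constant buffer layout (illustrative summary, assuming both user
 * uniforms and system values are present): cbuf 0 holds the Gallium-uploaded
 * uniforms, cbufs 1..num_ubos hold the application's UBOs, and one extra
 * cbuf is appended at the end for the system values / kernel inputs that
 * were rewritten to load_ubo above.
 */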
735 
736 static const char *surface_group_names[] = {
737    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
738    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
739    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
740    [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
741    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
742    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
743    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
744 };
745 
746 static void
747 iris_print_binding_table(FILE *fp, const char *name,
748                          const struct iris_binding_table *bt)
749 {
750    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
751 
752    uint32_t total = 0;
753    uint32_t compacted = 0;
754 
755    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
756       uint32_t size = bt->sizes[i];
757       total += size;
758       if (size)
759          compacted += util_bitcount64(bt->used_mask[i]);
760    }
761 
762    if (total == 0) {
763       fprintf(fp, "Binding table for %s is empty\n\n", name);
764       return;
765    }
766 
767    if (total != compacted) {
768       fprintf(fp, "Binding table for %s "
769               "(compacted to %u entries from %u entries)\n",
770               name, compacted, total);
771    } else {
772       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
773    }
774 
775    uint32_t entry = 0;
776    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
777       uint64_t mask = bt->used_mask[i];
778       while (mask) {
779          int index = u_bit_scan64(&mask);
780          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
781       }
782    }
783    fprintf(fp, "\n");
784 }
785 
786 enum {
787    /* Max elements in a surface group. */
788    SURFACE_GROUP_MAX_ELEMENTS = 64,
789 };
790 
791 /**
792  * Map a <group, index> pair to a binding table index.
793  *
794  * For example: <UBO, 5> => binding table index 12
795  */
796 uint32_t
797 iris_group_index_to_bti(const struct iris_binding_table *bt,
798                         enum iris_surface_group group, uint32_t index)
799 {
800    assert(index < bt->sizes[group]);
801    uint64_t mask = bt->used_mask[group];
802    uint64_t bit = 1ull << index;
803    if (bit & mask) {
804       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
805    } else {
806       return IRIS_SURFACE_NOT_USED;
807    }
808 }
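/* Worked example (illustrative, hypothetical values): if
 * bt->used_mask[group] == 0b101101 and bt->offsets[group] == 7, then index 3
 * maps to BTI 7 + util_bitcount64(0b000111 & 0b101101) = 7 + 2 = 9, while
 * index 1 (whose bit is not set in used_mask) returns IRIS_SURFACE_NOT_USED.
 */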
809 
810 /**
811  * Map a binding table index back to a <group, index> pair.
812  *
813  * For example: binding table index 12 => <UBO, 5>
814  */
815 uint32_t
816 iris_bti_to_group_index(const struct iris_binding_table *bt,
817                         enum iris_surface_group group, uint32_t bti)
818 {
819    uint64_t used_mask = bt->used_mask[group];
820    assert(bti >= bt->offsets[group]);
821 
822    uint32_t c = bti - bt->offsets[group];
823    while (used_mask) {
824       int i = u_bit_scan64(&used_mask);
825       if (c == 0)
826          return i;
827       c--;
828    }
829 
830    return IRIS_SURFACE_NOT_USED;
831 }
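/* Worked example (illustrative, continuing the hypothetical values above):
 * with used_mask == 0b101101 and offsets[group] == 7, bti == 9 gives c == 2;
 * scanning the set bits 0, 2, 3, ... the third one is bit 3, so the function
 * returns group index 3 -- the inverse of the mapping above.
 */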
832 
833 static void
834 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
835                      nir_instr *instr, nir_src *src,
836                      enum iris_surface_group group)
837 {
838    assert(bt->sizes[group] > 0);
839 
840    b->cursor = nir_before_instr(instr);
841    nir_ssa_def *bti;
842    if (nir_src_is_const(*src)) {
843       uint32_t index = nir_src_as_uint(*src);
844       bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
845                            src->ssa->bit_size);
846    } else {
847       /* Indirect usage makes all the surfaces of the group available,
848        * so we can just add the base.
849        */
850       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
851       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
852    }
853    nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
854 }
855 
856 static void
857 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
858                    enum iris_surface_group group)
859 {
860    assert(bt->sizes[group] > 0);
861 
862    if (nir_src_is_const(*src)) {
863       uint64_t index = nir_src_as_uint(*src);
864       assert(index < bt->sizes[group]);
865       bt->used_mask[group] |= 1ull << index;
866    } else {
867       /* There's an indirect usage, we need all the surfaces. */
868       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
869    }
870 }
871 
872 static bool
873 skip_compacting_binding_tables(void)
874 {
875    static int skip = -1;
876    if (skip < 0)
877       skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
878    return skip;
879 }
880 
881 /**
882  * Set up the binding table indices and apply to the shader.
883  */
884 static void
885 iris_setup_binding_table(const struct intel_device_info *devinfo,
886                          struct nir_shader *nir,
887                          struct iris_binding_table *bt,
888                          unsigned num_render_targets,
889                          unsigned num_system_values,
890                          unsigned num_cbufs)
891 {
892    const struct shader_info *info = &nir->info;
893 
894    memset(bt, 0, sizeof(*bt));
895 
896    /* Set the sizes for each surface group.  For some groups, we already know
897     * upfront how many will be used, so mark them.
898     */
899    if (info->stage == MESA_SHADER_FRAGMENT) {
900       bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
901       /* All render targets used. */
902       bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
903          BITFIELD64_MASK(num_render_targets);
904 
905       /* Set up the render target read surface group to support non-coherent
906        * framebuffer fetch on Gfx8.
907        */
908       if (devinfo->ver == 8 && info->outputs_read) {
909          bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
910          bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
911             BITFIELD64_MASK(num_render_targets);
912       }
913    } else if (info->stage == MESA_SHADER_COMPUTE) {
914       bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
915    }
916 
917    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
918    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];
919 
920    bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
921 
922    /* Allocate an extra slot in the UBO section for NIR constants.
923     * Binding table compaction will remove it if unnecessary.
924     *
925     * We don't include them in iris_compiled_shader::num_cbufs because
926     * they are uploaded separately from shs->constbuf[], but from a shader
927     * point of view, they're another UBO (at the end of the section).
928     */
929    bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
930 
931    bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
932 
933    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
934       assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
935 
936    /* Mark surfaces used in cases where we don't have the information
937     * available upfront.
938     */
939    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
940    nir_foreach_block (block, impl) {
941       nir_foreach_instr (instr, block) {
942          if (instr->type != nir_instr_type_intrinsic)
943             continue;
944 
945          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
946          switch (intrin->intrinsic) {
947          case nir_intrinsic_load_num_workgroups:
948             bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
949             break;
950 
951          case nir_intrinsic_load_output:
952             if (devinfo->ver == 8) {
953                mark_used_with_src(bt, &intrin->src[0],
954                                   IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
955             }
956             break;
957 
958          case nir_intrinsic_image_size:
959          case nir_intrinsic_image_load:
960          case nir_intrinsic_image_store:
961          case nir_intrinsic_image_atomic_add:
962          case nir_intrinsic_image_atomic_imin:
963          case nir_intrinsic_image_atomic_umin:
964          case nir_intrinsic_image_atomic_imax:
965          case nir_intrinsic_image_atomic_umax:
966          case nir_intrinsic_image_atomic_and:
967          case nir_intrinsic_image_atomic_or:
968          case nir_intrinsic_image_atomic_xor:
969          case nir_intrinsic_image_atomic_exchange:
970          case nir_intrinsic_image_atomic_comp_swap:
971          case nir_intrinsic_image_load_raw_intel:
972          case nir_intrinsic_image_store_raw_intel:
973             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
974             break;
975 
976          case nir_intrinsic_load_ubo:
977             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
978             break;
979 
980          case nir_intrinsic_store_ssbo:
981             mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
982             break;
983 
984          case nir_intrinsic_get_ssbo_size:
985          case nir_intrinsic_ssbo_atomic_add:
986          case nir_intrinsic_ssbo_atomic_imin:
987          case nir_intrinsic_ssbo_atomic_umin:
988          case nir_intrinsic_ssbo_atomic_imax:
989          case nir_intrinsic_ssbo_atomic_umax:
990          case nir_intrinsic_ssbo_atomic_and:
991          case nir_intrinsic_ssbo_atomic_or:
992          case nir_intrinsic_ssbo_atomic_xor:
993          case nir_intrinsic_ssbo_atomic_exchange:
994          case nir_intrinsic_ssbo_atomic_comp_swap:
995          case nir_intrinsic_ssbo_atomic_fmin:
996          case nir_intrinsic_ssbo_atomic_fmax:
997          case nir_intrinsic_ssbo_atomic_fcomp_swap:
998          case nir_intrinsic_load_ssbo:
999             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
1000             break;
1001 
1002          default:
1003             break;
1004          }
1005       }
1006    }
1007 
1008    /* When disabled, we just mark everything as used. */
1009    if (unlikely(skip_compacting_binding_tables())) {
1010       for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1011          bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1012    }
1013 
1014    /* Calculate the offsets and the binding table size based on the used
1015     * surfaces.  After this point, the functions to go between "group indices"
1016     * and binding table indices can be used.
1017     */
1018    uint32_t next = 0;
1019    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1020       if (bt->used_mask[i] != 0) {
1021          bt->offsets[i] = next;
1022          next += util_bitcount64(bt->used_mask[i]);
1023       }
1024    }
1025    bt->size_bytes = next * 4;
1026 
1027    if (INTEL_DEBUG(DEBUG_BT)) {
1028       iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1029    }
1030 
1031    /* Apply the binding table indices.  The backend compiler is not expected
1032     * to change those, as we haven't set any of the *_start entries in brw
1033     * binding_table.
1034     */
1035    nir_builder b;
1036    nir_builder_init(&b, impl);
1037 
1038    nir_foreach_block (block, impl) {
1039       nir_foreach_instr (instr, block) {
1040          if (instr->type == nir_instr_type_tex) {
1041             nir_tex_instr *tex = nir_instr_as_tex(instr);
1042             tex->texture_index =
1043                iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
1044                                        tex->texture_index);
1045             continue;
1046          }
1047 
1048          if (instr->type != nir_instr_type_intrinsic)
1049             continue;
1050 
1051          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1052          switch (intrin->intrinsic) {
1053          case nir_intrinsic_image_size:
1054          case nir_intrinsic_image_load:
1055          case nir_intrinsic_image_store:
1056          case nir_intrinsic_image_atomic_add:
1057          case nir_intrinsic_image_atomic_imin:
1058          case nir_intrinsic_image_atomic_umin:
1059          case nir_intrinsic_image_atomic_imax:
1060          case nir_intrinsic_image_atomic_umax:
1061          case nir_intrinsic_image_atomic_and:
1062          case nir_intrinsic_image_atomic_or:
1063          case nir_intrinsic_image_atomic_xor:
1064          case nir_intrinsic_image_atomic_exchange:
1065          case nir_intrinsic_image_atomic_comp_swap:
1066          case nir_intrinsic_image_load_raw_intel:
1067          case nir_intrinsic_image_store_raw_intel:
1068             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1069                                  IRIS_SURFACE_GROUP_IMAGE);
1070             break;
1071 
1072          case nir_intrinsic_load_ubo:
1073             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1074                                  IRIS_SURFACE_GROUP_UBO);
1075             break;
1076 
1077          case nir_intrinsic_store_ssbo:
1078             rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1079                                  IRIS_SURFACE_GROUP_SSBO);
1080             break;
1081 
1082          case nir_intrinsic_load_output:
1083             if (devinfo->ver == 8) {
1084                rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1085                                     IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1086             }
1087             break;
1088 
1089          case nir_intrinsic_get_ssbo_size:
1090          case nir_intrinsic_ssbo_atomic_add:
1091          case nir_intrinsic_ssbo_atomic_imin:
1092          case nir_intrinsic_ssbo_atomic_umin:
1093          case nir_intrinsic_ssbo_atomic_imax:
1094          case nir_intrinsic_ssbo_atomic_umax:
1095          case nir_intrinsic_ssbo_atomic_and:
1096          case nir_intrinsic_ssbo_atomic_or:
1097          case nir_intrinsic_ssbo_atomic_xor:
1098          case nir_intrinsic_ssbo_atomic_exchange:
1099          case nir_intrinsic_ssbo_atomic_comp_swap:
1100          case nir_intrinsic_ssbo_atomic_fmin:
1101          case nir_intrinsic_ssbo_atomic_fmax:
1102          case nir_intrinsic_ssbo_atomic_fcomp_swap:
1103          case nir_intrinsic_load_ssbo:
1104             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1105                                  IRIS_SURFACE_GROUP_SSBO);
1106             break;
1107 
1108          default:
1109             break;
1110          }
1111       }
1112    }
1113 }
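/* Illustrative layout (hypothetical shader, not derived from a real trace):
 * a Gfx9+ fragment shader with two render targets, one texture, and a single
 * used UBO would end up, in the group order of surface_group_names[], with
 * BTIs 0-1 for the render targets, BTI 2 for the texture, and BTI 3 for the
 * UBO; unused groups (SSBO, image, etc.) occupy no entries after compaction.
 */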
1114 
1115 static void
1116 iris_debug_recompile(struct iris_screen *screen,
1117                      struct util_debug_callback *dbg,
1118                      struct iris_uncompiled_shader *ish,
1119                      const struct brw_base_prog_key *key)
1120 {
1121    if (!ish || list_is_empty(&ish->variants)
1122             || list_is_singular(&ish->variants))
1123       return;
1124 
1125    const struct brw_compiler *c = screen->compiler;
1126    const struct shader_info *info = &ish->nir->info;
1127 
1128    brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1129                        _mesa_shader_stage_to_string(info->stage),
1130                        info->name ? info->name : "(no identifier)",
1131                        info->label ? info->label : "");
1132 
1133    struct iris_compiled_shader *shader =
1134       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1135    const void *old_iris_key = &shader->key;
1136 
1137    union brw_any_prog_key old_key;
1138 
1139    switch (info->stage) {
1140    case MESA_SHADER_VERTEX:
1141       old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
1142       break;
1143    case MESA_SHADER_TESS_CTRL:
1144       old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
1145       break;
1146    case MESA_SHADER_TESS_EVAL:
1147       old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
1148       break;
1149    case MESA_SHADER_GEOMETRY:
1150       old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
1151       break;
1152    case MESA_SHADER_FRAGMENT:
1153       old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
1154       break;
1155    case MESA_SHADER_COMPUTE:
1156       old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
1157       break;
1158    default:
1159       unreachable("invalid shader stage");
1160    }
1161 
1162    brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1163 }
1164 
1165 static void
1166 check_urb_size(struct iris_context *ice,
1167                unsigned needed_size,
1168                gl_shader_stage stage)
1169 {
1170    unsigned last_allocated_size = ice->shaders.urb.size[stage];
1171 
1172    /* If the last URB allocation wasn't large enough for our needs,
1173     * flag it as needing to be reconfigured.  Otherwise, we can use
1174     * the existing config.  However, if the URB is constrained, and
1175     * we can shrink our size for this stage, we may be able to gain
1176     * extra concurrency by reconfiguring it to be smaller.  Do so.
1177     */
1178    if (last_allocated_size < needed_size ||
1179        (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1180       ice->state.dirty |= IRIS_DIRTY_URB;
1181    }
1182 }
1183 
1184 /**
1185  * Get the shader for the last enabled geometry stage.
1186  *
1187  * This stage is the one which will feed stream output and the rasterizer.
1188  */
1189 static gl_shader_stage
1190 last_vue_stage(struct iris_context *ice)
1191 {
1192    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1193       return MESA_SHADER_GEOMETRY;
1194 
1195    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1196       return MESA_SHADER_TESS_EVAL;
1197 
1198    return MESA_SHADER_VERTEX;
1199 }
1200 
1201 /**
1202  * \param added  Set to \c true if the variant was added to the list (i.e., a
1203  *               variant matching \c key was not found).  Set to \c false
1204  *               otherwise.
1205  */
1206 static inline struct iris_compiled_shader *
1207 find_or_add_variant(const struct iris_screen *screen,
1208                     struct iris_uncompiled_shader *ish,
1209                     enum iris_program_cache_id cache_id,
1210                     const void *key, unsigned key_size,
1211                     bool *added)
1212 {
1213    struct list_head *start = ish->variants.next;
1214 
1215    *added = false;
1216 
1217    if (screen->precompile) {
1218       /* Check the first list entry.  There will always be at least one
1219        * variant in the list (most likely the precompile variant), and
1220        * other contexts only append new variants, so we can safely check
1221        * it without locking, saving that cost in the common case.
1222        */
1223       struct iris_compiled_shader *first =
1224          list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1225 
1226       if (memcmp(&first->key, key, key_size) == 0) {
1227          util_queue_fence_wait(&first->ready);
1228          return first;
1229       }
1230 
1231       /* Skip this one in the loop below */
1232       start = first->link.next;
1233    }
1234 
1235    struct iris_compiled_shader *variant = NULL;
1236 
1237    /* If it doesn't match, we have to walk the list; other contexts may be
1238     * concurrently appending shaders to it, so we need to lock here.
1239     */
1240    simple_mtx_lock(&ish->lock);
1241 
1242    list_for_each_entry_from(struct iris_compiled_shader, v, start,
1243                             &ish->variants, link) {
1244       if (memcmp(&v->key, key, key_size) == 0) {
1245          variant = v;
1246          break;
1247       }
1248    }
1249 
1250    if (variant == NULL) {
1251       variant = iris_create_shader_variant(screen, NULL, cache_id,
1252                                            key_size, key);
1253 
1254       /* Append our new variant to the shader's variant list. */
1255       list_addtail(&variant->link, &ish->variants);
1256       *added = true;
1257 
1258       simple_mtx_unlock(&ish->lock);
1259    } else {
1260       simple_mtx_unlock(&ish->lock);
1261 
1262       util_queue_fence_wait(&variant->ready);
1263    }
1264 
1265    return variant;
1266 }
1267 
1268 static void
1269 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1270                                  UNUSED int thread_index)
1271 {
1272    free(_job);
1273 }
1274 
1275 static void
1276 iris_schedule_compile(struct iris_screen *screen,
1277                       struct util_queue_fence *ready_fence,
1278                       struct util_debug_callback *dbg,
1279                       struct iris_threaded_compile_job *job,
1280                       util_queue_execute_func execute)
1281 
1282 {
1283    struct util_async_debug_callback async_debug;
1284 
1285    if (dbg) {
1286       u_async_debug_init(&async_debug);
1287       job->dbg = &async_debug.base;
1288    }
1289 
1290    util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1291                       iris_threaded_compile_job_delete, 0);
1292 
1293    if (screen->driconf.sync_compile || dbg)
1294       util_queue_fence_wait(ready_fence);
1295 
1296    if (dbg) {
1297       u_async_debug_drain(&async_debug, dbg);
1298       u_async_debug_cleanup(&async_debug);
1299    }
1300 }
1301 
1302 /**
1303  * Compile a vertex shader, and upload the assembly.
1304  */
1305 static void
1306 iris_compile_vs(struct iris_screen *screen,
1307                 struct u_upload_mgr *uploader,
1308                 struct util_debug_callback *dbg,
1309                 struct iris_uncompiled_shader *ish,
1310                 struct iris_compiled_shader *shader)
1311 {
1312    const struct brw_compiler *compiler = screen->compiler;
1313    const struct intel_device_info *devinfo = &screen->devinfo;
1314    void *mem_ctx = ralloc_context(NULL);
1315    struct brw_vs_prog_data *vs_prog_data =
1316       rzalloc(mem_ctx, struct brw_vs_prog_data);
1317    struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1318    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1319    enum brw_param_builtin *system_values;
1320    unsigned num_system_values;
1321    unsigned num_cbufs;
1322 
1323    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1324    const struct iris_vs_prog_key *const key = &shader->key.vs;
1325 
1326    if (key->vue.nr_userclip_plane_consts) {
1327       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1328       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1329                         true, false, NULL);
1330       nir_lower_io_to_temporaries(nir, impl, true, false);
1331       nir_lower_global_vars_to_local(nir);
1332       nir_lower_vars_to_ssa(nir);
1333       nir_shader_gather_info(nir, impl);
1334    }
1335 
1336    prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1337 
1338    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1339                        &num_system_values, &num_cbufs);
1340 
1341    struct iris_binding_table bt;
1342    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1343                             num_system_values, num_cbufs);
1344 
1345    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1346 
1347    brw_compute_vue_map(devinfo,
1348                        &vue_prog_data->vue_map, nir->info.outputs_written,
1349                        nir->info.separate_shader, /* pos_slots */ 1);
1350 
1351    struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
1352 
1353    struct brw_compile_vs_params params = {
1354       .nir = nir,
1355       .key = &brw_key,
1356       .prog_data = vs_prog_data,
1357       .log_data = dbg,
1358    };
1359 
1360    const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
1361    if (program == NULL) {
1362       dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1363       ralloc_free(mem_ctx);
1364 
1365       shader->compilation_failed = true;
1366       util_queue_fence_signal(&shader->ready);
1367 
1368       return;
1369    }
1370 
1371    shader->compilation_failed = false;
1372 
1373    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1374 
1375    uint32_t *so_decls =
1376       screen->vtbl.create_so_decl_list(&ish->stream_output,
1377                                     &vue_prog_data->vue_map);
1378 
1379    iris_finalize_program(shader, prog_data, so_decls, system_values,
1380                          num_system_values, 0, num_cbufs, &bt);
1381 
1382    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1383                       sizeof(*key), key, program);
1384 
1385    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1386 
1387    ralloc_free(mem_ctx);
1388 }
1389 
1390 /**
1391  * Update the current vertex shader variant.
1392  *
1393  * Fill out the key, look in the cache, compile and bind if needed.
1394  */
1395 static void
1396 iris_update_compiled_vs(struct iris_context *ice)
1397 {
1398    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1399    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1400    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1401    struct iris_uncompiled_shader *ish =
1402       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1403 
1404    struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
1405    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1406 
1407    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1408    bool added;
1409    struct iris_compiled_shader *shader =
1410       find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1411 
1412    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1413                                           &key, sizeof(key))) {
1414       iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1415    }
1416 
1417    if (shader->compilation_failed)
1418       shader = NULL;
1419 
1420    if (old != shader) {
1421       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1422                                     shader);
1423       ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1424       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1425                                 IRIS_STAGE_DIRTY_BINDINGS_VS |
1426                                 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1427       shs->sysvals_need_upload = true;
1428 
1429       unsigned urb_entry_size = shader ?
1430          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1431       check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1432    }
1433 }
1434 
1435 /**
1436  * Get the shader_info for a given stage, or NULL if the stage is disabled.
1437  */
1438 const struct shader_info *
1439 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1440 {
1441    const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1442 
1443    if (!ish)
1444       return NULL;
1445 
1446    const nir_shader *nir = ish->nir;
1447    return &nir->info;
1448 }
1449 
1450 /**
1451  * Get the union of TCS output and TES input slots.
1452  *
1453  * TCS and TES need to agree on a common URB entry layout.  In particular,
1454  * the data for all patch vertices is stored in a single URB entry (unlike
1455  * GS which has one entry per input vertex).  This means that per-vertex
1456  * array indexing needs a stride.
1457  *
1458  * SSO requires locations to match, but doesn't require the number of
1459  * outputs/inputs to match (in fact, the TCS often has extra outputs).
1460  * So, we need to take the extra step of unifying these on the fly.
1461  */
1462 static void
1463 get_unified_tess_slots(const struct iris_context *ice,
1464                        uint64_t *per_vertex_slots,
1465                        uint32_t *per_patch_slots)
1466 {
1467    const struct shader_info *tcs =
1468       iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1469    const struct shader_info *tes =
1470       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1471 
1472    *per_vertex_slots = tes->inputs_read;
1473    *per_patch_slots = tes->patch_inputs_read;
1474 
1475    if (tcs) {
1476       *per_vertex_slots |= tcs->outputs_written;
1477       *per_patch_slots |= tcs->patch_outputs_written;
1478    }
1479 }
1480 
1481 /**
1482  * Compile a tessellation control shader, and upload the assembly.
1483  */
1484 static void
1485 iris_compile_tcs(struct iris_screen *screen,
1486                  struct hash_table *passthrough_ht,
1487                  struct u_upload_mgr *uploader,
1488                  struct util_debug_callback *dbg,
1489                  struct iris_uncompiled_shader *ish,
1490                  struct iris_compiled_shader *shader)
1491 {
1492    const struct brw_compiler *compiler = screen->compiler;
1493    const struct nir_shader_compiler_options *options =
1494       compiler->nir_options[MESA_SHADER_TESS_CTRL];
1495    void *mem_ctx = ralloc_context(NULL);
1496    struct brw_tcs_prog_data *tcs_prog_data =
1497       rzalloc(mem_ctx, struct brw_tcs_prog_data);
1498    struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1499    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1500    const struct intel_device_info *devinfo = &screen->devinfo;
1501    enum brw_param_builtin *system_values = NULL;
1502    unsigned num_system_values = 0;
1503    unsigned num_cbufs = 0;
1504 
1505    nir_shader *nir;
1506 
1507    struct iris_binding_table bt;
1508 
1509    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
1510    struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
1511 
1512    if (ish) {
1513       nir = nir_shader_clone(mem_ctx, ish->nir);
1514 
1515       iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1516                           &num_system_values, &num_cbufs);
1517       iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1518                                num_system_values, num_cbufs);
1519       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1520    } else {
1521       nir =
1522          brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1523 
1524       /* Reserve space for passing the default tess levels as constants. */
1525       num_cbufs = 1;
1526       num_system_values = 8;
1527       system_values =
1528          rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1529       prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1530       prog_data->nr_params = num_system_values;
1531 
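      /* Fill the eight param slots from the top down: the outer tess levels
       * occupy the highest slots and the inner levels sit just below them;
       * which slots are used depends on the primitive mode.
       */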
1532       if (key->_tes_primitive_mode == TESS_PRIMITIVE_QUADS) {
1533          for (int i = 0; i < 4; i++)
1534             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1535 
1536          system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1537          system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1538       } else if (key->_tes_primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
1539          for (int i = 0; i < 3; i++)
1540             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1541 
1542          system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1543       } else {
1544          assert(key->_tes_primitive_mode == TESS_PRIMITIVE_ISOLINES);
1545          system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1546          system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1547       }
1548 
1549       /* Manually set up the TCS binding table. */
1550       memset(&bt, 0, sizeof(bt));
1551       bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1552       bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1553       bt.size_bytes = 4;
1554 
1555       prog_data->ubo_ranges[0].length = 1;
1556    }
1557 
1558    struct brw_compile_tcs_params params = {
1559       .nir = nir,
1560       .key = &brw_key,
1561       .prog_data = tcs_prog_data,
1562       .log_data = dbg,
1563    };
1564 
1565    const unsigned *program = brw_compile_tcs(compiler, mem_ctx, &params);
1566    if (program == NULL) {
1567       dbg_printf("Failed to compile control shader: %s\n", params.error_str);
1568       ralloc_free(mem_ctx);
1569 
1570       shader->compilation_failed = true;
1571       util_queue_fence_signal(&shader->ready);
1572 
1573       return;
1574    }
1575 
1576    shader->compilation_failed = false;
1577 
1578    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1579 
1580    iris_finalize_program(shader, prog_data, NULL, system_values,
1581                          num_system_values, 0, num_cbufs, &bt);
1582 
1583    iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
1584                       IRIS_CACHE_TCS, sizeof(*key), key, program);
1585 
1586    if (ish)
1587       iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1588 
1589    ralloc_free(mem_ctx);
1590 }
1591 
1592 /**
1593  * Update the current tessellation control shader variant.
1594  *
1595  * Fill out the key, look in the cache, compile and bind if needed.
1596  */
1597 static void
1598 iris_update_compiled_tcs(struct iris_context *ice)
1599 {
1600    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1601    struct iris_uncompiled_shader *tcs =
1602       ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1603    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1604    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1605    const struct brw_compiler *compiler = screen->compiler;
1606    const struct intel_device_info *devinfo = &screen->devinfo;
1607 
1608    const struct shader_info *tes_info =
1609       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1610    struct iris_tcs_prog_key key = {
1611       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1612       ._tes_primitive_mode = tes_info->tess._primitive_mode,
1613       .input_vertices =
1614          !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1615       .quads_workaround = devinfo->ver < 9 &&
1616                           tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
1617                           tes_info->tess.spacing == TESS_SPACING_EQUAL,
1618    };
1619    get_unified_tess_slots(ice, &key.outputs_written,
1620                           &key.patch_outputs_written);
1621    screen->vtbl.populate_tcs_key(ice, &key);
1622 
1623    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1624    struct iris_compiled_shader *shader;
1625    bool added = false;
1626 
1627    if (tcs != NULL) {
1628       shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
1629                                    sizeof(key), &added);
1630    } else {
1631       /* Look for and possibly create a passthrough TCS */
1632       shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1633 
1634 
1635       if (shader == NULL) {
1636          shader = iris_create_shader_variant(screen, ice->shaders.cache,
1637                                              IRIS_CACHE_TCS, sizeof(key), &key);
1638          added = true;
1639       }
1640 
1641    }
1642 
1643    /* If the variant was newly added (not found in the in-memory cache),
1644     * compile it now.  A passthrough TCS (tcs == NULL) always needs
1645     * compiling; a real TCS can skip it if the disk cache had the assembly.
1646     */
1647    if (added &&
1648        (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
1649                                                  &key, sizeof(key)))) {
1650       iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
1651                        shader);
1652    }
1653 
1654    if (shader->compilation_failed)
1655       shader = NULL;
1656 
1657    if (old != shader) {
1658       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
1659                                     shader);
1660       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1661                                 IRIS_STAGE_DIRTY_BINDINGS_TCS |
1662                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1663       shs->sysvals_need_upload = true;
1664 
1665       unsigned urb_entry_size = shader ?
1666          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1667       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
1668    }
1669 }
1670 
1671 /**
1672  * Compile a tessellation evaluation shader, and upload the assembly.
1673  */
1674 static void
1675 iris_compile_tes(struct iris_screen *screen,
1676                  struct u_upload_mgr *uploader,
1677                  struct util_debug_callback *dbg,
1678                  struct iris_uncompiled_shader *ish,
1679                  struct iris_compiled_shader *shader)
1680 {
1681    const struct brw_compiler *compiler = screen->compiler;
1682    void *mem_ctx = ralloc_context(NULL);
1683    struct brw_tes_prog_data *tes_prog_data =
1684       rzalloc(mem_ctx, struct brw_tes_prog_data);
1685    struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1686    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1687    enum brw_param_builtin *system_values;
1688    const struct intel_device_info *devinfo = &screen->devinfo;
1689    unsigned num_system_values;
1690    unsigned num_cbufs;
1691 
1692    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1693    const struct iris_tes_prog_key *const key = &shader->key.tes;
1694 
1695    if (key->vue.nr_userclip_plane_consts) {
1696       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1697       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1698                         true, false, NULL);
1699       nir_lower_io_to_temporaries(nir, impl, true, false);
1700       nir_lower_global_vars_to_local(nir);
1701       nir_lower_vars_to_ssa(nir);
1702       nir_shader_gather_info(nir, impl);
1703    }
1704 
1705    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1706                        &num_system_values, &num_cbufs);
1707 
1708    struct iris_binding_table bt;
1709    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1710                             num_system_values, num_cbufs);
1711 
1712    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1713 
1714    struct brw_vue_map input_vue_map;
1715    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1716                             key->patch_inputs_read);
1717 
1718    struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
1719 
1720    struct brw_compile_tes_params params = {
1721       .nir = nir,
1722       .key = &brw_key,
1723       .prog_data = tes_prog_data,
1724       .input_vue_map = &input_vue_map,
1725       .log_data = dbg,
1726    };
1727 
1728    const unsigned *program = brw_compile_tes(compiler, mem_ctx, &params);
1729    if (program == NULL) {
1730       dbg_printf("Failed to compile evaluation shader: %s\n", params.error_str);
1731       ralloc_free(mem_ctx);
1732 
1733       shader->compilation_failed = true;
1734       util_queue_fence_signal(&shader->ready);
1735 
1736       return;
1737    }
1738 
1739    shader->compilation_failed = false;
1740 
1741    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1742 
1743    uint32_t *so_decls =
1744       screen->vtbl.create_so_decl_list(&ish->stream_output,
1745                                     &vue_prog_data->vue_map);
1746 
1747    iris_finalize_program(shader, prog_data, so_decls, system_values,
1748                          num_system_values, 0, num_cbufs, &bt);
1749 
1750    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
1751                       sizeof(*key), key, program);
1752 
1753    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1754 
1755    ralloc_free(mem_ctx);
1756 }
1757 
1758 /**
1759  * Update the current tessellation evaluation shader variant.
1760  *
1761  * Fill out the key, look in the cache, compile and bind if needed.
1762  */
1763 static void
1764 iris_update_compiled_tes(struct iris_context *ice)
1765 {
1766    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1767    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1768    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1769    struct iris_uncompiled_shader *ish =
1770       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1771 
1772    struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
1773    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1774    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1775 
1776    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1777    bool added;
1778    struct iris_compiled_shader *shader =
1779       find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
1780 
1781    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1782                                           &key, sizeof(key))) {
1783       iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
1784    }
1785 
1786    if (shader->compilation_failed)
1787       shader = NULL;
1788 
1789    if (old != shader) {
1790       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
1791                                     shader);
1792       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1793                                 IRIS_STAGE_DIRTY_BINDINGS_TES |
1794                                 IRIS_STAGE_DIRTY_CONSTANTS_TES;
1795       shs->sysvals_need_upload = true;
1796 
1797       unsigned urb_entry_size = shader ?
1798          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1799       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
1800    }
1801 
1802    /* TODO: Could compare and avoid flagging this. */
1803    const struct shader_info *tes_info = &ish->nir->info;
1804    if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1805       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1806       ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1807    }
1808 }
1809 
1810 /**
1811  * Compile a geometry shader, and upload the assembly.
1812  */
1813 static void
1814 iris_compile_gs(struct iris_screen *screen,
1815                 struct u_upload_mgr *uploader,
1816                 struct util_debug_callback *dbg,
1817                 struct iris_uncompiled_shader *ish,
1818                 struct iris_compiled_shader *shader)
1819 {
1820    const struct brw_compiler *compiler = screen->compiler;
1821    const struct intel_device_info *devinfo = &screen->devinfo;
1822    void *mem_ctx = ralloc_context(NULL);
1823    struct brw_gs_prog_data *gs_prog_data =
1824       rzalloc(mem_ctx, struct brw_gs_prog_data);
1825    struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1826    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1827    enum brw_param_builtin *system_values;
1828    unsigned num_system_values;
1829    unsigned num_cbufs;
1830 
1831    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1832    const struct iris_gs_prog_key *const key = &shader->key.gs;
1833 
1834    if (key->vue.nr_userclip_plane_consts) {
1835       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1836       nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1837                         false, NULL);
1838       nir_lower_io_to_temporaries(nir, impl, true, false);
1839       nir_lower_global_vars_to_local(nir);
1840       nir_lower_vars_to_ssa(nir);
1841       nir_shader_gather_info(nir, impl);
1842    }
1843 
1844    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1845                        &num_system_values, &num_cbufs);
1846 
1847    struct iris_binding_table bt;
1848    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1849                             num_system_values, num_cbufs);
1850 
1851    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1852 
1853    brw_compute_vue_map(devinfo,
1854                        &vue_prog_data->vue_map, nir->info.outputs_written,
1855                        nir->info.separate_shader, /* pos_slots */ 1);
1856 
1857    struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
1858 
1859    struct brw_compile_gs_params params = {
1860       .nir = nir,
1861       .key = &brw_key,
1862       .prog_data = gs_prog_data,
1863       .log_data = dbg,
1864    };
1865 
1866    const unsigned *program = brw_compile_gs(compiler, mem_ctx, &params);
1867    if (program == NULL) {
1868       dbg_printf("Failed to compile geometry shader: %s\n", params.error_str);
1869       ralloc_free(mem_ctx);
1870 
1871       shader->compilation_failed = true;
1872       util_queue_fence_signal(&shader->ready);
1873 
1874       return;
1875    }
1876 
1877    shader->compilation_failed = false;
1878 
1879    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1880 
1881    uint32_t *so_decls =
1882       screen->vtbl.create_so_decl_list(&ish->stream_output,
1883                                     &vue_prog_data->vue_map);
1884 
1885    iris_finalize_program(shader, prog_data, so_decls, system_values,
1886                          num_system_values, 0, num_cbufs, &bt);
1887 
1888    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
1889                       sizeof(*key), key, program);
1890 
1891    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1892 
1893    ralloc_free(mem_ctx);
1894 }
1895 
1896 /**
1897  * Update the current geometry shader variant.
1898  *
1899  * Fill out the key, look in the cache, compile and bind if needed.
1900  */
1901 static void
1902 iris_update_compiled_gs(struct iris_context *ice)
1903 {
1904    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1905    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1906    struct iris_uncompiled_shader *ish =
1907       ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1908    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1909    struct iris_compiled_shader *shader = NULL;
1910    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1911 
1912    if (ish) {
1913       struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
1914       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1915 
1916       bool added;
1917 
1918       shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
1919                                    sizeof(key), &added);
1920 
1921       if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1922                                              &key, sizeof(key))) {
1923          iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
1924       }
1925 
1926       if (shader->compilation_failed)
1927          shader = NULL;
1928    }
1929 
1930    if (old != shader) {
1931       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
1932                                     shader);
1933       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1934                                 IRIS_STAGE_DIRTY_BINDINGS_GS |
1935                                 IRIS_STAGE_DIRTY_CONSTANTS_GS;
1936       shs->sysvals_need_upload = true;
1937 
1938       unsigned urb_entry_size = shader ?
1939          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1940       check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
1941    }
1942 }
1943 
1944 /**
1945  * Compile a fragment (pixel) shader, and upload the assembly.
1946  */
1947 static void
1948 iris_compile_fs(struct iris_screen *screen,
1949                 struct u_upload_mgr *uploader,
1950                 struct util_debug_callback *dbg,
1951                 struct iris_uncompiled_shader *ish,
1952                 struct iris_compiled_shader *shader,
1953                 struct brw_vue_map *vue_map)
1954 {
1955    const struct brw_compiler *compiler = screen->compiler;
1956    void *mem_ctx = ralloc_context(NULL);
1957    struct brw_wm_prog_data *fs_prog_data =
1958       rzalloc(mem_ctx, struct brw_wm_prog_data);
1959    struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1960    enum brw_param_builtin *system_values;
1961    const struct intel_device_info *devinfo = &screen->devinfo;
1962    unsigned num_system_values;
1963    unsigned num_cbufs;
1964 
1965    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1966    const struct iris_fs_prog_key *const key = &shader->key.fs;
1967 
1968    prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1969 
1970    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1971                        &num_system_values, &num_cbufs);
1972 
1973    /* Lower output variables to load_output intrinsics before setting up
1974     * binding tables, so iris_setup_binding_table can map any load_output
1975     * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
1976     * non-coherent framebuffer fetches.
1977     */
1978    brw_nir_lower_fs_outputs(nir);
1979 
1980    /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
1981     * and do not need a binding table entry with a null surface.  Earlier
1982     * generations need an entry for a null surface.
1983     */
1984    int null_rts = devinfo->ver < 11 ? 1 : 0;
1985 
1986    struct iris_binding_table bt;
1987    iris_setup_binding_table(devinfo, nir, &bt,
1988                             MAX2(key->nr_color_regions, null_rts),
1989                             num_system_values, num_cbufs);
1990 
1991    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1992 
1993    struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
1994 
1995    struct brw_compile_fs_params params = {
1996       .nir = nir,
1997       .key = &brw_key,
1998       .prog_data = fs_prog_data,
1999 
2000       .allow_spilling = true,
2001       .vue_map = vue_map,
2002 
2003       .log_data = dbg,
2004    };
2005 
2006    const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
2007    if (program == NULL) {
2008       dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
2009       ralloc_free(mem_ctx);
2010 
2011       shader->compilation_failed = true;
2012       util_queue_fence_signal(&shader->ready);
2013 
2014       return;
2015    }
2016 
2017    shader->compilation_failed = false;
2018 
2019    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2020 
2021    iris_finalize_program(shader, prog_data, NULL, system_values,
2022                          num_system_values, 0, num_cbufs, &bt);
2023 
2024    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2025                       sizeof(*key), key, program);
2026 
2027    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2028 
2029    ralloc_free(mem_ctx);
2030 }
2031 
2032 /**
2033  * Update the current fragment shader variant.
2034  *
2035  * Fill out the key, look in the cache, compile and bind if needed.
2036  */
2037 static void
2038 iris_update_compiled_fs(struct iris_context *ice)
2039 {
2040    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2041    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2042    struct iris_uncompiled_shader *ish =
2043       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2044    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2045    struct iris_fs_prog_key key = { KEY_INIT(base) };
2046    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2047 
2048    struct brw_vue_map *last_vue_map =
2049       &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2050 
2051    if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2052       key.input_slots_valid = last_vue_map->slots_valid;
2053 
2054    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2055    bool added;
2056    struct iris_compiled_shader *shader =
2057       find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2058                           sizeof(key), &added);
2059 
2060    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2061                                           &key, sizeof(key))) {
2062       iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2063    }
2064 
2065    if (shader->compilation_failed)
2066       shader = NULL;
2067 
2068    if (old != shader) {
2069       // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2070       // toggles.  might be able to avoid flagging SBE too.
2071       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2072                                     shader);
2073       ice->state.dirty |= IRIS_DIRTY_WM |
2074                           IRIS_DIRTY_CLIP |
2075                           IRIS_DIRTY_SBE;
2076       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2077                                 IRIS_STAGE_DIRTY_BINDINGS_FS |
2078                                 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2079       shs->sysvals_need_upload = true;
2080    }
2081 }
2082 
2083 /**
2084  * Update the last enabled stage's VUE map.
2085  *
2086  * When the shader feeding the rasterizer's output interface changes, we
2087  * need to re-emit various packets.
2088  */
2089 static void
2090 update_last_vue_map(struct iris_context *ice,
2091                     struct iris_compiled_shader *shader)
2092 {
2093    struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
2094    struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
2095    struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
2096       &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2097    const uint64_t changed_slots =
2098       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2099 
2100    if (changed_slots & VARYING_BIT_VIEWPORT) {
2101       ice->state.num_viewports =
2102          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2103       ice->state.dirty |= IRIS_DIRTY_CLIP |
2104                           IRIS_DIRTY_SF_CL_VIEWPORT |
2105                           IRIS_DIRTY_CC_VIEWPORT |
2106                           IRIS_DIRTY_SCISSOR_RECT;
2107       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2108          ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2109    }
2110 
2111    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2112       ice->state.dirty |= IRIS_DIRTY_SBE;
2113    }
2114 
2115    iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2116 }
2117 
2118 static void
2119 iris_update_pull_constant_descriptors(struct iris_context *ice,
2120                                       gl_shader_stage stage)
2121 {
2122    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2123 
2124    if (!shader || !shader->prog_data->has_ubo_pull)
2125       return;
2126 
2127    struct iris_shader_state *shs = &ice->state.shaders[stage];
2128    bool any_new_descriptors =
2129       shader->num_system_values > 0 && shs->sysvals_need_upload;
2130 
2131    unsigned bound_cbufs = shs->bound_cbufs;
2132 
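   /* Pull constant loads go through UBO surface states, so make sure every
    * bound constant buffer has one uploaded before the next draw.
    */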
2133    while (bound_cbufs) {
2134       const int i = u_bit_scan(&bound_cbufs);
2135       struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2136       struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2137       if (!surf_state->res && cbuf->buffer) {
2138          iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2139                                          ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2140          any_new_descriptors = true;
2141       }
2142    }
2143 
2144    if (any_new_descriptors)
2145       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2146 }
2147 
2148 /**
2149  * Update the current shader variants for the given state.
2150  *
2151  * This should be called on every draw call to ensure that the correct
2152  * shaders are bound.  It will also flag any dirty state triggered by
2153  * swapping out those shaders.
2154  */
2155 void
2156 iris_update_compiled_shaders(struct iris_context *ice)
2157 {
2158    const uint64_t stage_dirty = ice->state.stage_dirty;
2159 
2160    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2161                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2162        struct iris_uncompiled_shader *tes =
2163           ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2164        if (tes) {
2165           iris_update_compiled_tcs(ice);
2166           iris_update_compiled_tes(ice);
2167        } else {
2168          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2169          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2170           ice->state.stage_dirty |=
2171              IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2172              IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2173              IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2174 
2175           if (ice->shaders.urb.constrained)
2176              ice->state.dirty |= IRIS_DIRTY_URB;
2177        }
2178    }
2179 
2180    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2181       iris_update_compiled_vs(ice);
2182    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2183       iris_update_compiled_gs(ice);
2184 
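   /* Track whether the last pre-rasterization stage emits points or lines,
    * since the clip state programmed later depends on the output topology.
    */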
2185    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2186                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2187       const struct iris_compiled_shader *gs =
2188          ice->shaders.prog[MESA_SHADER_GEOMETRY];
2189       const struct iris_compiled_shader *tes =
2190          ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2191 
2192       bool points_or_lines = false;
2193 
2194       if (gs) {
2195          const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
2196          points_or_lines =
2197             gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
2198             gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
2199       } else if (tes) {
2200          const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
2201          points_or_lines =
2202             tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
2203             tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
2204       }
2205 
2206       if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2207          /* Outbound to XY Clip enables */
2208          ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2209          ice->state.dirty |= IRIS_DIRTY_CLIP;
2210       }
2211    }
2212 
2213    gl_shader_stage last_stage = last_vue_stage(ice);
2214    struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2215    struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2216    update_last_vue_map(ice, shader);
2217    if (ice->state.streamout != shader->streamout) {
2218       ice->state.streamout = shader->streamout;
2219       ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2220    }
2221 
2222    if (ice->state.streamout_active) {
2223       for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2224          struct iris_stream_output_target *so =
2225             (void *) ice->state.so_target[i];
2226          if (so)
2227             so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2228       }
2229    }
2230 
2231    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2232       iris_update_compiled_fs(ice);
2233 
2234    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2235       if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2236          iris_update_pull_constant_descriptors(ice, i);
2237    }
2238 }
2239 
2240 static void
2241 iris_compile_cs(struct iris_screen *screen,
2242                 struct u_upload_mgr *uploader,
2243                 struct util_debug_callback *dbg,
2244                 struct iris_uncompiled_shader *ish,
2245                 struct iris_compiled_shader *shader)
2246 {
2247    const struct brw_compiler *compiler = screen->compiler;
2248    void *mem_ctx = ralloc_context(NULL);
2249    struct brw_cs_prog_data *cs_prog_data =
2250       rzalloc(mem_ctx, struct brw_cs_prog_data);
2251    struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2252    enum brw_param_builtin *system_values;
2253    const struct intel_device_info *devinfo = &screen->devinfo;
2254    unsigned num_system_values;
2255    unsigned num_cbufs;
2256 
2257    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2258    const struct iris_cs_prog_key *const key = &shader->key.cs;
2259 
2260    NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2261 
2262    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
2263                        ish->kernel_input_size,
2264                        &system_values, &num_system_values, &num_cbufs);
2265 
2266    struct iris_binding_table bt;
2267    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2268                             num_system_values, num_cbufs);
2269 
2270    struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
2271 
2272    struct brw_compile_cs_params params = {
2273       .nir = nir,
2274       .key = &brw_key,
2275       .prog_data = cs_prog_data,
2276       .log_data = dbg,
2277    };
2278 
2279    const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
2280    if (program == NULL) {
2281       dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2282 
2283       shader->compilation_failed = true;
2284       util_queue_fence_signal(&shader->ready);
2285 
2286       return;
2287    }
2288 
2289    shader->compilation_failed = false;
2290 
2291    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2292 
2293    iris_finalize_program(shader, prog_data, NULL, system_values,
2294                          num_system_values, ish->kernel_input_size, num_cbufs,
2295                          &bt);
2296 
2297    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
2298                       sizeof(*key), key, program);
2299 
2300    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2301 
2302    ralloc_free(mem_ctx);
2303 }
2304 
2305 static void
2306 iris_update_compiled_cs(struct iris_context *ice)
2307 {
2308    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2309    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2310    struct iris_uncompiled_shader *ish =
2311       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2312    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2313    struct iris_cs_prog_key key = { KEY_INIT(base) };
2314    screen->vtbl.populate_cs_key(ice, &key);
2315 
2316    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2317    bool added;
2318    struct iris_compiled_shader *shader =
2319       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
2320                           sizeof(key), &added);
2321 
2322    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2323                                           &key, sizeof(key))) {
2324       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2325    }
2326 
2327    if (shader->compilation_failed)
2328       shader = NULL;
2329 
2330    if (old != shader) {
2331       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
2332                                     shader);
2333       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2334                                 IRIS_STAGE_DIRTY_BINDINGS_CS |
2335                                 IRIS_STAGE_DIRTY_CONSTANTS_CS;
2336       shs->sysvals_need_upload = true;
2337    }
2338 }
2339 
2340 void
2341 iris_update_compiled_compute_shader(struct iris_context *ice)
2342 {
2343    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2344       iris_update_compiled_cs(ice);
2345 
2346    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2347       iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2348 }
2349 
2350 void
2351 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2352                                unsigned threads,
2353                                uint32_t *dst)
2354 {
2355    assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2356    assert(cs_prog_data->push.cross_thread.size == 0);
2357    assert(cs_prog_data->push.per_thread.dwords == 1);
2358    assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
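   /* Each thread gets a single dword of per-thread data (its subgroup ID),
    * padded out to a full 32-byte register, hence the 8-dword stride.
    */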
2359    for (unsigned t = 0; t < threads; t++)
2360       dst[8 * t] = t;
2361 }
2362 
2363 /**
2364  * Allocate scratch BOs as needed for the given per-thread size and stage.
2365  */
2366 struct iris_bo *
2367 iris_get_scratch_space(struct iris_context *ice,
2368                        unsigned per_thread_scratch,
2369                        gl_shader_stage stage)
2370 {
2371    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2372    struct iris_bufmgr *bufmgr = screen->bufmgr;
2373    const struct intel_device_info *devinfo = &screen->devinfo;
2374 
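   /* Per-thread scratch sizes are powers of two starting at 1KB, so encode
    * the size as log2(size) - 10: 1KB maps to slot 0, 2KB to slot 1, etc.
    */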
2375    unsigned encoded_size = ffs(per_thread_scratch) - 11;
2376    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
2377    assert(per_thread_scratch == 1 << (encoded_size + 10));
2378 
2379    /* On GFX version 12.5, scratch access changed to a surface-based model.
2380     * Instead of each shader type having its own layout based on IDs passed
2381     * from the relevant fixed-function unit, all scratch access is based on
2382     * thread IDs like it always has been for compute.
2383     */
2384    if (devinfo->verx10 >= 125)
2385       stage = MESA_SHADER_COMPUTE;
2386 
2387    struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2388 
2389    if (!*bop) {
2390       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2391       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2392       *bop = iris_bo_alloc(bufmgr, "scratch", size, 1024,
2393                            IRIS_MEMZONE_SHADER, 0);
2394    }
2395 
2396    return *bop;
2397 }
2398 
2399 const struct iris_state_ref *
2400 iris_get_scratch_surf(struct iris_context *ice,
2401                       unsigned per_thread_scratch)
2402 {
2403    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2404    ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
2405 
2406    assert(devinfo->verx10 >= 125);
2407 
2408    unsigned encoded_size = ffs(per_thread_scratch) - 11;
2409    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
2410    assert(per_thread_scratch == 1 << (encoded_size + 10));
2411 
2412    struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
2413 
2414    if (ref->res)
2415       return ref;
2416 
2417    struct iris_bo *scratch_bo =
2418       iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
2419 
2420    void *map = upload_state(ice->state.bindless_uploader, ref,
2421                             screen->isl_dev.ss.size, 64);
2422 
2423    isl_buffer_fill_state(&screen->isl_dev, map,
2424                          .address = scratch_bo->address,
2425                          .size_B = scratch_bo->size,
2426                          .format = ISL_FORMAT_RAW,
2427                          .swizzle = ISL_SWIZZLE_IDENTITY,
2428                          .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
2429                          .stride_B = per_thread_scratch,
2430                          .is_scratch = true);
2431 
2432    return ref;
2433 }
2434 
2435 /* ------------------------------------------------------------------- */
2436 
2437 /**
2438  * The pipe->create_[stage]_state() driver hooks.
2439  *
2440  * Performs basic NIR preprocessing, records any state dependencies, and
2441  * returns an iris_uncompiled_shader as the Gallium CSO.
2442  *
2443  * Actual shader compilation to assembly happens later, at first use.
2444  */
2445 static void *
2446 iris_create_uncompiled_shader(struct iris_screen *screen,
2447                               nir_shader *nir,
2448                               const struct pipe_stream_output_info *so_info)
2449 {
2450    struct iris_uncompiled_shader *ish =
2451       calloc(1, sizeof(struct iris_uncompiled_shader));
2452    if (!ish)
2453       return NULL;
2454 
2455    pipe_reference_init(&ish->ref, 1);
2456    list_inithead(&ish->variants);
2457    simple_mtx_init(&ish->lock, mtx_plain);
2458    util_queue_fence_init(&ish->ready);
2459 
2460    ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
2461 
2462    ish->program_id = get_new_program_id(screen);
2463    ish->nir = nir;
2464    if (so_info) {
2465       memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2466       update_so_info(&ish->stream_output, nir->info.outputs_written);
2467    }
2468 
2469    if (screen->disk_cache) {
2470       /* Serialize the NIR to a binary blob that we can hash for the disk
2471        * cache.  Drop unnecessary information (like variable names)
2472        * so the serialized NIR is smaller, and also to let us detect more
2473        * isomorphic shaders when hashing, increasing cache hits.
2474        */
2475       struct blob blob;
2476       blob_init(&blob);
2477       nir_serialize(&blob, nir, true);
2478       _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2479       blob_finish(&blob);
2480    }
2481 
2482    return ish;
2483 }
2484 
2485 static void *
2486 iris_create_compute_state(struct pipe_context *ctx,
2487                           const struct pipe_compute_state *state)
2488 {
2489    struct iris_context *ice = (void *) ctx;
2490    struct iris_screen *screen = (void *) ctx->screen;
2491    struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2492    const nir_shader_compiler_options *options =
2493       screen->compiler->nir_options[MESA_SHADER_COMPUTE];
2494 
2495    nir_shader *nir;
2496    switch (state->ir_type) {
2497    case PIPE_SHADER_IR_NIR:
2498       nir = (void *)state->prog;
2499       break;
2500 
2501    case PIPE_SHADER_IR_NIR_SERIALIZED: {
2502       struct blob_reader reader;
2503       const struct pipe_binary_program_header *hdr = state->prog;
2504       blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2505       nir = nir_deserialize(NULL, options, &reader);
2506       break;
2507    }
2508 
2509    default:
2510       unreachable("Unsupported IR");
2511    }
2512 
2513    /* Most of iris doesn't really care about the difference between compute
2514     * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
2515     * it's way easier if we just normalize to COMPUTE here.
2516     */
2517    assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2518           nir->info.stage == MESA_SHADER_KERNEL);
2519    nir->info.stage = MESA_SHADER_COMPUTE;
2520 
2521    struct iris_uncompiled_shader *ish =
2522       iris_create_uncompiled_shader(screen, nir, NULL);
2523    ish->kernel_input_size = state->req_input_mem;
2524    ish->kernel_shared_size = state->req_local_mem;
2525 
2526    // XXX: disallow more than 64KB of shared variables
2527 
2528    if (screen->precompile) {
2529       struct iris_cs_prog_key key = { KEY_INIT(base) };
2530 
2531       struct iris_compiled_shader *shader =
2532          iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
2533                                     sizeof(key), &key);
2534 
2535       /* Append our new variant to the shader's variant list. */
2536       list_addtail(&shader->link, &ish->variants);
2537 
2538       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2539                                     &key, sizeof(key))) {
2540          iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2541       }
2542    }
2543 
2544    return ish;
2545 }
2546 
2547 static void
2548 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
2549 {
2550    const struct iris_threaded_compile_job *job =
2551       (struct iris_threaded_compile_job *) _job;
2552 
2553    struct iris_screen *screen = job->screen;
2554    struct u_upload_mgr *uploader = job->uploader;
2555    struct util_debug_callback *dbg = job->dbg;
2556    struct iris_uncompiled_shader *ish = job->ish;
2557    struct iris_compiled_shader *shader = job->shader;
2558 
2559    switch (ish->nir->info.stage) {
2560    case MESA_SHADER_VERTEX:
2561       iris_compile_vs(screen, uploader, dbg, ish, shader);
2562       break;
2563    case MESA_SHADER_TESS_CTRL:
2564       iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
2565       break;
2566    case MESA_SHADER_TESS_EVAL:
2567       iris_compile_tes(screen, uploader, dbg, ish, shader);
2568       break;
2569    case MESA_SHADER_GEOMETRY:
2570       iris_compile_gs(screen, uploader, dbg, ish, shader);
2571       break;
2572    case MESA_SHADER_FRAGMENT:
2573       iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
2574       break;
2575 
2576    default:
2577       unreachable("Invalid shader stage.");
2578    }
2579 }
2580 
2581 static void *
2582 iris_create_shader_state(struct pipe_context *ctx,
2583                          const struct pipe_shader_state *state)
2584 {
2585    struct iris_context *ice = (void *) ctx;
2586    struct iris_screen *screen = (void *) ctx->screen;
2587    struct nir_shader *nir;
2588 
2589    if (state->type == PIPE_SHADER_IR_TGSI)
2590       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2591    else
2592       nir = state->ir.nir;
2593 
2594    const struct shader_info *const info = &nir->info;
2595    struct iris_uncompiled_shader *ish =
2596       iris_create_uncompiled_shader(screen, nir, &state->stream_output);
2597 
2598    union iris_any_prog_key key;
2599    unsigned key_size = 0;
2600 
2601    memset(&key, 0, sizeof(key));
2602 
2603    switch (info->stage) {
2604    case MESA_SHADER_VERTEX:
2605       /* User clip planes */
2606       if (info->clip_distance_array_size == 0)
2607          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2608 
2609       key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
2610       key_size = sizeof(key.vs);
2611       break;
2612 
2613    case MESA_SHADER_TESS_CTRL: {
2614       key.tcs = (struct iris_tcs_prog_key) {
2615          KEY_INIT(vue.base),
2616          // XXX: make sure the linker fills this out from the TES...
2617          ._tes_primitive_mode =
2618          info->tess._primitive_mode ? info->tess._primitive_mode
2619                                    : TESS_PRIMITIVE_TRIANGLES,
2620          .outputs_written = info->outputs_written,
2621          .patch_outputs_written = info->patch_outputs_written,
2622       };
2623 
2624       /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2625        * We don't have that information here, so we guess that the input and
2626        * output patches are the same size.  This is often wrong, but we can't
2627        * do much better.
2628        */
2629       if (screen->compiler->use_tcs_8_patch)
2630          key.tcs.input_vertices = info->tess.tcs_vertices_out;
2631 
2632       key_size = sizeof(key.tcs);
2633       break;
2634    }
2635 
2636    case MESA_SHADER_TESS_EVAL:
2637       /* User clip planes */
2638       if (info->clip_distance_array_size == 0)
2639          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2640 
2641       key.tes = (struct iris_tes_prog_key) {
2642          KEY_INIT(vue.base),
2643          // XXX: not ideal, need TCS output/TES input unification
2644          .inputs_read = info->inputs_read,
2645          .patch_inputs_read = info->patch_inputs_read,
2646       };
2647 
2648       key_size = sizeof(key.tes);
2649       break;
2650 
2651    case MESA_SHADER_GEOMETRY:
2652       /* User clip planes */
2653       if (info->clip_distance_array_size == 0)
2654          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2655 
2656       key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
2657       key_size = sizeof(key.gs);
2658       break;
2659 
2660    case MESA_SHADER_FRAGMENT:
2661       ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2662                   (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2663                   (1ull << IRIS_NOS_RASTERIZER) |
2664                   (1ull << IRIS_NOS_BLEND);
2665 
2666       /* The program key needs the VUE map if there are > 16 inputs */
2667       if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
2668          ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2669       }
2670 
2671       const uint64_t color_outputs = info->outputs_written &
2672          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2673            BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2674            BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2675 
2676       bool can_rearrange_varyings =
2677          util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2678 
2679       const struct intel_device_info *devinfo = &screen->devinfo;
2680 
2681       key.fs = (struct iris_fs_prog_key) {
2682          KEY_INIT(base),
2683          .nr_color_regions = util_bitcount(color_outputs),
2684          .coherent_fb_fetch = devinfo->ver >= 9,
2685          .input_slots_valid =
2686             can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2687       };
2688 
2689       key_size = sizeof(key.fs);
2690       break;
2691 
2692    default:
2693       unreachable("Invalid shader stage.");
2694    }
2695 
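   /* Note: when precompiling is enabled, we speculatively compile a variant
    * for the guessed default key right here at CSO creation time, on the
    * screen's shader compiler queue.  The draw-time lookup can then usually
    * find a ready variant instead of compiling synchronously.
    */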
2696    if (screen->precompile) {
2697       struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2698 
2699       struct iris_compiled_shader *shader =
2700          iris_create_shader_variant(screen, NULL,
2701                                     (enum iris_program_cache_id) info->stage,
2702                                     key_size, &key);
2703 
2704       /* Append our new variant to the shader's variant list. */
2705       list_addtail(&shader->link, &ish->variants);
2706 
2707       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2708                                     &key, key_size)) {
2709          assert(!util_queue_fence_is_signalled(&shader->ready));
2710 
2711          struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
2712 
2713          job->screen = screen;
2714          job->uploader = uploader;
2715          job->ish = ish;
2716          job->shader = shader;
2717 
2718          iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
2719                                iris_compile_shader);
2720       }
2721    }
2722 
2723    return ish;
2724 }
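/* For reference, a minimal sketch of how a gallium frontend is expected to
 * drive the hooks defined in this file (identifiers other than the
 * pipe_context methods are illustrative, not part of this driver):
 *
 *    struct pipe_shader_state ss = {
 *       .type = PIPE_SHADER_IR_NIR,
 *       .ir.nir = some_nir_shader,        // ownership passes to the driver
 *    };
 *    void *fs_cso = ctx->create_fs_state(ctx, &ss);
 *    ctx->bind_fs_state(ctx, fs_cso);     // subsequent draws use this shader
 *    ...
 *    ctx->bind_fs_state(ctx, NULL);
 *    ctx->delete_fs_state(ctx, fs_cso);   // drops the last reference
 *
 * The returned CSO is the iris_uncompiled_shader created above; actual
 * variants are compiled lazily (or via the precompile path) and cached.
 */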
2725 
2726 /**
2727  * Called when the refcount on the iris_uncompiled_shader reaches 0.
2728  *
2729  * Frees the iris_uncompiled_shader.
2730  *
2731  * \sa iris_delete_shader_state
2732  */
2733 void
2734 iris_destroy_shader_state(struct pipe_context *ctx, void *state)
2735 {
2736    struct iris_uncompiled_shader *ish = state;
2737 
2738    /* No need to take ish->lock; we hold the last reference to ish */
2739    list_for_each_entry_safe(struct iris_compiled_shader, shader,
2740                             &ish->variants, link) {
2741       list_del(&shader->link);
2742 
2743       iris_shader_variant_reference(&shader, NULL);
2744    }
2745 
2746    simple_mtx_destroy(&ish->lock);
2747    util_queue_fence_destroy(&ish->ready);
2748 
2749    ralloc_free(ish->nir);
2750    free(ish);
2751 }
2752 
2753 /**
2754  * The pipe->delete_[stage]_state() driver hooks.
2755  *
2756  * \sa iris_destroy_shader_state
2757  */
2758 static void
2759 iris_delete_shader_state(struct pipe_context *ctx, void *state)
2760 {
2761    struct iris_uncompiled_shader *ish = state;
2762    struct iris_context *ice = (void *) ctx;
2763 
2764    const gl_shader_stage stage = ish->nir->info.stage;
2765 
2766    if (ice->shaders.uncompiled[stage] == ish) {
2767       ice->shaders.uncompiled[stage] = NULL;
2768       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2769    }
2770 
2771    if (pipe_reference(&ish->ref, NULL))
2772       iris_destroy_shader_state(ctx, state);
2773 }
2774 
2775 /**
2776  * The pipe->bind_[stage]_state() driver hook.
2777  *
2778  * Binds an uncompiled shader as the current one for a particular stage.
2779  * Updates dirty tracking to account for the shader's NOS.
2780  */
2781 static void
2782 bind_shader_state(struct iris_context *ice,
2783                   struct iris_uncompiled_shader *ish,
2784                   gl_shader_stage stage)
2785 {
2786    uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2787    const uint64_t nos = ish ? ish->nos : 0;
2788 
2789    const struct shader_info *old_info = iris_get_shader_info(ice, stage);
2790    const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
2791 
2792    if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
2793        (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
2794       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
2795    }
2796 
2797    ice->shaders.uncompiled[stage] = ish;
2798    ice->state.stage_dirty |= stage_dirty_bit;
2799 
2800    /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when
2801     * they change (or that they no longer need to do so).
2802     */
2803    for (int i = 0; i < IRIS_NOS_COUNT; i++) {
2804       if (nos & (1 << i))
2805          ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
2806       else
2807          ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
2808    }
2809 }
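/* The stage_dirty_for_nos[] bits recorded above are consumed when the
 * corresponding CSOs are bound elsewhere in the driver; roughly (a sketch of
 * the idea, not the literal code in iris_state.c):
 *
 *    // in the rasterizer bind hook, for example:
 *    ice->state.stage_dirty |=
 *       ice->state.stage_dirty_for_nos[IRIS_NOS_RASTERIZER];
 *
 * which re-flags the uncompiled-shader dirty bit only for the stages whose
 * shaders actually depend on that piece of non-orthogonal state.
 */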
2810 
2811 static void
2812 iris_bind_vs_state(struct pipe_context *ctx, void *state)
2813 {
2814    struct iris_context *ice = (struct iris_context *)ctx;
2815    struct iris_uncompiled_shader *ish = state;
2816 
2817    if (ish) {
2818       const struct shader_info *info = &ish->nir->info;
2819       if (ice->state.window_space_position != info->vs.window_space_position) {
2820          ice->state.window_space_position = info->vs.window_space_position;
2821 
2822          ice->state.dirty |= IRIS_DIRTY_CLIP |
2823                              IRIS_DIRTY_RASTER |
2824                              IRIS_DIRTY_CC_VIEWPORT;
2825       }
2826 
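      /* These flags determine whether extra "SGV" vertex elements are needed
       * to feed gl_VertexID/gl_InstanceID/gl_DrawID and the draw parameters,
       * so any change requires re-emitting the vertex element and vertex
       * buffer state.
       */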
2827       const bool uses_draw_params =
2828          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
2829          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
2830       const bool uses_derived_draw_params =
2831          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
2832          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
2833       const bool needs_sgvs_element = uses_draw_params ||
2834          BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
2835          BITSET_TEST(info->system_values_read,
2836                      SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
2837 
2838       if (ice->state.vs_uses_draw_params != uses_draw_params ||
2839           ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
2840           ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag ||
2841           ice->state.vs_needs_sgvs_element != needs_sgvs_element) {
2842          ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
2843                              IRIS_DIRTY_VERTEX_ELEMENTS;
2844       }
2845 
2846       ice->state.vs_uses_draw_params = uses_draw_params;
2847       ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
2848       ice->state.vs_needs_sgvs_element = needs_sgvs_element;
2849       ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
2850    }
2851 
2852    bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
2853 }
2854 
2855 static void
2856 iris_bind_tcs_state(struct pipe_context *ctx, void *state)
2857 {
2858    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
2859 }
2860 
2861 static void
2862 iris_bind_tes_state(struct pipe_context *ctx, void *state)
2863 {
2864    struct iris_context *ice = (struct iris_context *)ctx;
2865    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2866    const struct intel_device_info *devinfo = &screen->devinfo;
2867 
2868    /* Enabling/disabling optional stages requires a URB reconfiguration. */
2869    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
2870       ice->state.dirty |= IRIS_DIRTY_URB | (devinfo->verx10 >= 125 ?
2871                                             IRIS_DIRTY_VFG : 0);
2872 
2873    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
2874 }
2875 
2876 static void
2877 iris_bind_gs_state(struct pipe_context *ctx, void *state)
2878 {
2879    struct iris_context *ice = (struct iris_context *)ctx;
2880 
2881    /* Enabling/disabling optional stages requires a URB reconfiguration. */
2882    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
2883       ice->state.dirty |= IRIS_DIRTY_URB;
2884 
2885    bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
2886 }
2887 
2888 static void
2889 iris_bind_fs_state(struct pipe_context *ctx, void *state)
2890 {
2891    struct iris_context *ice = (struct iris_context *) ctx;
2892    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
2893    const struct intel_device_info *devinfo = &screen->devinfo;
2894    struct iris_uncompiled_shader *old_ish =
2895       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2896    struct iris_uncompiled_shader *new_ish = state;
2897 
2898    const unsigned color_bits =
2899       BITFIELD64_BIT(FRAG_RESULT_COLOR) |
2900       BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
2901 
2902    /* Fragment shader outputs influence HasWriteableRT */
2903    if (!old_ish || !new_ish ||
2904        (old_ish->nir->info.outputs_written & color_bits) !=
2905        (new_ish->nir->info.outputs_written & color_bits))
2906       ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
2907 
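   /* On Gfx8, the PMA stall workaround depends on fragment shader properties
    * (discards, depth/stencil writes, and so on), so binding a new FS means
    * the fix has to be re-evaluated.
    */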
2908    if (devinfo->ver == 8)
2909       ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
2910 
2911    bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
2912 }
2913 
2914 static void
2915 iris_bind_cs_state(struct pipe_context *ctx, void *state)
2916 {
2917    bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
2918 }
2919 
2920 static char *
2921 iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
2922 {
2923    struct iris_screen *screen = (struct iris_screen *)_screen;
2924    struct nir_shader *nir = (struct nir_shader *) nirptr;
2925    const struct intel_device_info *devinfo = &screen->devinfo;
2926 
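   /* finalize_nir runs once per shader at program creation (the state
    * tracker calls it through pscreen->finalize_nir), so device-dependent
    * but key-independent lowering can happen here instead of in every
    * variant compile.
    */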
2927    NIR_PASS_V(nir, iris_fix_edge_flags);
2928 
2929    brw_preprocess_nir(screen->compiler, nir, NULL);
2930 
2931    NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
2932    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
2933 
2934    nir_sweep(nir);
2935 
2936    return NULL;
2937 }
2938 
2939 static void
2940 iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
2941                                      unsigned max_threads)
2942 {
2943    struct iris_screen *screen = (struct iris_screen *) pscreen;
2944    util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
2945 }
2946 
2947 static bool
2948 iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
2949                                              void *v_shader,
2950                                              enum pipe_shader_type p_stage)
2951 {
2952    struct iris_screen *screen = (struct iris_screen *) pscreen;
2953 
2954    /* Threaded compilation is only used for the precompile.  If precompile is
2955     * disabled, threaded compilation is "done."
2956     */
2957    if (!screen->precompile)
2958       return true;
2959 
2960    struct iris_uncompiled_shader *ish = v_shader;
2961 
2962    /* When precompile is enabled, the first entry is the precompile variant.
2963     * Check the ready fence of the precompile variant.
2964     */
2965    struct iris_compiled_shader *first =
2966       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
2967 
2968    return util_queue_fence_is_signalled(&first->ready);
2969 }
2970 
2971 void
2972 iris_init_screen_program_functions(struct pipe_screen *pscreen)
2973 {
2974    pscreen->is_parallel_shader_compilation_finished =
2975       iris_is_parallel_shader_compilation_finished;
2976    pscreen->set_max_shader_compiler_threads =
2977       iris_set_max_shader_compiler_threads;
2978    pscreen->finalize_nir = iris_finalize_nir;
2979 }
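/* The two compilation hooks installed above are what the state tracker uses
 * to implement GL_KHR_parallel_shader_compile-style behavior: it can poll
 * whether the precompile variant is ready and adjust the number of compiler
 * threads without blocking the application thread.
 */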
2980 
2981 void
2982 iris_init_program_functions(struct pipe_context *ctx)
2983 {
2984    ctx->create_vs_state  = iris_create_shader_state;
2985    ctx->create_tcs_state = iris_create_shader_state;
2986    ctx->create_tes_state = iris_create_shader_state;
2987    ctx->create_gs_state  = iris_create_shader_state;
2988    ctx->create_fs_state  = iris_create_shader_state;
2989    ctx->create_compute_state = iris_create_compute_state;
2990 
2991    ctx->delete_vs_state  = iris_delete_shader_state;
2992    ctx->delete_tcs_state = iris_delete_shader_state;
2993    ctx->delete_tes_state = iris_delete_shader_state;
2994    ctx->delete_gs_state  = iris_delete_shader_state;
2995    ctx->delete_fs_state  = iris_delete_shader_state;
2996    ctx->delete_compute_state = iris_delete_shader_state;
2997 
2998    ctx->bind_vs_state  = iris_bind_vs_state;
2999    ctx->bind_tcs_state = iris_bind_tcs_state;
3000    ctx->bind_tes_state = iris_bind_tes_state;
3001    ctx->bind_gs_state  = iris_bind_gs_state;
3002    ctx->bind_fs_state  = iris_bind_fs_state;
3003    ctx->bind_compute_state = iris_bind_cs_state;
3004 }
3005