• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file crocus_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See crocus_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "util/u_prim.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "intel/compiler/brw_prim.h"
48 #include "crocus_context.h"
49 #include "nir/tgsi_to_nir.h"
50 
/* Common designated-initializer fragments for brw shader program keys.
 *
 * KEY_INIT_NO_ID() seeds every sampler's swizzle with 0x688 (presumably the
 * packed identity swizzle — confirm against the SWIZZLE encoding in the brw
 * headers) and marks all samplers in the compressed-multisample-layout mask.
 */
#define KEY_INIT_NO_ID()                              \
   .base.tex.swizzles[0 ... BRW_MAX_SAMPLERS - 1] = 0x688,   \
   .base.tex.compressed_multisample_layout_mask = ~0
/* KEY_INIT() additionally records the shader's unique program id and the
 * driconf trig-input-range workaround; it expects `ish` and `screen` to be
 * in scope at the expansion site. */
#define KEY_INIT()                                                        \
   .base.program_string_id = ish->program_id,                             \
   .base.limit_trig_input_range = screen->driconf.limit_trig_input_range, \
   KEY_INIT_NO_ID()
58 
59 static void
crocus_sanitize_tex_key(struct brw_sampler_prog_key_data * key)60 crocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key)
61 {
62    key->gather_channel_quirk_mask = 0;
63    for (unsigned s = 0; s < BRW_MAX_SAMPLERS; s++) {
64       key->swizzles[s] = SWIZZLE_NOOP;
65       key->gfx6_gather_wa[s] = 0;
66    }
67 }
68 
69 static uint32_t
crocus_get_texture_swizzle(const struct crocus_context * ice,const struct crocus_sampler_view * t)70 crocus_get_texture_swizzle(const struct crocus_context *ice,
71                            const struct crocus_sampler_view *t)
72 {
73    uint32_t swiz = 0;
74 
75    for (int i = 0; i < 4; i++) {
76       swiz |= t->swizzle[i] << (i * 3);
77    }
78    return swiz;
79 }
80 
can_push_ubo(const struct intel_device_info * devinfo)81 static inline bool can_push_ubo(const struct intel_device_info *devinfo)
82 {
83    /* push works for everyone except SNB at the moment */
84    return devinfo->ver != 6;
85 }
86 
87 static uint8_t
gfx6_gather_workaround(enum pipe_format pformat)88 gfx6_gather_workaround(enum pipe_format pformat)
89 {
90    switch (pformat) {
91    case PIPE_FORMAT_R8_SINT: return WA_SIGN | WA_8BIT;
92    case PIPE_FORMAT_R8_UINT: return WA_8BIT;
93    case PIPE_FORMAT_R16_SINT: return WA_SIGN | WA_16BIT;
94    case PIPE_FORMAT_R16_UINT: return WA_16BIT;
95    default:
96       /* Note that even though PIPE_FORMAT_R32_SINT and
97        * PIPE_FORMAT_R32_UINThave format overrides in
98        * the surface state, there is no shader w/a required.
99        */
100       return 0;
101    }
102 }
103 
/* Swizzle to apply when a transform feedback output starts at component k
 * of a varying: entry k selects components k..3, replicating component 3
 * for the positions past the end (used by the gfx6 GS/FF-GS SOL setup
 * below). */
static const unsigned crocus_gfx6_swizzle_for_offset[4] = {
   BRW_SWIZZLE4(0, 1, 2, 3),
   BRW_SWIZZLE4(1, 2, 3, 3),
   BRW_SWIZZLE4(2, 3, 3, 3),
   BRW_SWIZZLE4(3, 3, 3, 3)
};
110 
111 static void
gfx6_gs_xfb_setup(const struct pipe_stream_output_info * so_info,struct brw_gs_prog_data * gs_prog_data)112 gfx6_gs_xfb_setup(const struct pipe_stream_output_info *so_info,
113                   struct brw_gs_prog_data *gs_prog_data)
114 {
115    /* Make sure that the VUE slots won't overflow the unsigned chars in
116     * prog_data->transform_feedback_bindings[].
117     */
118    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
119 
120    /* Make sure that we don't need more binding table entries than we've
121     * set aside for use in transform feedback.  (We shouldn't, since we
122     * set aside enough binding table entries to have one per component).
123     */
124    assert(so_info->num_outputs <= BRW_MAX_SOL_BINDINGS);
125 
126    gs_prog_data->num_transform_feedback_bindings = so_info->num_outputs;
127    for (unsigned i = 0; i < so_info->num_outputs; i++) {
128       gs_prog_data->transform_feedback_bindings[i] =
129          so_info->output[i].register_index;
130       gs_prog_data->transform_feedback_swizzles[i] =
131          crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component];
132    }
133 }
134 
135 static void
gfx6_ff_gs_xfb_setup(const struct pipe_stream_output_info * so_info,struct brw_ff_gs_prog_key * key)136 gfx6_ff_gs_xfb_setup(const struct pipe_stream_output_info *so_info,
137                      struct brw_ff_gs_prog_key *key)
138 {
139    key->num_transform_feedback_bindings = so_info->num_outputs;
140    for (unsigned i = 0; i < so_info->num_outputs; i++) {
141       key->transform_feedback_bindings[i] =
142          so_info->output[i].register_index;
143       key->transform_feedback_swizzles[i] =
144          crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component];
145    }
146 }
147 
148 static void
crocus_populate_sampler_prog_key_data(struct crocus_context * ice,const struct intel_device_info * devinfo,gl_shader_stage stage,struct crocus_uncompiled_shader * ish,bool uses_texture_gather,struct brw_sampler_prog_key_data * key)149 crocus_populate_sampler_prog_key_data(struct crocus_context *ice,
150                                       const struct intel_device_info *devinfo,
151                                       gl_shader_stage stage,
152                                       struct crocus_uncompiled_shader *ish,
153                                       bool uses_texture_gather,
154                                       struct brw_sampler_prog_key_data *key)
155 {
156    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
157    uint32_t mask = ish->nir->info.textures_used[0];
158 
159    while (mask) {
160       const int s = u_bit_scan(&mask);
161 
162       struct crocus_sampler_view *texture = ice->state.shaders[stage].textures[s];
163       key->swizzles[s] = SWIZZLE_NOOP;
164       key->scale_factors[s] = 0.0f;
165 
166       if (!texture)
167          continue;
168       if (texture->base.target == PIPE_BUFFER)
169          continue;
170       if (devinfo->verx10 < 75) {
171          key->swizzles[s] = crocus_get_texture_swizzle(ice, texture);
172       }
173 
174       screen->vtbl.fill_clamp_mask(ice->state.shaders[stage].samplers[s], s, key->gl_clamp_mask);
175 
176       /* gather4 for RG32* is broken in multiple ways on Gen7. */
177       if (devinfo->ver == 7 && uses_texture_gather) {
178          switch (texture->base.format) {
179          case PIPE_FORMAT_R32G32_UINT:
180          case PIPE_FORMAT_R32G32_SINT: {
181             /* We have to override the format to R32G32_FLOAT_LD.
182              * This means that SCS_ALPHA and SCS_ONE will return 0x3f8
183              * (1.0) rather than integer 1.  This needs shader hacks.
184              *
185              * On Ivybridge, we whack W (alpha) to ONE in our key's
186              * swizzle.  On Haswell, we look at the original texture
187              * swizzle, and use XYZW with channels overridden to ONE,
188              * leaving normal texture swizzling to SCS.
189              */
190             unsigned src_swizzle = key->swizzles[s];
191             for (int i = 0; i < 4; i++) {
192                unsigned src_comp = GET_SWZ(src_swizzle, i);
193                if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
194                   key->swizzles[i] &= ~(0x7 << (3 * i));
195                   key->swizzles[i] |= SWIZZLE_ONE << (3 * i);
196                }
197             }
198          }
199          FALLTHROUGH;
200          case PIPE_FORMAT_R32G32_FLOAT:
201             /* The channel select for green doesn't work - we have to
202              * request blue.  Haswell can use SCS for this, but Ivybridge
203              * needs a shader workaround.
204              */
205             if (devinfo->verx10 < 75)
206                key->gather_channel_quirk_mask |= 1 << s;
207             break;
208          default:
209             break;
210          }
211       }
212       if (devinfo->ver == 6 && uses_texture_gather) {
213          key->gfx6_gather_wa[s] = gfx6_gather_workaround(texture->base.format);
214       }
215    }
216 }
217 
218 static void
crocus_lower_swizzles(struct nir_shader * nir,const struct brw_sampler_prog_key_data * key_tex)219 crocus_lower_swizzles(struct nir_shader *nir,
220                       const struct brw_sampler_prog_key_data *key_tex)
221 {
222    struct nir_lower_tex_options tex_options = {
223       .lower_invalid_implicit_lod = true,
224    };
225    uint32_t mask = nir->info.textures_used[0];
226 
227    while (mask) {
228       const int s = u_bit_scan(&mask);
229 
230       if (key_tex->swizzles[s] == SWIZZLE_NOOP)
231          continue;
232 
233       tex_options.swizzle_result |= (1 << s);
234       for (unsigned c = 0; c < 4; c++)
235          tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
236    }
237    if (tex_options.swizzle_result)
238       nir_lower_tex(nir, &tex_options);
239 }
240 
241 static unsigned
get_new_program_id(struct crocus_screen * screen)242 get_new_program_id(struct crocus_screen *screen)
243 {
244    return p_atomic_inc_return(&screen->program_id);
245 }
246 
247 static nir_ssa_def *
get_aoa_deref_offset(nir_builder * b,nir_deref_instr * deref,unsigned elem_size)248 get_aoa_deref_offset(nir_builder *b,
249                      nir_deref_instr *deref,
250                      unsigned elem_size)
251 {
252    unsigned array_size = elem_size;
253    nir_ssa_def *offset = nir_imm_int(b, 0);
254 
255    while (deref->deref_type != nir_deref_type_var) {
256       assert(deref->deref_type == nir_deref_type_array);
257 
258       /* This level's element size is the previous level's array size */
259       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
260       assert(deref->arr.index.ssa);
261       offset = nir_iadd(b, offset,
262                         nir_imul(b, index, nir_imm_int(b, array_size)));
263 
264       deref = nir_deref_instr_parent(deref);
265       assert(glsl_type_is_array(deref->type));
266       array_size *= glsl_get_length(deref->type);
267    }
268 
269    /* Accessing an invalid surface index with the dataport can result in a
270     * hang.  According to the spec "if the index used to select an individual
271     * element is negative or greater than or equal to the size of the array,
272     * the results of the operation are undefined but may not lead to
273     * termination" -- which is one of the possible outcomes of the hang.
274     * Clamp the index to prevent access outside of the array bounds.
275     */
276    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
277 }
278 
/**
 * Replace the deref source of every storage-image intrinsic with a flat
 * integer surface index (the variable's driver_location plus the clamped
 * array-of-arrays offset), via nir_rewrite_image_intrinsic().
 */
static void
crocus_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* index = base binding + flattened (clamped) AoA element */
            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}
327 
328 // XXX: need unify_interfaces() at link time...
329 
330 /**
331  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
332  */
333 static bool
crocus_fix_edge_flags(nir_shader * nir)334 crocus_fix_edge_flags(nir_shader *nir)
335 {
336    if (nir->info.stage != MESA_SHADER_VERTEX) {
337       nir_shader_preserve_all_metadata(nir);
338       return false;
339    }
340 
341    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
342                                                        VARYING_SLOT_EDGE);
343    if (!var) {
344       nir_shader_preserve_all_metadata(nir);
345       return false;
346    }
347 
348    var->data.mode = nir_var_shader_temp;
349    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
350    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
351    nir_fixup_deref_modes(nir);
352 
353    nir_foreach_function(f, nir) {
354       if (f->impl) {
355          nir_metadata_preserve(f->impl, nir_metadata_block_index |
356                                nir_metadata_dominance |
357                                nir_metadata_live_ssa_defs |
358                                nir_metadata_loop_analysis);
359       } else {
360          nir_metadata_preserve(f->impl, nir_metadata_all);
361       }
362    }
363 
364    return true;
365 }
366 
367 /**
368  * Fix an uncompiled shader's stream output info.
369  *
370  * Core Gallium stores output->register_index as a "slot" number, where
371  * slots are assigned consecutively to all outputs in info->outputs_written.
372  * This naive packing of outputs doesn't work for us - we too have slots,
373  * but the layout is defined by the VUE map, which we won't have until we
374  * compile a specific shader variant.  So, we remap these and simply store
375  * VARYING_SLOT_* in our copy's output->register_index fields.
376  *
377  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
378  * components of our VUE header.  See brw_vue_map.c for the layout.
379  */
380 static void
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)381 update_so_info(struct pipe_stream_output_info *so_info,
382                uint64_t outputs_written)
383 {
384    uint8_t reverse_map[64] = {};
385    unsigned slot = 0;
386    while (outputs_written) {
387       reverse_map[slot++] = u_bit_scan64(&outputs_written);
388    }
389 
390    for (unsigned i = 0; i < so_info->num_outputs; i++) {
391       struct pipe_stream_output *output = &so_info->output[i];
392 
393       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
394       output->register_index = reverse_map[output->register_index];
395 
396       /* The VUE header contains three scalar fields packed together:
397        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
398        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
399        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
400        */
401       switch (output->register_index) {
402       case VARYING_SLOT_LAYER:
403          assert(output->num_components == 1);
404          output->register_index = VARYING_SLOT_PSIZ;
405          output->start_component = 1;
406          break;
407       case VARYING_SLOT_VIEWPORT:
408          assert(output->num_components == 1);
409          output->register_index = VARYING_SLOT_PSIZ;
410          output->start_component = 2;
411          break;
412       case VARYING_SLOT_PSIZ:
413          assert(output->num_components == 1);
414          output->start_component = 3;
415          break;
416       }
417 
418       //info->outputs_written |= 1ull << output->register_index;
419    }
420 }
421 
422 static void
setup_vec4_image_sysval(uint32_t * sysvals,uint32_t idx,unsigned offset,unsigned n)423 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
424                         unsigned offset, unsigned n)
425 {
426    assert(offset % sizeof(uint32_t) == 0);
427 
428    for (unsigned i = 0; i < n; ++i)
429       sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
430 
431    for (unsigned i = n; i < 4; ++i)
432       sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
433 }
434 
/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 *
 * System-value intrinsics are rewritten into load_ubo from a synthetic
 * "sysval" constant buffer appended after the user cbufs; load_constant
 * is rewritten into load_ubo from a constant-data buffer placed after
 * that.  Outputs: the collected sysval list, its length, and the total
 * cbuf count.
 */
static void
crocus_setup_uniforms(const struct brw_compiler *compiler,
                      void *mem_ctx,
                      nir_shader *nir,
                      struct brw_stage_prog_data *prog_data,
                      enum brw_param_builtin **out_system_values,
                      unsigned *out_num_system_values,
                      unsigned *out_num_cbufs)
{
   UNUSED const struct intel_device_info *devinfo = compiler->devinfo;

   const unsigned CROCUS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, CROCUS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   /* -1 (as unsigned) marks "not allocated yet" for each sysval slot. */
   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[CROCUS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Placeholder SSA value standing in for the sysval cbuf index; it is
    * replaced with the real index in the second pass below. */
   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
   nir_ssa_def *temp_const_ubo_name = NULL;

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_before_instr(instr);
            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                            nir_intrinsic_base(intrin));

            if (temp_const_ubo_name == NULL)
               temp_const_ubo_name = nir_imm_int(&b, 0);

            nir_intrinsic_instr *load_ubo =
               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
            load_ubo->num_components = intrin->num_components;
            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
            load_ubo->src[1] = nir_src_for_ssa(offset);
            nir_intrinsic_set_align(load_ubo, 4, 0);
            nir_intrinsic_set_range_base(load_ubo, 0);
            nir_intrinsic_set_range(load_ubo, ~0);
            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size,
                              NULL);
            nir_builder_instr_insert(&b, &load_ubo->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     &load_ubo->dest.ssa);
            nir_instr_remove(&intrin->instr);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->ver < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               /* Allocate BRW_IMAGE_PARAM_SIZE sysvals per image covering
                * offset/size/stride/tiling/swizzling. */
               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
                              get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
                              nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                                          nir_intrinsic_base(intrin) * 16));
            break;
         }
         case nir_intrinsic_load_workgroup_size: {
            assert(nir->info.workgroup_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b,
                                 variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         default:
            continue;
         }

         /* Shared tail: replace the intrinsic with a load_ubo from the
          * (placeholder) sysval cbuf at the computed offset. */
         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_intrinsic_set_align(load, 4, 0);
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  &load->dest.ssa);
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0, the
    * user-facing UBOs are indexed by one.  So if any constant buffer is
    * needed, the constant buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;

   /* Place the new params in a new cbuf. */
   if (num_system_values > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      /* Shrink the over-allocated array to the actual count. */
      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      /* Second pass: patch the placeholder cbuf index into every load_ubo
       * created above. */
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(imm));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[] but gallium leaves num_uniforms set.  We use this
    * to detect when cbuf0 exists but we don't need it anymore when we get
    * here.  Instead, zero it out so that the back-end doesn't get confused
    * when nr_params * 4 != num_uniforms != nr_params * 4.
    */
   nir->num_uniforms = 0;

   /* Constant loads (if any) need to go at the end of the constant buffers so
    * we need to know num_cbufs before we can lower to them.
    */
   if (temp_const_ubo_name != NULL) {
      nir_load_const_instr *const_ubo_index =
         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
      assert(const_ubo_index->def.bit_size == 32);
      const_ubo_index->value[0].u32 = num_cbufs;
   }

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}
693 
/* Human-readable labels for each crocus_surface_group, used by
 * crocus_print_binding_table() below; must stay in sync with the
 * CROCUS_SURFACE_GROUP_* enum (checked by a STATIC_ASSERT there). */
static const char *surface_group_names[] = {
   [CROCUS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
   [CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
   [CROCUS_SURFACE_GROUP_SOL]                = "streamout",
   [CROCUS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
   [CROCUS_SURFACE_GROUP_TEXTURE]            = "texture",
   [CROCUS_SURFACE_GROUP_TEXTURE_GATHER]     = "texture gather",
   [CROCUS_SURFACE_GROUP_UBO]                = "ubo",
   [CROCUS_SURFACE_GROUP_SSBO]               = "ssbo",
   [CROCUS_SURFACE_GROUP_IMAGE]              = "image",
};
705 
706 static void
crocus_print_binding_table(FILE * fp,const char * name,const struct crocus_binding_table * bt)707 crocus_print_binding_table(FILE *fp, const char *name,
708                            const struct crocus_binding_table *bt)
709 {
710    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == CROCUS_SURFACE_GROUP_COUNT);
711 
712    uint32_t total = 0;
713    uint32_t compacted = 0;
714 
715    for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {
716       uint32_t size = bt->sizes[i];
717       total += size;
718       if (size)
719          compacted += util_bitcount64(bt->used_mask[i]);
720    }
721 
722    if (total == 0) {
723       fprintf(fp, "Binding table for %s is empty\n\n", name);
724       return;
725    }
726 
727    if (total != compacted) {
728       fprintf(fp, "Binding table for %s "
729               "(compacted to %u entries from %u entries)\n",
730               name, compacted, total);
731    } else {
732       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
733    }
734 
735    uint32_t entry = 0;
736    for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {
737       uint64_t mask = bt->used_mask[i];
738       while (mask) {
739          int index = u_bit_scan64(&mask);
740          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
741       }
742    }
743    fprintf(fp, "\n");
744 }
745 
enum {
   /* Max elements in a surface group (bounds the 64-bit used_mask). */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};
750 
751 static void
rewrite_src_with_bti(nir_builder * b,struct crocus_binding_table * bt,nir_instr * instr,nir_src * src,enum crocus_surface_group group)752 rewrite_src_with_bti(nir_builder *b, struct crocus_binding_table *bt,
753                      nir_instr *instr, nir_src *src,
754                      enum crocus_surface_group group)
755 {
756    assert(bt->sizes[group] > 0);
757 
758    b->cursor = nir_before_instr(instr);
759    nir_ssa_def *bti;
760    if (nir_src_is_const(*src)) {
761       uint32_t index = nir_src_as_uint(*src);
762       bti = nir_imm_intN_t(b, crocus_group_index_to_bti(bt, group, index),
763                            src->ssa->bit_size);
764    } else {
765       /* Indirect usage makes all the surfaces of the group to be available,
766        * so we can just add the base.
767        */
768       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
769       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
770    }
771    nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
772 }
773 
774 static void
mark_used_with_src(struct crocus_binding_table * bt,nir_src * src,enum crocus_surface_group group)775 mark_used_with_src(struct crocus_binding_table *bt, nir_src *src,
776                    enum crocus_surface_group group)
777 {
778    assert(bt->sizes[group] > 0);
779 
780    if (nir_src_is_const(*src)) {
781       uint64_t index = nir_src_as_uint(*src);
782       assert(index < bt->sizes[group]);
783       bt->used_mask[group] |= 1ull << index;
784    } else {
785       /* There's an indirect usage, we need all the surfaces. */
786       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
787    }
788 }
789 
/**
 * Check (once, then cache) whether binding table compaction is disabled
 * via the INTEL_DISABLE_COMPACT_BINDING_TABLE environment variable.
 */
static bool
skip_compacting_binding_tables(void)
{
   /* -1 means "not yet queried"; the env var is read only once. */
   static int cached = -1;
   if (cached < 0) {
      cached = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE",
                                  false);
   }
   return cached;
}
798 
/**
 * Set up the binding table indices and apply to the shader.
 *
 * This runs in two passes over the shader: the first pass sizes each
 * surface group and records which slots are actually referenced
 * (used_mask), the second pass rewrites texture indices and intrinsic
 * surface-index sources to the compacted binding table indices.
 * Unused slots are dropped ("compacted") unless disabled via
 * INTEL_DISABLE_COMPACT_BINDING_TABLE.
 */
static void
crocus_setup_binding_table(const struct intel_device_info *devinfo,
                           struct nir_shader *nir,
                           struct crocus_binding_table *bt,
                           unsigned num_render_targets,
                           unsigned num_system_values,
                           unsigned num_cbufs,
                           const struct brw_sampler_prog_key_data *key)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group.  For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Setup render target read surface group in order to support non-coherent
       * framebuffer fetch on Gfx7.
       * NOTE(review): the comment says Gfx7 but the guard is ver >= 6 —
       * confirm the intended generation range.
       */
      if (devinfo->ver >= 6 && info->outputs_read) {
         bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      /* One slot for the gl_NumWorkGroups surface; marked used later only
       * if load_num_workgroups actually appears in the shader.
       */
      bt->sizes[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   } else if (info->stage == MESA_SHADER_GEOMETRY) {
      /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
       * feedback surfaces.
       */
      if (devinfo->ver == 6) {
         bt->sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS;
         bt->used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1;
      }
   }

   /* Texture usage is already known from shader_info, so no scan needed. */
   bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
   bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];

   /* Pre-Gfx8, tg4 needs a parallel set of surfaces with gather-specific
    * state, mirroring the regular texture group.
    */
   if (info->uses_texture_gather && devinfo->ver < 8) {
      bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = BITSET_LAST_BIT(info->textures_used);
      bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = info->textures_used[0];
   }

   bt->sizes[CROCUS_SURFACE_GROUP_IMAGE] = info->num_images;

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in crocus_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbufs[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[CROCUS_SURFACE_GROUP_UBO] = num_cbufs + 1;

   bt->sizes[CROCUS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   /* Every group must fit in a 64-bit used_mask. */
   for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases we don't have the information available
    * upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_workgroups:
            bt->used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            /* Framebuffer fetch: reads a render target through a surface. */
            if (devinfo->ver >= 6) {
               mark_used_with_src(bt, &intrin->src[0],
                                  CROCUS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            /* For stores, src[1] holds the SSBO index (src[0] is the data). */
            mark_used_with_src(bt, &intrin->src[1], CROCUS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces.  After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
   /* Each binding table entry occupies 4 bytes. */
   bt->size_bytes = next * 4;

   if (INTEL_DEBUG(DEBUG_BT)) {
      crocus_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            /* Pre-Gfx8 gathers go through the dedicated gather group. */
            bool is_gather = devinfo->ver < 8 && tex->op == nir_texop_tg4;

            /* rewrite the tg4 component from green to blue before replacing the
               texture index */
            if (devinfo->verx10 == 70) {
               if (tex->component == 1)
                  if (key->gather_channel_quirk_mask & (1 << tex->texture_index))
                     tex->component = 2;
            }

            /* Gfx6 gather workaround: the hardware returns unnormalized
             * values for some formats; rescale (and sign-extend when
             * requested) after the tex instruction.
             */
            if (is_gather && devinfo->ver == 6 && key->gfx6_gather_wa[tex->texture_index]) {
               b.cursor = nir_after_instr(instr);
               enum gfx6_gather_sampler_wa wa = key->gfx6_gather_wa[tex->texture_index];
               int width = (wa & WA_8BIT) ? 8 : 16;

               nir_ssa_def *val = nir_fmul_imm(&b, &tex->dest.ssa, (1 << width) - 1);
               val = nir_f2u32(&b, val);
               if (wa & WA_SIGN) {
                  /* Shift left then arithmetic-shift right to sign extend. */
                  val = nir_ishl(&b, val, nir_imm_int(&b, 32 - width));
                  val = nir_ishr(&b, val, nir_imm_int(&b, 32 - width));
               }
               nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, val, val->parent_instr);
            }

            tex->texture_index =
               crocus_group_index_to_bti(bt, is_gather ? CROCUS_SURFACE_GROUP_TEXTURE_GATHER : CROCUS_SURFACE_GROUP_TEXTURE,
                                         tex->texture_index);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         /* Same intrinsic set as the marking pass above; now each source is
          * rewritten to its final binding table index.
          */
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 CROCUS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 CROCUS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 CROCUS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver >= 6) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    CROCUS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 CROCUS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}
1070 
1071 static void
crocus_debug_recompile(struct crocus_context * ice,struct shader_info * info,const struct brw_base_prog_key * key)1072 crocus_debug_recompile(struct crocus_context *ice,
1073                        struct shader_info *info,
1074                        const struct brw_base_prog_key *key)
1075 {
1076    struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
1077    const struct brw_compiler *c = screen->compiler;
1078 
1079    if (!info)
1080       return;
1081 
1082    brw_shader_perf_log(c, &ice->dbg, "Recompiling %s shader for program %s: %s\n",
1083                        _mesa_shader_stage_to_string(info->stage),
1084                        info->name ? info->name : "(no identifier)",
1085                        info->label ? info->label : "");
1086 
1087    const void *old_key =
1088       crocus_find_previous_compile(ice, info->stage, key->program_string_id);
1089 
1090    brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);
1091 }
1092 
1093 /**
1094  * Get the shader for the last enabled geometry stage.
1095  *
1096  * This stage is the one which will feed stream output and the rasterizer.
1097  */
1098 static gl_shader_stage
last_vue_stage(struct crocus_context * ice)1099 last_vue_stage(struct crocus_context *ice)
1100 {
1101    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1102       return MESA_SHADER_GEOMETRY;
1103 
1104    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1105       return MESA_SHADER_TESS_EVAL;
1106 
1107    return MESA_SHADER_VERTEX;
1108 }
1109 
1110 static GLbitfield64
crocus_vs_outputs_written(struct crocus_context * ice,const struct brw_vs_prog_key * key,GLbitfield64 user_varyings)1111 crocus_vs_outputs_written(struct crocus_context *ice,
1112                           const struct brw_vs_prog_key *key,
1113                           GLbitfield64 user_varyings)
1114 {
1115    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1116    const struct intel_device_info *devinfo = &screen->devinfo;
1117    GLbitfield64 outputs_written = user_varyings;
1118 
1119    if (devinfo->ver < 6) {
1120 
1121       if (key->copy_edgeflag)
1122          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
1123 
1124       /* Put dummy slots into the VUE for the SF to put the replaced
1125        * point sprite coords in.  We shouldn't need these dummy slots,
1126        * which take up precious URB space, but it would mean that the SF
1127        * doesn't get nice aligned pairs of input coords into output
1128        * coords, which would be a pain to handle.
1129        */
1130       for (unsigned i = 0; i < 8; i++) {
1131          if (key->point_coord_replace & (1 << i))
1132             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
1133       }
1134 
1135       /* if back colors are written, allocate slots for front colors too */
1136       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
1137          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
1138       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
1139          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
1140    }
1141 
1142    /* In order for legacy clipping to work, we need to populate the clip
1143     * distance varying slots whenever clipping is enabled, even if the vertex
1144     * shader doesn't write to gl_ClipDistance.
1145     */
1146    if (key->nr_userclip_plane_consts > 0) {
1147       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
1148       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
1149    }
1150 
1151    return outputs_written;
1152 }
1153 
1154 /*
1155  * If no edgeflags come from the user, gen4/5
1156  * require giving the clip shader a default edgeflag.
1157  *
1158  * This will always be 1.0.
1159  */
1160 static void
crocus_lower_default_edgeflags(struct nir_shader * nir)1161 crocus_lower_default_edgeflags(struct nir_shader *nir)
1162 {
1163    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1164 
1165    nir_builder b;
1166    nir_builder_init(&b, impl);
1167 
1168    b.cursor = nir_after_cf_list(&b.impl->body);
1169    nir_variable *var = nir_variable_create(nir, nir_var_shader_out,
1170                                            glsl_float_type(),
1171                                            "edgeflag");
1172    var->data.location = VARYING_SLOT_EDGE;
1173    nir_store_var(&b, var, nir_imm_float(&b, 1.0), 0x1);
1174 }
1175 
1176 /**
1177  * Compile a vertex shader, and upload the assembly.
1178  */
1179 static struct crocus_compiled_shader *
crocus_compile_vs(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_vs_prog_key * key)1180 crocus_compile_vs(struct crocus_context *ice,
1181                   struct crocus_uncompiled_shader *ish,
1182                   const struct brw_vs_prog_key *key)
1183 {
1184    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1185    const struct brw_compiler *compiler = screen->compiler;
1186    const struct intel_device_info *devinfo = &screen->devinfo;
1187    void *mem_ctx = ralloc_context(NULL);
1188    struct brw_vs_prog_data *vs_prog_data =
1189       rzalloc(mem_ctx, struct brw_vs_prog_data);
1190    struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1191    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1192    enum brw_param_builtin *system_values;
1193    unsigned num_system_values;
1194    unsigned num_cbufs;
1195 
1196    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1197 
1198    if (key->nr_userclip_plane_consts) {
1199       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1200       nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true,
1201                         false, NULL);
1202       nir_lower_io_to_temporaries(nir, impl, true, false);
1203       nir_lower_global_vars_to_local(nir);
1204       nir_lower_vars_to_ssa(nir);
1205       nir_shader_gather_info(nir, impl);
1206    }
1207 
1208    if (key->clamp_pointsize)
1209       nir_lower_point_size(nir, 1.0, 255.0);
1210 
1211    prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1212 
1213    crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1214                          &num_system_values, &num_cbufs);
1215 
1216    crocus_lower_swizzles(nir, &key->base.tex);
1217 
1218    if (devinfo->ver <= 5 &&
1219        !(nir->info.inputs_read & BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)))
1220       crocus_lower_default_edgeflags(nir);
1221 
1222    struct crocus_binding_table bt;
1223    crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1224                               num_system_values, num_cbufs, &key->base.tex);
1225 
1226    if (can_push_ubo(devinfo))
1227       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1228 
1229    uint64_t outputs_written =
1230       crocus_vs_outputs_written(ice, key, nir->info.outputs_written);
1231    brw_compute_vue_map(devinfo,
1232                        &vue_prog_data->vue_map, outputs_written,
1233                        nir->info.separate_shader, /* pos slots */ 1);
1234 
1235    /* Don't tell the backend about our clip plane constants, we've already
1236     * lowered them in NIR and we don't want it doing it again.
1237     */
1238    struct brw_vs_prog_key key_no_ucp = *key;
1239    key_no_ucp.nr_userclip_plane_consts = 0;
1240    key_no_ucp.copy_edgeflag = false;
1241    crocus_sanitize_tex_key(&key_no_ucp.base.tex);
1242 
1243    struct brw_compile_vs_params params = {
1244       .nir = nir,
1245       .key = &key_no_ucp,
1246       .prog_data = vs_prog_data,
1247       .edgeflag_is_last = devinfo->ver < 6,
1248       .log_data = &ice->dbg,
1249    };
1250    const unsigned *program =
1251       brw_compile_vs(compiler, mem_ctx, &params);
1252    if (program == NULL) {
1253       dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1254       ralloc_free(mem_ctx);
1255       return false;
1256    }
1257 
1258    if (ish->compiled_once) {
1259       crocus_debug_recompile(ice, &nir->info, &key->base);
1260    } else {
1261       ish->compiled_once = true;
1262    }
1263 
1264    uint32_t *so_decls = NULL;
1265    if (devinfo->ver > 6)
1266       so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
1267                                                   &vue_prog_data->vue_map);
1268 
1269    struct crocus_compiled_shader *shader =
1270       crocus_upload_shader(ice, CROCUS_CACHE_VS, sizeof(*key), key, program,
1271                            prog_data->program_size,
1272                            prog_data, sizeof(*vs_prog_data), so_decls,
1273                            system_values, num_system_values,
1274                            num_cbufs, &bt);
1275 
1276    crocus_disk_cache_store(screen->disk_cache, ish, shader,
1277                            ice->shaders.cache_bo_map,
1278                            key, sizeof(*key));
1279 
1280    ralloc_free(mem_ctx);
1281    return shader;
1282 }
1283 
/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 * When the bound variant changes, flags the dirty state required for the
 * next draw and mirrors the prog_data's draw-parameter needs into
 * ice->state so vertex element setup can react.
 */
static void
crocus_update_compiled_vs(struct crocus_context *ice)
{
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct brw_vs_prog_key key = { KEY_INIT() };

   /* Only fold sampler state into the key if this shader depends on it. */
   if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
      crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_VERTEX, ish,
                                            ish->nir->info.uses_texture_gather, &key.base.tex);
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_VS];
   /* Lookup order: in-memory cache, then disk cache, then compile. */
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = crocus_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[CROCUS_CACHE_VS] = shader;
      if (devinfo->ver == 8)
         ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS |
                                CROCUS_STAGE_DIRTY_BINDINGS_VS |
                                CROCUS_STAGE_DIRTY_CONSTANTS_VS;
      shs->sysvals_need_upload = true;

      const struct brw_vs_prog_data *vs_prog_data =
         (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;

      /* Changing which system-generated values the VS consumes alters the
       * vertex element/buffer layout, so flag those dirty too.
       */
      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != ish->needs_edge_flag ||
          ice->state.vs_uses_vertexid != vs_prog_data->uses_vertexid ||
          ice->state.vs_uses_instanceid != vs_prog_data->uses_instanceid) {
         ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |
                             CROCUS_DIRTY_VERTEX_ELEMENTS;
      }
      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
      ice->state.vs_uses_vertexid = vs_prog_data->uses_vertexid;
      ice->state.vs_uses_instanceid = vs_prog_data->uses_instanceid;
   }
}
1349 
1350 /**
1351  * Get the shader_info for a given stage, or NULL if the stage is disabled.
1352  */
1353 const struct shader_info *
crocus_get_shader_info(const struct crocus_context * ice,gl_shader_stage stage)1354 crocus_get_shader_info(const struct crocus_context *ice, gl_shader_stage stage)
1355 {
1356    const struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1357 
1358    if (!ish)
1359       return NULL;
1360 
1361    const nir_shader *nir = ish->nir;
1362    return &nir->info;
1363 }
1364 
/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct crocus_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   /* NOTE(review): tes is dereferenced without a NULL check, so callers
    * must only invoke this when a TES is bound.  Only the TCS may be
    * absent (crocus_compile_tcs builds a passthrough TCS for that case) —
    * confirm all call sites honor this.
    */
   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}
1395 
1396 /**
1397  * Compile a tessellation control shader, and upload the assembly.
1398  */
1399 static struct crocus_compiled_shader *
crocus_compile_tcs(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_tcs_prog_key * key)1400 crocus_compile_tcs(struct crocus_context *ice,
1401                    struct crocus_uncompiled_shader *ish,
1402                    const struct brw_tcs_prog_key *key)
1403 {
1404    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1405    const struct brw_compiler *compiler = screen->compiler;
1406    const struct nir_shader_compiler_options *options =
1407       compiler->nir_options[MESA_SHADER_TESS_CTRL];
1408    void *mem_ctx = ralloc_context(NULL);
1409    struct brw_tcs_prog_data *tcs_prog_data =
1410       rzalloc(mem_ctx, struct brw_tcs_prog_data);
1411    struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1412    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1413    const struct intel_device_info *devinfo = &screen->devinfo;
1414    enum brw_param_builtin *system_values = NULL;
1415    unsigned num_system_values = 0;
1416    unsigned num_cbufs = 0;
1417 
1418    nir_shader *nir;
1419 
1420    struct crocus_binding_table bt;
1421 
1422    if (ish) {
1423       nir = nir_shader_clone(mem_ctx, ish->nir);
1424 
1425       crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1426                             &num_system_values, &num_cbufs);
1427 
1428       crocus_lower_swizzles(nir, &key->base.tex);
1429       crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1430                                  num_system_values, num_cbufs, &key->base.tex);
1431       if (can_push_ubo(devinfo))
1432          brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1433    } else {
1434       nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
1435 
1436       /* Reserve space for passing the default tess levels as constants. */
1437       num_cbufs = 1;
1438       num_system_values = 8;
1439       system_values =
1440          rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1441       prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1442       prog_data->nr_params = num_system_values;
1443 
1444       if (key->_tes_primitive_mode == TESS_PRIMITIVE_QUADS) {
1445          for (int i = 0; i < 4; i++)
1446             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1447 
1448          system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1449          system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1450       } else if (key->_tes_primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
1451          for (int i = 0; i < 3; i++)
1452             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1453 
1454          system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1455       } else {
1456          assert(key->_tes_primitive_mode == TESS_PRIMITIVE_ISOLINES);
1457          system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1458          system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1459       }
1460 
1461       /* Manually setup the TCS binding table. */
1462       memset(&bt, 0, sizeof(bt));
1463       bt.sizes[CROCUS_SURFACE_GROUP_UBO] = 1;
1464       bt.used_mask[CROCUS_SURFACE_GROUP_UBO] = 1;
1465       bt.size_bytes = 4;
1466 
1467       prog_data->ubo_ranges[0].length = 1;
1468    }
1469 
1470    struct brw_tcs_prog_key key_clean = *key;
1471    crocus_sanitize_tex_key(&key_clean.base.tex);
1472 
1473    struct brw_compile_tcs_params params = {
1474       .nir = nir,
1475       .key = &key_clean,
1476       .prog_data = tcs_prog_data,
1477       .log_data = &ice->dbg,
1478    };
1479 
1480    const unsigned *program = brw_compile_tcs(compiler, mem_ctx, &params);
1481    if (program == NULL) {
1482       dbg_printf("Failed to compile control shader: %s\n", params.error_str);
1483       ralloc_free(mem_ctx);
1484       return false;
1485    }
1486 
1487    if (ish) {
1488       if (ish->compiled_once) {
1489          crocus_debug_recompile(ice, &nir->info, &key->base);
1490       } else {
1491          ish->compiled_once = true;
1492       }
1493    }
1494 
1495    struct crocus_compiled_shader *shader =
1496       crocus_upload_shader(ice, CROCUS_CACHE_TCS, sizeof(*key), key, program,
1497                            prog_data->program_size,
1498                            prog_data, sizeof(*tcs_prog_data), NULL,
1499                            system_values, num_system_values,
1500                            num_cbufs, &bt);
1501 
1502    if (ish)
1503       crocus_disk_cache_store(screen->disk_cache, ish, shader,
1504                               ice->shaders.cache_bo_map,
1505                               key, sizeof(*key));
1506 
1507    ralloc_free(mem_ctx);
1508    return shader;
1509 }
1510 
1511 /**
1512  * Update the current tessellation control shader variant.
1513  *
1514  * Fill out the key, look in the cache, compile and bind if needed.
1515  */
static void
crocus_update_compiled_tcs(struct crocus_context *ice)
{
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
   /* May be NULL: a passthrough TCS is generated at compile time when the
    * application binds only a TES.
    */
   struct crocus_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* The TCS key depends on TES info (primitive mode / spacing), so fetch
    * the evaluation shader's info.  Tessellation being active implies a TES
    * is bound, so this is expected to be non-NULL here.
    */
   const struct shader_info *tes_info =
      crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct brw_tcs_prog_key key = {
      KEY_INIT_NO_ID(),
      /* program_string_id is 0 for the passthrough-TCS case. */
      .base.program_string_id = tcs ? tcs->program_id : 0,
      ._tes_primitive_mode = tes_info->tess._primitive_mode,
      .input_vertices = ice->state.vertices_per_patch,
      /* Gen hardware workaround for equal-spaced quads tessellation. */
      .quads_workaround = tes_info->tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
   };

   /* Fold texture swizzle/format state into the key only if this shader
    * declared a dependency on texture state (NOS = non-orthogonal state).
    */
   if (tcs && tcs->nos & (1ull << CROCUS_NOS_TEXTURES))
      crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_CTRL, tcs,
                                            tcs->nir->info.uses_texture_gather, &key.base.tex);
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   screen->vtbl.populate_tcs_key(ice, &key);

   /* Look up in the in-memory cache, then the disk cache, then compile. */
   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TCS];
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_TCS, sizeof(key), &key);

   if (tcs && !shader)
      shader = crocus_disk_cache_retrieve(ice, tcs, &key, sizeof(key));

   if (!shader)
      shader = crocus_compile_tcs(ice, tcs, &key);

   /* Only flag state dirty when the variant actually changed. */
   if (old != shader) {
      ice->shaders.prog[CROCUS_CACHE_TCS] = shader;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TCS |
                                CROCUS_STAGE_DIRTY_BINDINGS_TCS |
                                CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
      shs->sysvals_need_upload = true;
   }
}
1561 
1562 /**
1563  * Compile a tessellation evaluation shader, and upload the assembly.
1564  */
1565 static struct crocus_compiled_shader *
crocus_compile_tes(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_tes_prog_key * key)1566 crocus_compile_tes(struct crocus_context *ice,
1567                    struct crocus_uncompiled_shader *ish,
1568                    const struct brw_tes_prog_key *key)
1569 {
1570    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1571    const struct brw_compiler *compiler = screen->compiler;
1572    void *mem_ctx = ralloc_context(NULL);
1573    struct brw_tes_prog_data *tes_prog_data =
1574       rzalloc(mem_ctx, struct brw_tes_prog_data);
1575    struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1576    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1577    enum brw_param_builtin *system_values;
1578    const struct intel_device_info *devinfo = &screen->devinfo;
1579    unsigned num_system_values;
1580    unsigned num_cbufs;
1581 
1582    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1583 
1584    if (key->nr_userclip_plane_consts) {
1585       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1586       nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true,
1587                         false, NULL);
1588       nir_lower_io_to_temporaries(nir, impl, true, false);
1589       nir_lower_global_vars_to_local(nir);
1590       nir_lower_vars_to_ssa(nir);
1591       nir_shader_gather_info(nir, impl);
1592    }
1593 
1594    if (key->clamp_pointsize)
1595       nir_lower_point_size(nir, 1.0, 255.0);
1596 
1597    crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1598                          &num_system_values, &num_cbufs);
1599    crocus_lower_swizzles(nir, &key->base.tex);
1600    struct crocus_binding_table bt;
1601    crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1602                               num_system_values, num_cbufs, &key->base.tex);
1603 
1604    if (can_push_ubo(devinfo))
1605       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1606 
1607    struct brw_vue_map input_vue_map;
1608    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1609                             key->patch_inputs_read);
1610 
1611    struct brw_tes_prog_key key_clean = *key;
1612    crocus_sanitize_tex_key(&key_clean.base.tex);
1613 
1614    struct brw_compile_tes_params params = {
1615       .nir = nir,
1616       .key = &key_clean,
1617       .prog_data = tes_prog_data,
1618       .input_vue_map = &input_vue_map,
1619       .log_data = &ice->dbg,
1620    };
1621 
1622    const unsigned *program = brw_compile_tes(compiler, mem_ctx, &params);
1623    if (program == NULL) {
1624       dbg_printf("Failed to compile evaluation shader: %s\n", params.error_str);
1625       ralloc_free(mem_ctx);
1626       return false;
1627    }
1628 
1629    if (ish->compiled_once) {
1630       crocus_debug_recompile(ice, &nir->info, &key->base);
1631    } else {
1632       ish->compiled_once = true;
1633    }
1634 
1635    uint32_t *so_decls = NULL;
1636    if (devinfo->ver > 6)
1637       so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
1638                                                   &vue_prog_data->vue_map);
1639 
1640    struct crocus_compiled_shader *shader =
1641       crocus_upload_shader(ice, CROCUS_CACHE_TES, sizeof(*key), key, program,
1642                            prog_data->program_size,
1643                            prog_data, sizeof(*tes_prog_data), so_decls,
1644                            system_values, num_system_values,
1645                            num_cbufs, &bt);
1646 
1647    crocus_disk_cache_store(screen->disk_cache, ish, shader,
1648                            ice->shaders.cache_bo_map,
1649                            key, sizeof(*key));
1650 
1651    ralloc_free(mem_ctx);
1652    return shader;
1653 }
1654 
1655 /**
1656  * Update the current tessellation evaluation shader variant.
1657  *
1658  * Fill out the key, look in the cache, compile and bind if needed.
1659  */
static void
crocus_update_compiled_tes(struct crocus_context *ice)
{
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   struct brw_tes_prog_key key = { KEY_INIT() };
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* Only fold texture state into the key if this shader depends on it. */
   if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
      crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_EVAL, ish,
                                            ish->nir->info.uses_texture_gather, &key.base.tex);
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   /* In-memory cache -> disk cache -> fresh compile. */
   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TES];
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_TES, sizeof(key), &key);

   if (!shader)
      shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = crocus_compile_tes(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[CROCUS_CACHE_TES] = shader;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TES |
                                CROCUS_STAGE_DIRTY_BINDINGS_TES |
                                CROCUS_STAGE_DIRTY_CONSTANTS_TES;
      shs->sysvals_need_upload = true;
   }

   /* TODO: Could compare and avoid flagging this. */
   /* gl_PatchVerticesIn is uploaded as a constant, so any use of it forces
    * a sysval re-upload even when the shader variant itself is unchanged.
    */
   const struct shader_info *tes_info = &ish->nir->info;
   if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
   }
}
1701 
1702 /**
1703  * Compile a geometry shader, and upload the assembly.
1704  */
1705 static struct crocus_compiled_shader *
crocus_compile_gs(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_gs_prog_key * key)1706 crocus_compile_gs(struct crocus_context *ice,
1707                   struct crocus_uncompiled_shader *ish,
1708                   const struct brw_gs_prog_key *key)
1709 {
1710    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1711    const struct brw_compiler *compiler = screen->compiler;
1712    const struct intel_device_info *devinfo = &screen->devinfo;
1713    void *mem_ctx = ralloc_context(NULL);
1714    struct brw_gs_prog_data *gs_prog_data =
1715       rzalloc(mem_ctx, struct brw_gs_prog_data);
1716    struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1717    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1718    enum brw_param_builtin *system_values;
1719    unsigned num_system_values;
1720    unsigned num_cbufs;
1721 
1722    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1723 
1724    if (key->nr_userclip_plane_consts) {
1725       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1726       nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1, false,
1727                         NULL);
1728       nir_lower_io_to_temporaries(nir, impl, true, false);
1729       nir_lower_global_vars_to_local(nir);
1730       nir_lower_vars_to_ssa(nir);
1731       nir_shader_gather_info(nir, impl);
1732    }
1733 
1734    if (key->clamp_pointsize)
1735       nir_lower_point_size(nir, 1.0, 255.0);
1736 
1737    crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1738                          &num_system_values, &num_cbufs);
1739    crocus_lower_swizzles(nir, &key->base.tex);
1740    struct crocus_binding_table bt;
1741    crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1742                               num_system_values, num_cbufs, &key->base.tex);
1743 
1744    if (can_push_ubo(devinfo))
1745       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1746 
1747    brw_compute_vue_map(devinfo,
1748                        &vue_prog_data->vue_map, nir->info.outputs_written,
1749                        nir->info.separate_shader, /* pos slots */ 1);
1750 
1751    if (devinfo->ver == 6)
1752       gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data);
1753    struct brw_gs_prog_key key_clean = *key;
1754    crocus_sanitize_tex_key(&key_clean.base.tex);
1755 
1756    struct brw_compile_gs_params params = {
1757       .nir = nir,
1758       .key = &key_clean,
1759       .prog_data = gs_prog_data,
1760       .log_data = &ice->dbg,
1761    };
1762 
1763    const unsigned *program = brw_compile_gs(compiler, mem_ctx, &params);
1764    if (program == NULL) {
1765       dbg_printf("Failed to compile geometry shader: %s\n", params.error_str);
1766       ralloc_free(mem_ctx);
1767       return false;
1768    }
1769 
1770    if (ish->compiled_once) {
1771       crocus_debug_recompile(ice, &nir->info, &key->base);
1772    } else {
1773       ish->compiled_once = true;
1774    }
1775 
1776    uint32_t *so_decls = NULL;
1777    if (devinfo->ver > 6)
1778       so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
1779                                                   &vue_prog_data->vue_map);
1780 
1781    struct crocus_compiled_shader *shader =
1782       crocus_upload_shader(ice, CROCUS_CACHE_GS, sizeof(*key), key, program,
1783                            prog_data->program_size,
1784                            prog_data, sizeof(*gs_prog_data), so_decls,
1785                            system_values, num_system_values,
1786                            num_cbufs, &bt);
1787 
1788    crocus_disk_cache_store(screen->disk_cache, ish, shader,
1789                            ice->shaders.cache_bo_map,
1790                            key, sizeof(*key));
1791 
1792    ralloc_free(mem_ctx);
1793    return shader;
1794 }
1795 
1796 /**
1797  * Update the current geometry shader variant.
1798  *
1799  * Fill out the key, look in the cache, compile and bind if needed.
1800  */
static void
crocus_update_compiled_gs(struct crocus_context *ice)
{
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_GS];
   /* Stays NULL when no GS is bound, which unbinds any previous variant. */
   struct crocus_compiled_shader *shader = NULL;

   if (ish) {
      struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
      const struct intel_device_info *devinfo = &screen->devinfo;
      struct brw_gs_prog_key key = { KEY_INIT() };

      /* Only fold texture state into the key if this shader depends on it. */
      if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
         crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_GEOMETRY, ish,
                                               ish->nir->info.uses_texture_gather, &key.base.tex);
      screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

      /* In-memory cache -> disk cache -> fresh compile. */
      shader =
         crocus_find_cached_shader(ice, CROCUS_CACHE_GS, sizeof(key), &key);

      if (!shader)
         shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));

      if (!shader)
         shader = crocus_compile_gs(ice, ish, &key);
   }

   if (old != shader) {
      ice->shaders.prog[CROCUS_CACHE_GS] = shader;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |
                                CROCUS_STAGE_DIRTY_BINDINGS_GS |
                                CROCUS_STAGE_DIRTY_CONSTANTS_GS;
      shs->sysvals_need_upload = true;
   }
}
1838 
1839 /**
1840  * Compile a fragment (pixel) shader, and upload the assembly.
1841  */
1842 static struct crocus_compiled_shader *
crocus_compile_fs(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_wm_prog_key * key,struct brw_vue_map * vue_map)1843 crocus_compile_fs(struct crocus_context *ice,
1844                   struct crocus_uncompiled_shader *ish,
1845                   const struct brw_wm_prog_key *key,
1846                   struct brw_vue_map *vue_map)
1847 {
1848    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1849    const struct brw_compiler *compiler = screen->compiler;
1850    void *mem_ctx = ralloc_context(NULL);
1851    struct brw_wm_prog_data *fs_prog_data =
1852       rzalloc(mem_ctx, struct brw_wm_prog_data);
1853    struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1854    enum brw_param_builtin *system_values;
1855    const struct intel_device_info *devinfo = &screen->devinfo;
1856    unsigned num_system_values;
1857    unsigned num_cbufs;
1858 
1859    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1860 
1861    prog_data->use_alt_mode = nir->info.use_legacy_math_rules;
1862 
1863    crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
1864                          &num_system_values, &num_cbufs);
1865 
1866    /* Lower output variables to load_output intrinsics before setting up
1867     * binding tables, so crocus_setup_binding_table can map any load_output
1868     * intrinsics to CROCUS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for
1869     * non-coherent framebuffer fetches.
1870     */
1871    brw_nir_lower_fs_outputs(nir);
1872 
1873    /* lower swizzles before binding table */
1874    crocus_lower_swizzles(nir, &key->base.tex);
1875    int null_rts = 1;
1876 
1877    struct crocus_binding_table bt;
1878    crocus_setup_binding_table(devinfo, nir, &bt,
1879                               MAX2(key->nr_color_regions, null_rts),
1880                               num_system_values, num_cbufs,
1881                               &key->base.tex);
1882 
1883    if (can_push_ubo(devinfo))
1884       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1885 
1886    struct brw_wm_prog_key key_clean = *key;
1887    crocus_sanitize_tex_key(&key_clean.base.tex);
1888 
1889    struct brw_compile_fs_params params = {
1890       .nir = nir,
1891       .key = &key_clean,
1892       .prog_data = fs_prog_data,
1893 
1894       .allow_spilling = true,
1895       .vue_map = vue_map,
1896 
1897       .log_data = &ice->dbg,
1898    };
1899    const unsigned *program =
1900       brw_compile_fs(compiler, mem_ctx, &params);
1901    if (program == NULL) {
1902       dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
1903       ralloc_free(mem_ctx);
1904       return false;
1905    }
1906 
1907    if (ish->compiled_once) {
1908       crocus_debug_recompile(ice, &nir->info, &key->base);
1909    } else {
1910       ish->compiled_once = true;
1911    }
1912 
1913    struct crocus_compiled_shader *shader =
1914       crocus_upload_shader(ice, CROCUS_CACHE_FS, sizeof(*key), key, program,
1915                            prog_data->program_size,
1916                            prog_data, sizeof(*fs_prog_data), NULL,
1917                            system_values, num_system_values,
1918                            num_cbufs, &bt);
1919 
1920    crocus_disk_cache_store(screen->disk_cache, ish, shader,
1921                            ice->shaders.cache_bo_map,
1922                            key, sizeof(*key));
1923 
1924    ralloc_free(mem_ctx);
1925    return shader;
1926 }
1927 
1928 /**
1929  * Update the current fragment shader variant.
1930  *
1931  * Fill out the key, look in the cache, compile and bind if needed.
1932  */
static void
crocus_update_compiled_fs(struct crocus_context *ice)
{
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct brw_wm_prog_key key = { KEY_INIT() };

   /* Only fold texture state into the key if this shader depends on it. */
   if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
      crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_FRAGMENT, ish,
                                            ish->nir->info.uses_texture_gather, &key.base.tex);
   screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);

   /* FS inputs depend on which VUE slots the last geometry stage writes. */
   if (ish->nos & (1ull << CROCUS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;

   /* In-memory cache -> disk cache -> fresh compile. */
   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_FS];
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_FS, sizeof(key), &key);

   if (!shader)
      shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = crocus_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      ice->shaders.prog[CROCUS_CACHE_FS] = shader;
      ice->state.dirty |= CROCUS_DIRTY_WM;
      /* gen4 clip/sf rely on fs prog_data */
      if (devinfo->ver < 6)
         ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
      else
         ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_GEN6_BLEND_STATE;
      if (devinfo->ver == 6)
         ice->state.dirty |= CROCUS_DIRTY_RASTER;
      if (devinfo->ver >= 7)
         ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS |
                                CROCUS_STAGE_DIRTY_BINDINGS_FS |
                                CROCUS_STAGE_DIRTY_CONSTANTS_FS;
      shs->sysvals_need_upload = true;
   }
}
1981 
1982 /**
1983  * Update the last enabled stage's VUE map.
1984  *
1985  * When the shader feeding the rasterizer's output interface changes, we
1986  * need to re-emit various packets.
1987  */
1988 static void
update_last_vue_map(struct crocus_context * ice,struct brw_stage_prog_data * prog_data)1989 update_last_vue_map(struct crocus_context *ice,
1990                     struct brw_stage_prog_data *prog_data)
1991 {
1992    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1993    const struct intel_device_info *devinfo = &screen->devinfo;
1994    struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1995    struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1996    struct brw_vue_map *old_map = ice->shaders.last_vue_map;
1997    const uint64_t changed_slots =
1998       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
1999 
2000    if (changed_slots & VARYING_BIT_VIEWPORT) {
2001       ice->state.num_viewports =
2002          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? CROCUS_MAX_VIEWPORTS : 1;
2003       ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT |
2004                           CROCUS_DIRTY_CC_VIEWPORT;
2005       if (devinfo->ver < 6)
2006          ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
2007 
2008       if (devinfo->ver <= 6)
2009          ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
2010 
2011       if (devinfo->ver >= 6)
2012          ice->state.dirty |= CROCUS_DIRTY_CLIP |
2013                              CROCUS_DIRTY_GEN6_SCISSOR_RECT;;
2014       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS |
2015          ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP];
2016    }
2017 
2018    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2019       ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
2020       if (devinfo->ver < 6)
2021          ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
2022       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS;
2023    }
2024 
2025    ice->shaders.last_vue_map = &vue_prog_data->vue_map;
2026 }
2027 
2028 static void
crocus_update_pull_constant_descriptors(struct crocus_context * ice,gl_shader_stage stage)2029 crocus_update_pull_constant_descriptors(struct crocus_context *ice,
2030                                         gl_shader_stage stage)
2031 {
2032    struct crocus_compiled_shader *shader = ice->shaders.prog[stage];
2033 
2034    if (!shader || !shader->prog_data->has_ubo_pull)
2035       return;
2036 
2037    struct crocus_shader_state *shs = &ice->state.shaders[stage];
2038    bool any_new_descriptors =
2039       shader->num_system_values > 0 && shs->sysvals_need_upload;
2040 
2041    unsigned bound_cbufs = shs->bound_cbufs;
2042 
2043    while (bound_cbufs) {
2044       const int i = u_bit_scan(&bound_cbufs);
2045       struct pipe_constant_buffer *cbuf = &shs->constbufs[i];
2046       if (cbuf->buffer) {
2047          any_new_descriptors = true;
2048       }
2049    }
2050 
2051    if (any_new_descriptors)
2052       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;
2053 }
2054 
2055 /**
2056  * Get the prog_data for a given stage, or NULL if the stage is disabled.
2057  */
2058 static struct brw_vue_prog_data *
get_vue_prog_data(struct crocus_context * ice,gl_shader_stage stage)2059 get_vue_prog_data(struct crocus_context *ice, gl_shader_stage stage)
2060 {
2061    if (!ice->shaders.prog[stage])
2062       return NULL;
2063 
2064    return (void *) ice->shaders.prog[stage]->prog_data;
2065 }
2066 
2067 static struct crocus_compiled_shader *
crocus_compile_clip(struct crocus_context * ice,struct brw_clip_prog_key * key)2068 crocus_compile_clip(struct crocus_context *ice, struct brw_clip_prog_key *key)
2069 {
2070    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
2071    const struct brw_compiler *compiler = screen->compiler;
2072    void *mem_ctx;
2073    unsigned program_size;
2074    mem_ctx = ralloc_context(NULL);
2075 
2076    struct brw_clip_prog_data *clip_prog_data =
2077       rzalloc(mem_ctx, struct brw_clip_prog_data);
2078 
2079    const unsigned *program = brw_compile_clip(compiler, mem_ctx, key, clip_prog_data,
2080                                               ice->shaders.last_vue_map, &program_size);
2081 
2082    if (program == NULL) {
2083       dbg_printf("failed to compile clip shader\n");
2084       ralloc_free(mem_ctx);
2085       return false;
2086    }
2087    struct crocus_binding_table bt;
2088    memset(&bt, 0, sizeof(bt));
2089 
2090    struct crocus_compiled_shader *shader =
2091       crocus_upload_shader(ice, CROCUS_CACHE_CLIP, sizeof(*key), key, program,
2092                            program_size,
2093                            (struct brw_stage_prog_data *)clip_prog_data, sizeof(*clip_prog_data),
2094                            NULL, NULL, 0, 0, &bt);
2095    ralloc_free(mem_ctx);
2096    return shader;
2097 }
/**
 * Update the current gen4/5 clip program variant: build the key from
 * rasterizer, framebuffer, and FS state, then look up or compile.
 */
static void
crocus_update_compiled_clip(struct crocus_context *ice)
{
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   struct brw_clip_prog_key key;
   struct crocus_compiled_shader *old = ice->shaders.clip_prog;
   memset(&key, 0, sizeof(key));

   /* NOTE(review): prog[MESA_SHADER_FRAGMENT] is dereferenced before the
    * NULL check below, so the check only guards a NULL prog_data — confirm
    * an FS variant is always bound when this runs.
    */
   const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
   if (wm_prog_data) {
      key.contains_flat_varying = wm_prog_data->contains_flat_varying;
      key.contains_noperspective_varying =
         wm_prog_data->contains_noperspective_varying;
      memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode));
   }

   key.primitive = ice->state.reduced_prim_mode;
   key.attrs = ice->shaders.last_vue_map->slots_valid;

   struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
   key.pv_first = rs_state->flatshade_first;

   /* Number of user clip planes = index of the highest enabled plane + 1. */
   if (rs_state->clip_plane_enable)
      key.nr_userclip = util_logbase2(rs_state->clip_plane_enable) + 1;

   if (screen->devinfo.ver == 5)
      key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;
   else
      key.clip_mode = BRW_CLIP_MODE_NORMAL;

   /* Triangles need extra key state for culling, unfilled (line/point)
    * polygon modes, and polygon offset.
    */
   if (key.primitive == PIPE_PRIM_TRIANGLES) {
      if (rs_state->cull_face == PIPE_FACE_FRONT_AND_BACK)
         key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;
      else {
         /* Default to CULL; overwritten below for non-culled faces. */
         uint32_t fill_front = BRW_CLIP_FILL_MODE_CULL;
         uint32_t fill_back = BRW_CLIP_FILL_MODE_CULL;
         uint32_t offset_front = 0;
         uint32_t offset_back = 0;

         if (!(rs_state->cull_face & PIPE_FACE_FRONT)) {
            switch (rs_state->fill_front) {
            case PIPE_POLYGON_MODE_FILL:
               fill_front = BRW_CLIP_FILL_MODE_FILL;
               offset_front = 0;
               break;
            case PIPE_POLYGON_MODE_LINE:
               fill_front = BRW_CLIP_FILL_MODE_LINE;
               offset_front = rs_state->offset_line;
               break;
            case PIPE_POLYGON_MODE_POINT:
               fill_front = BRW_CLIP_FILL_MODE_POINT;
               offset_front = rs_state->offset_point;
               break;
            }
         }

         if (!(rs_state->cull_face & PIPE_FACE_BACK)) {
            switch (rs_state->fill_back) {
            case PIPE_POLYGON_MODE_FILL:
               fill_back = BRW_CLIP_FILL_MODE_FILL;
               offset_back = 0;
               break;
            case PIPE_POLYGON_MODE_LINE:
               fill_back = BRW_CLIP_FILL_MODE_LINE;
               offset_back = rs_state->offset_line;
               break;
            case PIPE_POLYGON_MODE_POINT:
               fill_back = BRW_CLIP_FILL_MODE_POINT;
               offset_back = rs_state->offset_point;
               break;
            }
         }

         if (rs_state->fill_back != PIPE_POLYGON_MODE_FILL ||
             rs_state->fill_front != PIPE_POLYGON_MODE_FILL) {
            key.do_unfilled = 1;

            /* Most cases the fixed function units will handle.  Cases where
             * one or more polygon faces are unfilled will require help:
             */
            key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;

            if (offset_back || offset_front) {
               /* mrd = minimum resolvable depth of the current Z buffer;
                * polygon-offset units are scaled by it.
                */
               double mrd = 0.0;
               if (ice->state.framebuffer.zsbuf)
                  mrd = util_get_depth_format_mrd(util_format_description(ice->state.framebuffer.zsbuf->format));
               key.offset_units = rs_state->offset_units * mrd * 2;
               key.offset_factor = rs_state->offset_scale * mrd;
               key.offset_clamp = rs_state->offset_clamp * mrd;
            }

            /* Map front/back state onto CW/CCW winding, accounting for
             * front-face orientation and the window origin convention.
             */
            if (!(rs_state->front_ccw ^ rs_state->bottom_edge_rule)) {
               key.fill_ccw = fill_front;
               key.fill_cw = fill_back;
               key.offset_ccw = offset_front;
               key.offset_cw = offset_back;
               if (rs_state->light_twoside &&
                   key.fill_cw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_cw = 1;
            } else {
               key.fill_cw = fill_front;
               key.fill_ccw = fill_back;
               key.offset_cw = offset_front;
               key.offset_ccw = offset_back;
               if (rs_state->light_twoside &&
                   key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_ccw = 1;
            }
         }
      }
   }
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_CLIP, sizeof(key), &key);

   if (!shader)
      shader = crocus_compile_clip(ice, &key);

   /* Only flag state dirty when the variant actually changed. */
   if (old != shader) {
      ice->state.dirty |= CROCUS_DIRTY_CLIP;
      ice->shaders.clip_prog = shader;
   }
}
2220 
2221 static struct crocus_compiled_shader *
crocus_compile_sf(struct crocus_context * ice,struct brw_sf_prog_key * key)2222 crocus_compile_sf(struct crocus_context *ice, struct brw_sf_prog_key *key)
2223 {
2224    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
2225    const struct brw_compiler *compiler = screen->compiler;
2226    void *mem_ctx;
2227    unsigned program_size;
2228    mem_ctx = ralloc_context(NULL);
2229 
2230    struct brw_sf_prog_data *sf_prog_data =
2231       rzalloc(mem_ctx, struct brw_sf_prog_data);
2232 
2233    const unsigned *program = brw_compile_sf(compiler, mem_ctx, key, sf_prog_data,
2234                                             ice->shaders.last_vue_map, &program_size);
2235 
2236    if (program == NULL) {
2237       dbg_printf("failed to compile sf shader\n");
2238       ralloc_free(mem_ctx);
2239       return false;
2240    }
2241 
2242    struct crocus_binding_table bt;
2243    memset(&bt, 0, sizeof(bt));
2244    struct crocus_compiled_shader *shader =
2245       crocus_upload_shader(ice, CROCUS_CACHE_SF, sizeof(*key), key, program,
2246                            program_size,
2247                            (struct brw_stage_prog_data *)sf_prog_data, sizeof(*sf_prog_data),
2248                            NULL, NULL, 0, 0, &bt);
2249    ralloc_free(mem_ctx);
2250    return shader;
2251 }
2252 
/* Build the SF program key from current rasterizer/primitive/FS state and
 * bind the matching cached or freshly-compiled SF program (gen4/5 only;
 * see the ver < 6 guard in crocus_update_compiled_shaders).
 */
static void
crocus_update_compiled_sf(struct crocus_context *ice)
{
   struct brw_sf_prog_key key;
   struct crocus_compiled_shader *old = ice->shaders.sf_prog;
   memset(&key, 0, sizeof(key));

   key.attrs = ice->shaders.last_vue_map->slots_valid;

   /* Pick the SF primitive class from the reduced primitive mode; an edge
    * flag varying forces the unfilled-triangles path.
    */
   switch (ice->state.reduced_prim_mode) {
   case PIPE_PRIM_TRIANGLES:
   default:
      if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))
         key.primitive = BRW_SF_PRIM_UNFILLED_TRIS;
      else
         key.primitive = BRW_SF_PRIM_TRIANGLES;
      break;
   case PIPE_PRIM_LINES:
      key.primitive = BRW_SF_PRIM_LINES;
      break;
   case PIPE_PRIM_POINTS:
      key.primitive = BRW_SF_PRIM_POINTS;
      break;
   }

   struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
   key.userclip_active = rs_state->clip_plane_enable != 0;
   /* Interpolation modes come from the currently-bound fragment shader. */
   const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);
   if (wm_prog_data) {
      key.contains_flat_varying = wm_prog_data->contains_flat_varying;
      memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode));
   }

   key.do_twoside_color = rs_state->light_twoside;

   key.do_point_sprite = rs_state->point_quad_rasterization;
   if (key.do_point_sprite) {
      /* Low 8 bits select which texcoords get sprite-coord replacement;
       * bit 8, or an FS reading VARYING_SLOT_PNTC, enables gl_PointCoord.
       */
      key.point_sprite_coord_replace = rs_state->sprite_coord_enable & 0xff;
      if (rs_state->sprite_coord_enable & (1 << 8))
         key.do_point_coord = 1;
      if (wm_prog_data && wm_prog_data->urb_setup[VARYING_SLOT_PNTC] != -1)
         key.do_point_coord = 1;
   }

   key.sprite_origin_lower_left = rs_state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT;

   if (key.do_twoside_color) {
      key.frontface_ccw = rs_state->front_ccw;
   }
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_SF, sizeof(key), &key);

   if (!shader)
      shader = crocus_compile_sf(ice, &key);

   /* Only flag raster state dirty when the bound program actually changed. */
   if (old != shader) {
      ice->state.dirty |= CROCUS_DIRTY_RASTER;
      ice->shaders.sf_prog = shader;
   }
}
2313 
2314 static struct crocus_compiled_shader *
crocus_compile_ff_gs(struct crocus_context * ice,struct brw_ff_gs_prog_key * key)2315 crocus_compile_ff_gs(struct crocus_context *ice, struct brw_ff_gs_prog_key *key)
2316 {
2317    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
2318    struct brw_compiler *compiler = screen->compiler;
2319    void *mem_ctx;
2320    unsigned program_size;
2321    mem_ctx = ralloc_context(NULL);
2322 
2323    struct brw_ff_gs_prog_data *ff_gs_prog_data =
2324       rzalloc(mem_ctx, struct brw_ff_gs_prog_data);
2325 
2326    const unsigned *program = brw_compile_ff_gs_prog(compiler, mem_ctx, key, ff_gs_prog_data,
2327                                                     ice->shaders.last_vue_map, &program_size);
2328 
2329    if (program == NULL) {
2330       dbg_printf("failed to compile sf shader\n");
2331       ralloc_free(mem_ctx);
2332       return false;
2333    }
2334 
2335    struct crocus_binding_table bt;
2336    memset(&bt, 0, sizeof(bt));
2337 
2338    if (screen->devinfo.ver == 6) {
2339       bt.sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS;
2340       bt.used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1;
2341 
2342       bt.size_bytes = BRW_MAX_SOL_BINDINGS * 4;
2343    }
2344 
2345    struct crocus_compiled_shader *shader =
2346       crocus_upload_shader(ice, CROCUS_CACHE_FF_GS, sizeof(*key), key, program,
2347                            program_size,
2348                            (struct brw_stage_prog_data *)ff_gs_prog_data, sizeof(*ff_gs_prog_data),
2349                            NULL, NULL, 0, 0, &bt);
2350    ralloc_free(mem_ctx);
2351    return shader;
2352 }
2353 
/* Decide whether a fixed-function GS program is needed for the current
 * draw state and, if so, bind a cached or freshly-compiled one.
 */
static void
crocus_update_compiled_ff_gs(struct crocus_context *ice)
{
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct brw_ff_gs_prog_key key;
   struct crocus_compiled_shader *old = ice->shaders.ff_gs_prog;
   memset(&key, 0, sizeof(key));

   /* Gen7+ has no fixed-function GS path. */
   assert(devinfo->ver < 7);

   key.attrs = ice->shaders.last_vue_map->slots_valid;

   key.primitive = screen->vtbl.translate_prim_type(ice->state.prim_mode, 0);

   struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
   key.pv_first = rs_state->flatshade_first;

   if (key.primitive == _3DPRIM_QUADLIST && !rs_state->flatshade) {
      /* Provide consistent primitive order with brw_set_prim's
       * optimization of single quads to trifans.
       */
      key.pv_first = true;
   }

   if (devinfo->ver >= 6) {
      /* Gen6: only needed for transform feedback. */
      key.need_gs_prog = ice->state.streamout_active;
      if (key.need_gs_prog) {
         struct crocus_uncompiled_shader *vs =
            ice->shaders.uncompiled[MESA_SHADER_VERTEX];
         gfx6_ff_gs_xfb_setup(&vs->stream_output,
                              &key);
      }
   } else {
      /* Gen4/5: needed to break quads/quadstrips/lineloops into
       * primitives the hardware rasterizes natively.
       */
      key.need_gs_prog = (key.primitive == _3DPRIM_QUADLIST ||
                          key.primitive == _3DPRIM_QUADSTRIP ||
                          key.primitive == _3DPRIM_LINELOOP);
   }

   struct crocus_compiled_shader *shader = NULL;
   if (key.need_gs_prog) {
      shader = crocus_find_cached_shader(ice, CROCUS_CACHE_FF_GS,
                                         sizeof(key), &key);
      if (!shader)
         shader = crocus_compile_ff_gs(ice, &key);
   }
   if (old != shader) {
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS;
      /* Toggling the FF GS on/off changes URB allocation on gen6. */
      if (!!old != !!shader)
         ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;
      ice->shaders.ff_gs_prog = shader;
      if (shader) {
         const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data;
         ice->state.last_xfb_verts_per_prim = gs_prog_data->svbi_postincrement_value;
      }
   }
}
2411 
2412 // XXX: crocus_compiled_shaders are space-leaking :(
2413 // XXX: do remember to unbind them if deleting them.
2414 
2415 /**
2416  * Update the current shader variants for the given state.
2417  *
2418  * This should be called on every draw call to ensure that the correct
2419  * shaders are bound.  It will also flag any dirty state triggered by
2420  * swapping out those shaders.
2421  */
bool
crocus_update_compiled_shaders(struct crocus_context *ice)
{
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   const uint64_t stage_dirty = ice->state.stage_dirty;

   /* Snapshot VUE prog data so we can detect URB-relevant changes below;
    * skipped if the URB is already flagged dirty.
    */
   struct brw_vue_prog_data *old_prog_datas[4];
   if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
         old_prog_datas[i] = get_vue_prog_data(ice, i);
   }

   /* Tessellation: both stages update together, or both unbind together. */
   if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |
                      CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) {
      struct crocus_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         crocus_update_compiled_tcs(ice);
         crocus_update_compiled_tes(ice);
      } else {
         ice->shaders.prog[CROCUS_CACHE_TCS] = NULL;
         ice->shaders.prog[CROCUS_CACHE_TES] = NULL;
         ice->state.stage_dirty |=
            CROCUS_STAGE_DIRTY_TCS | CROCUS_STAGE_DIRTY_TES |
            CROCUS_STAGE_DIRTY_BINDINGS_TCS | CROCUS_STAGE_DIRTY_BINDINGS_TES |
            CROCUS_STAGE_DIRTY_CONSTANTS_TCS | CROCUS_STAGE_DIRTY_CONSTANTS_TES;
      }
   }

   if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_VS)
      crocus_update_compiled_vs(ice);
   if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_GS)
      crocus_update_compiled_gs(ice);

   /* Track whether the last geometry stage emits points/lines, which feeds
    * into clip-program state.
    */
   if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_GS |
                      CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) {
      const struct crocus_compiled_shader *gs =
         ice->shaders.prog[MESA_SHADER_GEOMETRY];
      const struct crocus_compiled_shader *tes =
         ice->shaders.prog[MESA_SHADER_TESS_EVAL];

      bool points_or_lines = false;

      if (gs) {
         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
         points_or_lines =
            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
      } else if (tes) {
         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
         points_or_lines =
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
      }

      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* Outbound to XY Clip enables */
         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
         ice->state.dirty |= CROCUS_DIRTY_CLIP;
      }
   }

   /* A draw without a bound VS cannot proceed. */
   if (!ice->shaders.prog[MESA_SHADER_VERTEX])
      return false;

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct crocus_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader->prog_data);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST | CROCUS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      screen->vtbl.update_so_strides(ice, ish->stream_output.stride);
   }

   /* use ice->state version as last_vue_map can dirty this bit */
   if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_FS)
      crocus_update_compiled_fs(ice);

   /* Fixed-function GS (gen4-6) and clip/SF programs (gen4/5). */
   if (screen->devinfo.ver <= 6) {
      if (ice->state.dirty & CROCUS_DIRTY_GEN4_FF_GS_PROG &&
          !ice->shaders.prog[MESA_SHADER_GEOMETRY])
         crocus_update_compiled_ff_gs(ice);
   }

   if (screen->devinfo.ver < 6) {
      if (ice->state.dirty & CROCUS_DIRTY_GEN4_CLIP_PROG)
         crocus_update_compiled_clip(ice);
      if (ice->state.dirty & CROCUS_DIRTY_GEN4_SF_PROG)
         crocus_update_compiled_sf(ice);
   }


   /* Changing shader interfaces may require a URB configuration. */
   if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
         struct brw_vue_prog_data *old = old_prog_datas[i];
         struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
         if (!!old != !!new ||
             (new && new->urb_entry_size != old->urb_entry_size)) {
            ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;
            break;
         }
      }
   }

   /* Refresh pull-constant descriptors for any stage whose constants moved. */
   if (ice->state.stage_dirty & CROCUS_RENDER_STAGE_DIRTY_CONSTANTS) {
      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
         if (ice->state.stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << i))
            crocus_update_pull_constant_descriptors(ice, i);
      }
   }
   return true;
}
2539 
2540 static struct crocus_compiled_shader *
crocus_compile_cs(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,const struct brw_cs_prog_key * key)2541 crocus_compile_cs(struct crocus_context *ice,
2542                   struct crocus_uncompiled_shader *ish,
2543                   const struct brw_cs_prog_key *key)
2544 {
2545    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
2546    const struct brw_compiler *compiler = screen->compiler;
2547    void *mem_ctx = ralloc_context(NULL);
2548    struct brw_cs_prog_data *cs_prog_data =
2549       rzalloc(mem_ctx, struct brw_cs_prog_data);
2550    struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2551    enum brw_param_builtin *system_values;
2552    const struct intel_device_info *devinfo = &screen->devinfo;
2553    unsigned num_system_values;
2554    unsigned num_cbufs;
2555 
2556    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2557 
2558    NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2559 
2560    crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
2561                          &num_system_values, &num_cbufs);
2562    crocus_lower_swizzles(nir, &key->base.tex);
2563    struct crocus_binding_table bt;
2564    crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2565                               num_system_values, num_cbufs, &key->base.tex);
2566 
2567    struct brw_compile_cs_params params = {
2568       .nir = nir,
2569       .key = key,
2570       .prog_data = cs_prog_data,
2571       .log_data = &ice->dbg,
2572    };
2573 
2574    const unsigned *program =
2575       brw_compile_cs(compiler, mem_ctx, &params);
2576    if (program == NULL) {
2577       dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2578       ralloc_free(mem_ctx);
2579       return false;
2580    }
2581 
2582    if (ish->compiled_once) {
2583       crocus_debug_recompile(ice, &nir->info, &key->base);
2584    } else {
2585       ish->compiled_once = true;
2586    }
2587 
2588    struct crocus_compiled_shader *shader =
2589       crocus_upload_shader(ice, CROCUS_CACHE_CS, sizeof(*key), key, program,
2590                            prog_data->program_size,
2591                            prog_data, sizeof(*cs_prog_data), NULL,
2592                            system_values, num_system_values,
2593                            num_cbufs, &bt);
2594 
2595    crocus_disk_cache_store(screen->disk_cache, ish, shader,
2596                            ice->shaders.cache_bo_map,
2597                            key, sizeof(*key));
2598 
2599    ralloc_free(mem_ctx);
2600    return shader;
2601 }
2602 
/* Build the CS key for the bound compute shader and bind a matching
 * variant, checking the in-memory cache, then the disk cache, then
 * compiling from scratch.
 */
static void
crocus_update_compiled_cs(struct crocus_context *ice)
{
   struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct brw_cs_prog_key key = { KEY_INIT() };

   /* Only fold texture state into the key if the shader depends on it. */
   if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
      crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_COMPUTE, ish,
                                            ish->nir->info.uses_texture_gather, &key.base.tex);
   screen->vtbl.populate_cs_key(ice, &key);

   struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_CS];
   struct crocus_compiled_shader *shader =
      crocus_find_cached_shader(ice, CROCUS_CACHE_CS, sizeof(key), &key);

   if (!shader)
      shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = crocus_compile_cs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[CROCUS_CACHE_CS] = shader;
      ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS |
                          CROCUS_STAGE_DIRTY_BINDINGS_CS |
                          CROCUS_STAGE_DIRTY_CONSTANTS_CS;
      /* New prog data may reference different system values. */
      shs->sysvals_need_upload = true;
   }
}
2636 
void
crocus_update_compiled_compute_shader(struct crocus_context *ice)
{
   if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_CS)
      crocus_update_compiled_cs(ice);

   /* Re-read stage_dirty here: crocus_update_compiled_cs may have just set
    * CROCUS_STAGE_DIRTY_CONSTANTS_CS when binding a new variant.
    */
   if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS)
      crocus_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
}
2646 
2647 void
crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data * cs_prog_data,unsigned threads,uint32_t * dst)2648 crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2649                                  unsigned threads,
2650                                  uint32_t *dst)
2651 {
2652    assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2653    assert(cs_prog_data->push.cross_thread.size == 0);
2654    assert(cs_prog_data->push.per_thread.dwords == 1);
2655    assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
2656    for (unsigned t = 0; t < threads; t++)
2657       dst[8 * t] = t;
2658 }
2659 
2660 /**
2661  * Allocate scratch BOs as needed for the given per-thread size and stage.
2662  */
2663 struct crocus_bo *
crocus_get_scratch_space(struct crocus_context * ice,unsigned per_thread_scratch,gl_shader_stage stage)2664 crocus_get_scratch_space(struct crocus_context *ice,
2665                          unsigned per_thread_scratch,
2666                          gl_shader_stage stage)
2667 {
2668    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
2669    struct crocus_bufmgr *bufmgr = screen->bufmgr;
2670    const struct intel_device_info *devinfo = &screen->devinfo;
2671 
2672    unsigned encoded_size = ffs(per_thread_scratch) - 11;
2673    assert(encoded_size < (1 << 16));
2674 
2675    struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2676 
2677    if (!*bop) {
2678       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2679       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2680       *bop = crocus_bo_alloc(bufmgr, "scratch", size);
2681    }
2682 
2683    return *bop;
2684 }
2685 
2686 /* ------------------------------------------------------------------- */
2687 
2688 /**
2689  * The pipe->create_[stage]_state() driver hooks.
2690  *
2691  * Performs basic NIR preprocessing, records any state dependencies, and
2692  * returns an crocus_uncompiled_shader as the Gallium CSO.
2693  *
2694  * Actual shader compilation to assembly happens later, at first use.
2695  */
static void *
crocus_create_uncompiled_shader(struct pipe_context *ctx,
                                nir_shader *nir,
                                const struct pipe_stream_output_info *so_info)
{
   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_uncompiled_shader *ish =
      calloc(1, sizeof(struct crocus_uncompiled_shader));
   if (!ish)
      return NULL;

   /* Edge-flag fixup only applies on gen6+; record whether it was needed. */
   if (devinfo->ver >= 6)
     NIR_PASS(ish->needs_edge_flag, nir, crocus_fix_edge_flags);
   else
     ish->needs_edge_flag = false;

   /* Generic Intel NIR preprocessing, then crocus-specific image lowering. */
   brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
   NIR_PASS_V(nir, crocus_lower_storage_image_derefs);

   nir_sweep(nir);

   ish->program_id = get_new_program_id(screen);
   /* ish takes ownership of the NIR; freed in crocus_delete_shader_state. */
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   if (screen->disk_cache) {
      /* Serialize the NIR to a binary blob that we can hash for the disk
       * cache.  Drop unnecessary information (like variable names)
       * so the serialized NIR is smaller, and also to let us detect more
       * isomorphic shaders when hashing, increasing cache hits.
       */
      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, nir, true);
      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
      blob_finish(&blob);
   }

   return ish;
}
2742 
2743 static struct crocus_uncompiled_shader *
crocus_create_shader_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2744 crocus_create_shader_state(struct pipe_context *ctx,
2745                            const struct pipe_shader_state *state)
2746 {
2747    struct nir_shader *nir;
2748 
2749    if (state->type == PIPE_SHADER_IR_TGSI)
2750       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2751    else
2752       nir = state->ir.nir;
2753 
2754    return crocus_create_uncompiled_shader(ctx, nir, &state->stream_output);
2755 }
2756 
2757 static void *
crocus_create_vs_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2758 crocus_create_vs_state(struct pipe_context *ctx,
2759                        const struct pipe_shader_state *state)
2760 {
2761    struct crocus_context *ice = (void *) ctx;
2762    struct crocus_screen *screen = (void *) ctx->screen;
2763    struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);
2764 
2765    ish->nos |= (1ull << CROCUS_NOS_TEXTURES);
2766    /* User clip planes or gen5 sprite coord enable */
2767    if (ish->nir->info.clip_distance_array_size == 0 ||
2768        screen->devinfo.ver <= 5)
2769       ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);
2770 
2771    if (screen->devinfo.verx10 < 75)
2772       ish->nos |= (1ull << CROCUS_NOS_VERTEX_ELEMENTS);
2773 
2774    if (screen->precompile) {
2775       struct brw_vs_prog_key key = { KEY_INIT() };
2776 
2777       if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2778          crocus_compile_vs(ice, ish, &key);
2779    }
2780 
2781    return ish;
2782 }
2783 
2784 static void *
crocus_create_tcs_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2785 crocus_create_tcs_state(struct pipe_context *ctx,
2786                         const struct pipe_shader_state *state)
2787 {
2788    struct crocus_context *ice = (void *) ctx;
2789    struct crocus_screen *screen = (void *) ctx->screen;
2790    struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);
2791    struct shader_info *info = &ish->nir->info;
2792 
2793    ish->nos |= (1ull << CROCUS_NOS_TEXTURES);
2794    if (screen->precompile) {
2795       struct brw_tcs_prog_key key = {
2796          KEY_INIT(),
2797          // XXX: make sure the linker fills this out from the TES...
2798          ._tes_primitive_mode =
2799             info->tess._primitive_mode ? info->tess._primitive_mode
2800                                       : TESS_PRIMITIVE_TRIANGLES,
2801          .outputs_written = info->outputs_written,
2802          .patch_outputs_written = info->patch_outputs_written,
2803       };
2804 
2805       key.input_vertices = info->tess.tcs_vertices_out;
2806 
2807       if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2808          crocus_compile_tcs(ice, ish, &key);
2809    }
2810 
2811    return ish;
2812 }
2813 
2814 static void *
crocus_create_tes_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2815 crocus_create_tes_state(struct pipe_context *ctx,
2816                         const struct pipe_shader_state *state)
2817 {
2818    struct crocus_context *ice = (void *) ctx;
2819    struct crocus_screen *screen = (void *) ctx->screen;
2820    struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);
2821    struct shader_info *info = &ish->nir->info;
2822 
2823    ish->nos |= (1ull << CROCUS_NOS_TEXTURES);
2824    /* User clip planes */
2825    if (ish->nir->info.clip_distance_array_size == 0)
2826       ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);
2827 
2828    if (screen->precompile) {
2829       struct brw_tes_prog_key key = {
2830          KEY_INIT(),
2831          // XXX: not ideal, need TCS output/TES input unification
2832          .inputs_read = info->inputs_read,
2833          .patch_inputs_read = info->patch_inputs_read,
2834       };
2835 
2836       if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2837          crocus_compile_tes(ice, ish, &key);
2838    }
2839 
2840    return ish;
2841 }
2842 
2843 static void *
crocus_create_gs_state(struct pipe_context * ctx,const struct pipe_shader_state * state)2844 crocus_create_gs_state(struct pipe_context *ctx,
2845                        const struct pipe_shader_state *state)
2846 {
2847    struct crocus_context *ice = (void *) ctx;
2848    struct crocus_screen *screen = (void *) ctx->screen;
2849    struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);
2850 
2851    ish->nos |= (1ull << CROCUS_NOS_TEXTURES);
2852    /* User clip planes */
2853    if (ish->nir->info.clip_distance_array_size == 0)
2854       ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);
2855 
2856    if (screen->precompile) {
2857       struct brw_gs_prog_key key = { KEY_INIT() };
2858 
2859       if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
2860          crocus_compile_gs(ice, ish, &key);
2861    }
2862 
2863    return ish;
2864 }
2865 
static void *
crocus_create_fs_state(struct pipe_context *ctx,
                       const struct pipe_shader_state *state)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_screen *screen = (void *) ctx->screen;
   struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   /* The FS key depends on nearly every piece of non-orthogonal state. */
   ish->nos |= (1ull << CROCUS_NOS_FRAMEBUFFER) |
               (1ull << CROCUS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << CROCUS_NOS_RASTERIZER) |
               (1ull << CROCUS_NOS_TEXTURES) |
               (1ull << CROCUS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs or gen4/5 */
   if (screen->devinfo.ver < 6 || util_bitcount64(ish->nir->info.inputs_read &
                                                  BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << CROCUS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      /* Color outputs only: depth/stencil/sample-mask results excluded. */
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         screen->devinfo.ver > 6 && util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct intel_device_info *devinfo = &screen->devinfo;
      struct brw_wm_prog_key key = {
         KEY_INIT(),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = false,
         .ignore_sample_mask_out = screen->devinfo.ver < 6 ? 1 : 0,
         .input_slots_valid =
         can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      struct brw_vue_map vue_map;
      if (devinfo->ver < 6) {
         brw_compute_vue_map(devinfo, &vue_map,
                             info->inputs_read | VARYING_BIT_POS,
                             false, /* pos slots */ 1);
      }
      /* NOTE(review): on ver >= 6, vue_map is passed below uninitialized;
       * presumably crocus_compile_fs only reads it for ver < 6 -- confirm.
       */
      if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         crocus_compile_fs(ice, ish, &key, &vue_map);
   }

   return ish;
}
2918 
static void *
crocus_create_compute_state(struct pipe_context *ctx,
                            const struct pipe_compute_state *state)
{
   /* Only NIR compute shaders are accepted by this driver hook. */
   assert(state->ir_type == PIPE_SHADER_IR_NIR);

   struct crocus_context *ice = (void *) ctx;
   struct crocus_screen *screen = (void *) ctx->screen;
   struct crocus_uncompiled_shader *ish =
      crocus_create_uncompiled_shader(ctx, (void *) state->prog, NULL);

   ish->nos |= (1ull << CROCUS_NOS_TEXTURES);
   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      struct brw_cs_prog_key key = { KEY_INIT() };

      if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
         crocus_compile_cs(ice, ish, &key);
   }

   return ish;
}
2942 
2943 /**
2944  * The pipe->delete_[stage]_state() driver hooks.
2945  *
2946  * Frees the crocus_uncompiled_shader.
2947  */
2948 static void
crocus_delete_shader_state(struct pipe_context * ctx,void * state,gl_shader_stage stage)2949 crocus_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
2950 {
2951    struct crocus_uncompiled_shader *ish = state;
2952    struct crocus_context *ice = (void *) ctx;
2953 
2954    if (ice->shaders.uncompiled[stage] == ish) {
2955       ice->shaders.uncompiled[stage] = NULL;
2956       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage;
2957    }
2958 
2959    if (ish->const_data) {
2960       pipe_resource_reference(&ish->const_data, NULL);
2961       pipe_resource_reference(&ish->const_data_state.res, NULL);
2962    }
2963 
2964    ralloc_free(ish->nir);
2965    free(ish);
2966 }
2967 
/* Per-stage thin wrappers for the pipe->delete_[stage]_state hooks. */

static void
crocus_delete_vs_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
crocus_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
crocus_delete_tes_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
crocus_delete_gs_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
crocus_delete_fs_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
crocus_delete_cs_state(struct pipe_context *ctx, void *state)
{
   crocus_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}
3003 
3004 /**
3005  * The pipe->bind_[stage]_state() driver hook.
3006  *
3007  * Binds an uncompiled shader as the current one for a particular stage.
3008  * Updates dirty tracking to account for the shader's NOS.
3009  */
3010 static void
bind_shader_state(struct crocus_context * ice,struct crocus_uncompiled_shader * ish,gl_shader_stage stage)3011 bind_shader_state(struct crocus_context *ice,
3012                   struct crocus_uncompiled_shader *ish,
3013                   gl_shader_stage stage)
3014 {
3015    uint64_t dirty_bit = CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage;
3016    const uint64_t nos = ish ? ish->nos : 0;
3017 
3018    const struct shader_info *old_info = crocus_get_shader_info(ice, stage);
3019    const struct shader_info *new_info = ish ? &ish->nir->info : NULL;
3020 
3021    if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
3022        (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
3023       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
3024    }
3025 
3026    ice->shaders.uncompiled[stage] = ish;
3027    ice->state.stage_dirty |= dirty_bit;
3028 
3029    /* Record that CSOs need to mark CROCUS_DIRTY_UNCOMPILED_XS when they change
3030     * (or that they no longer need to do so).
3031     */
3032    for (int i = 0; i < CROCUS_NOS_COUNT; i++) {
3033       if (nos & (1 << i))
3034          ice->state.stage_dirty_for_nos[i] |= dirty_bit;
3035       else
3036          ice->state.stage_dirty_for_nos[i] &= ~dirty_bit;
3037    }
3038 }
3039 
3040 static void
crocus_bind_vs_state(struct pipe_context * ctx,void * state)3041 crocus_bind_vs_state(struct pipe_context *ctx, void *state)
3042 {
3043    struct crocus_context *ice = (struct crocus_context *)ctx;
3044    struct crocus_uncompiled_shader *new_ish = state;
3045    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
3046    const struct intel_device_info *devinfo = &screen->devinfo;
3047 
3048    if (new_ish &&
3049        ice->state.window_space_position !=
3050        new_ish->nir->info.vs.window_space_position) {
3051       ice->state.window_space_position =
3052          new_ish->nir->info.vs.window_space_position;
3053 
3054       ice->state.dirty |= CROCUS_DIRTY_CLIP |
3055                           CROCUS_DIRTY_RASTER |
3056                           CROCUS_DIRTY_CC_VIEWPORT;
3057    }
3058 
3059    if (devinfo->ver == 6) {
3060       ice->state.stage_dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
3061    }
3062 
3063    bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
3064 }
3065 
3066 static void
crocus_bind_tcs_state(struct pipe_context * ctx,void * state)3067 crocus_bind_tcs_state(struct pipe_context *ctx, void *state)
3068 {
3069    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
3070 }
3071 
3072 static void
crocus_bind_tes_state(struct pipe_context * ctx,void * state)3073 crocus_bind_tes_state(struct pipe_context *ctx, void *state)
3074 {
3075    struct crocus_context *ice = (struct crocus_context *)ctx;
3076 
3077    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3078    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
3079       ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;
3080 
3081    bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
3082 }
3083 
3084 static void
crocus_bind_gs_state(struct pipe_context * ctx,void * state)3085 crocus_bind_gs_state(struct pipe_context *ctx, void *state)
3086 {
3087    struct crocus_context *ice = (struct crocus_context *)ctx;
3088 
3089    /* Enabling/disabling optional stages requires a URB reconfiguration. */
3090    if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
3091       ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;
3092 
3093    bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
3094 }
3095 
3096 static void
crocus_bind_fs_state(struct pipe_context * ctx,void * state)3097 crocus_bind_fs_state(struct pipe_context *ctx, void *state)
3098 {
3099    struct crocus_context *ice = (struct crocus_context *) ctx;
3100    struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;
3101    const struct intel_device_info *devinfo = &screen->devinfo;
3102    struct crocus_uncompiled_shader *old_ish =
3103       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
3104    struct crocus_uncompiled_shader *new_ish = state;
3105 
3106    const unsigned color_bits =
3107       BITFIELD64_BIT(FRAG_RESULT_COLOR) |
3108       BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);
3109 
3110    /* Fragment shader outputs influence HasWriteableRT */
3111    if (!old_ish || !new_ish ||
3112        (old_ish->nir->info.outputs_written & color_bits) !=
3113        (new_ish->nir->info.outputs_written & color_bits)) {
3114       if (devinfo->ver == 8)
3115          ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;
3116       else
3117          ice->state.dirty |= CROCUS_DIRTY_WM;
3118    }
3119 
3120    if (devinfo->ver == 8)
3121       ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
3122    bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
3123 }
3124 
3125 static void
crocus_bind_cs_state(struct pipe_context * ctx,void * state)3126 crocus_bind_cs_state(struct pipe_context *ctx, void *state)
3127 {
3128    bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
3129 }
3130 
3131 void
crocus_init_program_functions(struct pipe_context * ctx)3132 crocus_init_program_functions(struct pipe_context *ctx)
3133 {
3134    ctx->create_vs_state  = crocus_create_vs_state;
3135    ctx->create_tcs_state = crocus_create_tcs_state;
3136    ctx->create_tes_state = crocus_create_tes_state;
3137    ctx->create_gs_state  = crocus_create_gs_state;
3138    ctx->create_fs_state  = crocus_create_fs_state;
3139    ctx->create_compute_state = crocus_create_compute_state;
3140 
3141    ctx->delete_vs_state  = crocus_delete_vs_state;
3142    ctx->delete_tcs_state = crocus_delete_tcs_state;
3143    ctx->delete_tes_state = crocus_delete_tes_state;
3144    ctx->delete_gs_state  = crocus_delete_gs_state;
3145    ctx->delete_fs_state  = crocus_delete_fs_state;
3146    ctx->delete_compute_state = crocus_delete_cs_state;
3147 
3148    ctx->bind_vs_state  = crocus_bind_vs_state;
3149    ctx->bind_tcs_state = crocus_bind_tcs_state;
3150    ctx->bind_tes_state = crocus_bind_tes_state;
3151    ctx->bind_gs_state  = crocus_bind_gs_state;
3152    ctx->bind_fs_state  = crocus_bind_fs_state;
3153    ctx->bind_compute_state = crocus_bind_cs_state;
3154 }
3155