/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "nir_builder.h"
#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "nir/nir_xfb_info.h"
#include "nir/pipe_nir.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"

#include "util/u_dump.h"
#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}
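
/* Worked example (illustrative): for a vertex program with
 * Parameters->NumParameters > 0 and info.num_textures > 0 but no images,
 * UBOs, SSBOs or atomics, the helper above ORs only
 * ST_NEW_VS_CONSTANTS | ST_NEW_VS_SAMPLER_VIEWS | ST_NEW_VS_SAMPLERS
 * into *states, on top of whatever base bits the caller seeded.
 */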

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &prog->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &prog->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &prog->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &prog->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &prog->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &prog->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}
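
/* Sketch of how the mask is consumed (see st_finalize_program below): when
 * the program is bound, its bits are merged into the context's dirty mask
 * so only the state relevant to this program gets revalidated:
 *
 *    ctx->NewDriverState |= prog->affected_states;
 */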


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   FREE(v);
}

static void
st_unbind_program(struct st_context *st, struct gl_program *p)
{
   struct gl_context *ctx = st->ctx;

   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct gl_program *p)
{
   struct st_variant *v;

   /* If we are releasing shaders, unbind them first, because we don't
    * know which shaders are bound in the driver; st/mesa will re-bind
    * the current program on the next state validation.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Target);
      v = next;
   }

   p->variants = NULL;

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct gl_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, *p);
   _mesa_reference_program(st->ctx, p, NULL);
}

static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_reg_intrinsics_to_ssa");

   NIR_PASS(_, nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS(_, nir, nir_lower_system_values);

   struct nir_lower_compute_system_values_options cs_options = {
      .has_base_global_invocation_id = false,
      .has_base_workgroup_id = false,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs_options);

   /* Optimise NIR */
   NIR_PASS(_, nir, nir_opt_constant_folding);
   gl_nir_opts(nir);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   /* This must be done after optimizations to assign IO bases. */
   nir_recompute_io_bases(nir, nir_var_shader_in | nir_var_shader_out);

   if (st->allow_st_finalize_nir_twice) {
      st_serialize_base_nir(prog, nir);
      st_finalize_nir(st, prog, NULL, nir, true, false);

      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, nir);
         free(msg);
      }
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}

/**
 * Prepare st_vertex_program info.
 */
void
st_prepare_vertex_program(struct gl_program *prog)
{
   struct gl_vertex_program *stvp = (struct gl_vertex_program *)prog;

   stvp->num_inputs = util_bitcount64(prog->info.inputs_read);
   stvp->vert_attrib_mask = prog->info.inputs_read;

   /* Compute mapping of vertex program outputs to slots. */
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* Pre-assign a slot for the potentially unused edge flag output. */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}
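
/* Worked example (hypothetical mask): if outputs_written were just
 * VARYING_BIT_POS | VARYING_BIT_COL0, the loop above would assign
 * result_to_output[VARYING_SLOT_POS] = 0 and
 * result_to_output[VARYING_SLOT_COL0] = 1, leave every other entry at ~0,
 * and pre-assign the edge flag to slot 2 for variants that add it.
 */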

void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      /* This output was added by mesa/st and should not be tracked for xfb:
       * drivers must check var->data.explicit_location to find the original
       * output and only emit that one for xfb.
       */
      if (prog->skip_pointsize_xfb && attr == VARYING_SLOT_PSIZ)
         continue;
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &prog->state.stream_output;

   if (!num_outputs) {
      so_info->num_outputs = 0;
      return;
   }

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}
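
/* Worked example (hypothetical): if skip_pointsize_xfb is set and PSIZ was
 * injected by mesa/st, the PSIZ slot is skipped while counting, so every
 * written output in a slot above PSIZ maps to an index one lower than it
 * would otherwise, and an xfb declaration on such an output picks up that
 * corrected register_index.
 */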

/**
 * Creates a driver shader from a NIR shader.  Takes ownership of the
 * passed nir_shader.
 */
void *
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
{
   struct pipe_context *pipe = st->pipe;

   assert(state->type == PIPE_SHADER_IR_NIR);
   nir_shader *nir = state->ir.nir;
   gl_shader_stage stage = nir->info.stage;

   /* Renumber SSA defs to make it easier to run diff on printed NIR. */
   nir_foreach_function_impl(impl, nir) {
      nir_index_ssa_defs(impl);
   }

   if (ST_DEBUG & DEBUG_PRINT_IR) {
      fprintf(stderr, "NIR before handing off to driver:\n");
      nir_print_shader(nir, stderr);
   }

   if (ST_DEBUG & DEBUG_PRINT_XFB) {
      if (nir->info.io_lowered) {
         if (nir->xfb_info && nir->xfb_info->output_count) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    nir->info.xfb_stride[0], nir->info.xfb_stride[1],
                    nir->info.xfb_stride[2], nir->info.xfb_stride[3]);
            nir_print_xfb_info(nir->xfb_info, stderr);
         }
      } else {
         struct pipe_stream_output_info *so = &state->stream_output;

         if (so->num_outputs) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    so->stride[0], so->stride[1], so->stride[2],
                    so->stride[3]);

            for (unsigned i = 0; i < so->num_outputs; i++) {
               fprintf(stderr, "output%u: buffer=%u offset=%u, location=%u, "
                               "component_offset=%u, component_mask=0x%x, "
                               "stream=%u\n",
                       i, so->output[i].output_buffer,
                       so->output[i].dst_offset * 4,
                       so->output[i].register_index,
                       so->output[i].start_component,
                       BITFIELD_RANGE(so->output[i].start_component,
                                      so->output[i].num_components),
                       so->output[i].stream);
            }
         }
      }
   }

   void *shader;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      shader = pipe->create_vs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = pipe->create_tcs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = pipe->create_tes_state(pipe, state);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = pipe->create_gs_state(pipe, state);
      break;
   case MESA_SHADER_FRAGMENT:
      shader = pipe->create_fs_state(pipe, state);
      break;
   case MESA_SHADER_COMPUTE: {
      /* We'd like to use this for all stages but we need to rework streamout in
       * gallium first.
       */
      shader = pipe_shader_from_nir(pipe, nir);
      break;
   }
   default:
      unreachable("unsupported shader stage");
      return NULL;
   }

   return shader;
}
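
/* Typical call pattern (sketch; mirrors the variant creators below):
 *
 *    struct pipe_shader_state state = {0};
 *    state.type = PIPE_SHADER_IR_NIR;
 *    state.ir.nir = nir;   // the driver takes ownership here
 *    void *cso = st_create_nir_shader(st, &state);
 *
 * The caller must not free the NIR afterwards; see the ownership note in
 * st_release_variants() above.
 */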

/**
 * Translate a vertex program.
 */
static bool
st_translate_vertex_program(struct st_context *st,
                            struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    */
   prog->affected_states = ST_NEW_VS_STATE |
                           ST_NEW_RASTERIZER |
                           ST_NEW_VERTEX_ARRAYS;

   if (prog->Parameters->NumParameters)
      prog->affected_states |= ST_NEW_VS_CONSTANTS;

   if (prog->arb.Instructions && prog->nir)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }
   free(prog->base_serialized_nir);
   /* Clear the stale pointer so st_serialize_base_nir() re-serializes. */
   prog->base_serialized_nir = NULL;

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions)
      prog->nir = prog_to_nir(st->ctx, prog);
   st_prog_to_nir_postprocess(st, prog->nir, prog);
   prog->info = prog->nir->info;

   st_prepare_vertex_program(prog);
   return true;
}

static const struct nir_shader_compiler_options draw_nir_options = {
   .lower_scmp = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_fsat = true,
   .lower_bitfield_insert = true,
   .lower_bitfield_extract = true,
   .lower_fdph = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp16 = true,
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_split = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_half_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_2x32_64 = true,
   .lower_ifind_msb = true,
   .lower_int64_options = nir_lower_imul_2x32_64,
   .lower_doubles_options = nir_lower_dround_even,
   .max_unroll_iterations = 32,
   .lower_to_scalar = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .lower_fisnormal = true,
   .lower_fquantize2f16 = true,
   .driver_functions = true,
};

static struct nir_shader *
get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
{
   if ((!is_draw || !st->ctx->Const.PackedDriverUniformStorage) && prog->nir) {
      nir_shader *nir = prog->nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning. Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      prog->nir = NULL;
      assert(prog->serialized_nir && prog->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      is_draw ? &draw_nir_options : st_get_nir_compiler_options(st, prog->info.stage);

   if (is_draw && st->ctx->Const.PackedDriverUniformStorage &&
       (!prog->shader_program || prog->shader_program->data->LinkStatus != LINKING_SKIPPED)) {
      assert(prog->base_serialized_nir);
      blob_reader_init(&blob_reader, prog->base_serialized_nir, prog->base_serialized_nir_size);
   } else {
      assert(prog->serialized_nir);
      blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
   }
   return nir_deserialize(NULL, options, &blob_reader);
}
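
/* Example of the ownership policy above (illustrative): the first variant
 * built for a program consumes prog->nir directly; a later variant with a
 * different key finds prog->nir == NULL and reconstructs the shader with
 * nir_deserialize() from the serialized blob, so two variants never alias
 * the same nir_shader.
 */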

static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) {
      NIR_PASS(_, nir, nir_lower_clip_disable, ucp_enables);
   } else {
      bool can_compact = nir->options->compact_arrays;
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
            clipplane_state[i][1] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_TESS_EVAL) {
         NIR_PASS(_, nir, nir_lower_clip_vs, ucp_enables,
                  true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS(_, nir, nir_lower_clip_gs, ucp_enables,
                  can_compact, clipplane_state);
      }
   }
}

static bool
force_persample_shading(struct nir_builder *b, nir_intrinsic_instr *intr,
                        void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
       intr->intrinsic == nir_intrinsic_load_barycentric_centroid) {
      intr->intrinsic = nir_intrinsic_load_barycentric_sample;
      return true;
   }

   return false;
}

static int
xfb_compare_dst_offset(const void *a, const void *b)
{
   const struct pipe_stream_output *var0 = (const struct pipe_stream_output*)a;
   const struct pipe_stream_output *var1 = (const struct pipe_stream_output*)b;

   if (var0->output_buffer != var1->output_buffer)
      return var0->output_buffer > var1->output_buffer ? 1 : -1;

   return var0->dst_offset - var1->dst_offset;
}
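
/* Example ordering (illustrative): (buffer 0, offset 4) sorts before
 * (buffer 1, offset 0) because the buffer index is compared first; within
 * a buffer, the dst_offset difference puts (offset 0) before (offset 4).
 */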

static void
get_stream_output_info_from_nir(nir_shader *nir,
                                struct pipe_stream_output_info *info)
{
   /* Get pipe_stream_output_info from NIR. Only used by IO variables. */
   nir_xfb_info *xfb = nir->xfb_info;
   memset(info, 0, sizeof(*info));

   if (!xfb)
      return;

   info->num_outputs = xfb->output_count;

   for (unsigned i = 0; i < 4; i++)
      info->stride[i] = nir->info.xfb_stride[i];

   for (unsigned i = 0; i < xfb->output_count; i++) {
      struct pipe_stream_output *out = &info->output[i];

      assert(!xfb->outputs[i].high_16bits);

      out->register_index =
         util_bitcount64(nir->info.outputs_written &
                         BITFIELD64_MASK(xfb->outputs[i].location));
      out->start_component = xfb->outputs[i].component_offset;
      out->num_components = util_bitcount(xfb->outputs[i].component_mask);
      out->output_buffer = xfb->outputs[i].buffer;
      out->dst_offset = xfb->outputs[i].offset / 4;
      out->stream = xfb->buffer_to_stream[out->output_buffer];
   }

   /* Intel requires that xfb outputs are sorted by dst_offset. */
   qsort(info->output, info->num_outputs, sizeof(info->output[0]),
         xfb_compare_dst_offset);
}
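
/* Worked example (hypothetical mask): if outputs_written covers POS, COL0
 * and TEX0, an xfb output at VARYING_SLOT_TEX0 gets register_index =
 * util_bitcount64(outputs_written & BITFIELD64_MASK(VARYING_SLOT_TEX0)),
 * i.e. the count of written outputs in lower slots (2 in this example).
 */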

static struct st_common_variant *
st_create_common_variant(struct st_context *st,
                         struct gl_program *prog,
                         const struct st_common_variant_key *key,
                         bool report_compile_error, char **error)
{
   MESA_TRACE_FUNC();

   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = prog->Parameters;

   v->key = *key;

   state.stream_output = prog->state.stream_output;

   bool finalize = false;

   state.type = PIPE_SHADER_IR_NIR;
   state.ir.nir = get_nir_shader(st, prog, key->is_draw_shader);
   const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }
   if (key->passthrough_edgeflags) {
      NIR_PASS(_, state.ir.nir, nir_lower_passthrough_edgeflags);
      finalize = true;
   }

   if (key->export_point_size) {
      /* if flag is set, shader must export psiz */
      _mesa_add_state_reference(params, point_size_state);
      NIR_PASS(_, state.ir.nir, nir_lower_point_size_mov,
               point_size_state);

      finalize = true;
   }

   if (key->lower_ucp) {
      assert(!options->unify_interfaces);
      lower_ucp(st, state.ir.nir, key->lower_ucp, params);
      finalize = true;
   }

   if (st->emulate_gl_clamp &&
       (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      st_finalize_nir(st, prog, prog->shader_program, state.ir.nir, false,
                      key->is_draw_shader);
   }

   assert(state.ir.nir->info.stage == MESA_SHADER_COMPUTE ||
          state.ir.nir->info.io_lowered);

   /* This should be after all passes that touch IO. */
   if (state.ir.nir->info.io_lowered &&
       (!(state.ir.nir->options->io_options & nir_io_has_intrinsics) ||
        key->is_draw_shader)) {
      assert(!state.stream_output.num_outputs || state.ir.nir->xfb_info);
      get_stream_output_info_from_nir(state.ir.nir, &state.stream_output);
      /* Some lowering passes can leave dead code behind, but dead IO intrinsics
       * are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);

      /* vc4, vc5 require this. */
      if (state.ir.nir->info.stage == MESA_SHADER_VERTEX ||
          state.ir.nir->info.stage == MESA_SHADER_TESS_EVAL)
         NIR_PASS(_, state.ir.nir, nir_move_output_stores_to_end);

      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);

      if (state.ir.nir->info.stage == MESA_SHADER_TESS_CTRL &&
          state.ir.nir->options->compact_arrays &&
          state.ir.nir->options->vectorize_tess_levels)
         NIR_PASS(_, state.ir.nir, nir_vectorize_tess_levels);

      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      struct pipe_screen *screen = st->screen;
      if (!key->is_draw_shader && screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }

      /* Clip lowering and edgeflags may have introduced new varyings, so
       * update the inputs_read/outputs_written. However, with
       * unify_interfaces set (aka iris) the non-SSO varyings layout is
       * decided at link time with outputs_written updated so the two line
       * up.  A driver with this flag set may not use any of the lowering
       * passes that would change the varyings, so skip to make sure we don't
       * break its linkage.
       */
      if (!options->unify_interfaces) {
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }
   }

   if (key->is_draw_shader) {
      NIR_PASS(_, state.ir.nir, gl_nir_lower_images, false);
      v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   } else {
      v->base.driver_shader = st_create_nir_shader(st, &state);
   }

   if (report_compile_error && state.error_message) {
      *error = state.error_message;
      FREE(v);   /* Don't leak the variant on compile failure. */
      return NULL;
   }

   if (error)
      *error = NULL;
   return v;
}

static void
st_add_variant(struct st_variant **list, struct st_variant *v)
{
   struct st_variant *first = *list;

   /* Make sure that the default variant stays first in the list, and insert
    * any later variants as the second entry.
    */
   if (first) {
      v->next = first->next;
      first->next = v;
   } else {
      *list = v;
   }
}
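
/* Example (illustrative): starting from [default], adding variant A and
 * then variant B yields [default, B, A]; the default variant created at
 * translate time stays first, so the common case hits on the first key
 * memcmp in the lookup loops below.
 */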

/**
 * Find/create a shader variant (for the non-fragment stages).
 */
struct st_common_variant *
st_get_common_variant(struct st_context *st,
                      struct gl_program *prog,
                      const struct st_common_variant_key *key,
                      bool report_compile_error, char **error)
{
   struct st_common_variant *v;

   /* Search for existing variant */
   for (v = st_common_variant(prog->variants); v;
        v = st_common_variant(v->base.next)) {
      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!v) {
      if (prog->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling %s shader variant (%s%s%s%s%s%s)",
                          _mesa_shader_stage_to_string(prog->info.stage),
                          key->passthrough_edgeflags ? "edgeflags," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->export_point_size ? "point_size," : "",
                          key->lower_ucp ? "ucp," : "",
                          key->is_draw_shader ? "draw," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      /* create now */
      v = st_create_common_variant(st, prog, key, report_compile_error, error);
      if (v) {
         v->base.st = key->st;

         if (prog->info.stage == MESA_SHADER_VERTEX) {
            struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;

            v->vert_attrib_mask =
               vp->vert_attrib_mask |
               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
         }

         st_add_variant(&prog->variants, &v->base);
      }
   }

   return v;
}


/**
 * Translate a non-GLSL Mesa fragment shader into a NIR shader.
 */
static bool
st_translate_fragment_program(struct st_context *st,
                              struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    *
    * fragment.position and glDrawPixels always use constants.
    */
   prog->affected_states = ST_NEW_FS_STATE |
                           ST_NEW_SAMPLE_SHADING |
                           ST_NEW_FS_CONSTANTS;

   if (prog->ati_fs) {
      /* Just set them for ATI_fs unconditionally. */
      prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                               ST_NEW_FS_SAMPLERS;
   } else {
      /* ARB_fp */
      if (prog->SamplersUsed)
         prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
   }

   /* Translate to NIR. */
   if (prog->nir && prog->arb.Instructions)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions) {
      prog->nir = prog_to_nir(st->ctx, prog);
   } else if (prog->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      assert(!prog->nir);
      prog->nir = st_translate_atifs_program(prog->ati_fs, prog, options);
   }
   st_prog_to_nir_postprocess(st, prog->nir, prog);

   prog->info = prog->nir->info;
   if (prog->ati_fs) {
      /* ATI_fs will lower fixed function fog at variant time, after the FF vertex
       * prog has been generated.  So we have to always declare a read of FOGC so
       * that FF vp feeds it to us just in case.
       */
      prog->info.inputs_read |= VARYING_BIT_FOGC;
   }

   return true;
}

static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct gl_program *fp,
                     const struct st_fp_variant_key *key,
                     bool report_compile_error, char **error)
{
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = fp->Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   MESA_TRACE_FUNC();

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   state.ir.nir = get_nir_shader(st, fp, false);
   state.type = PIPE_SHADER_IR_NIR;
   state.report_compile_error = report_compile_error;

   bool finalize = false;

   if (fp->ati_fs) {
      if (key->fog) {
         NIR_PASS(_, state.ir.nir, st_nir_lower_fog, key->fog, fp->Parameters);
      }

      NIR_PASS(_, state.ir.nir, st_nir_lower_atifs_samplers, key->texture_index);

      finalize = true;
   }

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }

   if (key->lower_flatshade) {
      NIR_PASS(_, state.ir.nir, nir_lower_flatshade);
      finalize = true;
   }

   if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
      _mesa_add_state_reference(params, alpha_ref_state);
      NIR_PASS(_, state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
               false, alpha_ref_state);
      finalize = true;
   }

   if (key->lower_two_sided_color) {
      bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
      NIR_PASS(_, state.ir.nir, nir_lower_two_sided_color, face_sysval);
      finalize = true;
   }

   if (key->persample_shading) {
      nir_shader *shader = state.ir.nir;
      nir_shader_intrinsics_pass(shader, force_persample_shading,
                                 nir_metadata_all, NULL);

      /* In addition to requiring per-sample interpolation, sample shading
       * changes the behaviour of gl_SampleMaskIn, so we need per-sample shading
       * even if there are no shader-in variables at all. In that case,
       * uses_sample_shading won't be set by glsl_to_nir. We need to do so here.
       */
      shader->info.fs.uses_sample_shading = true;

      finalize = true;
   }

   if (st->emulate_gl_clamp &&
       (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
      finalize = true;
   }

   assert(!(key->bitmap && key->drawpixels));

   /* glBitmap */
   if (key->bitmap) {
      nir_lower_bitmap_options options = {0};

      variant->bitmap_sampler = ffs(~fp->SamplersUsed) - 1;
      options.sampler = variant->bitmap_sampler;
      options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

      NIR_PASS(_, state.ir.nir, nir_lower_bitmap, &options);
      finalize = true;
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      nir_lower_drawpixels_options options = {{0}};
      unsigned samplers_used = fp->SamplersUsed;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~samplers_used) - 1;
      options.drawpix_sampler = variant->drawpix_sampler;
      samplers_used |= (1 << variant->drawpix_sampler);

      options.pixel_maps = key->pixelMaps;
      if (key->pixelMaps) {
         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
         options.pixelmap_sampler = variant->pixelmap_sampler;
      }

      options.scale_and_bias = key->scaleAndBias;
      if (key->scaleAndBias) {
         _mesa_add_state_reference(params, scale_state);
         memcpy(options.scale_state_tokens, scale_state,
                sizeof(options.scale_state_tokens));
         _mesa_add_state_reference(params, bias_state);
         memcpy(options.bias_state_tokens, bias_state,
                sizeof(options.bias_state_tokens));
      }

      _mesa_add_state_reference(params, texcoord_state);
      memcpy(options.texcoord_state_tokens, texcoord_state,
             sizeof(options.texcoord_state_tokens));

      NIR_PASS(_, state.ir.nir, nir_lower_drawpixels, &options);
      finalize = true;
   }

   bool need_lower_tex_src_plane = false;

   if (unlikely(key->external.lower_nv12 || key->external.lower_nv21 ||
                key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                key->external.lower_yx_xvxu || key->external.lower_xy_vxux ||
                key->external.lower_ayuv || key->external.lower_xyuv ||
                key->external.lower_yuv || key->external.lower_yu_yv ||
                key->external.lower_yv_yu || key->external.lower_y41x)) {

      st_nir_lower_samplers(st->screen, state.ir.nir,
                            fp->shader_program, fp);

      nir_lower_tex_options options = {0};
      options.lower_y_uv_external = key->external.lower_nv12;
      options.lower_y_vu_external = key->external.lower_nv21;
      options.lower_y_u_v_external = key->external.lower_iyuv;
      options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
      options.lower_xy_vxux_external = key->external.lower_xy_vxux;
      options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
      options.lower_yx_xvxu_external = key->external.lower_yx_xvxu;
      options.lower_ayuv_external = key->external.lower_ayuv;
      options.lower_xyuv_external = key->external.lower_xyuv;
      options.lower_yuv_external = key->external.lower_yuv;
      options.lower_yu_yv_external = key->external.lower_yu_yv;
      options.lower_yv_yu_external = key->external.lower_yv_yu;
      options.lower_y41x_external = key->external.lower_y41x;
      options.bt709_external = key->external.bt709;
      options.bt2020_external = key->external.bt2020;
      options.yuv_full_range_external = key->external.yuv_full_range;
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &options);
      finalize = true;
      need_lower_tex_src_plane = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, fp, fp->shader_program, state.ir.nir, false, false);

   /* This pass needs to happen *after* nir_lower_sampler */
   if (unlikely(need_lower_tex_src_plane)) {
      NIR_PASS(_, state.ir.nir, st_nir_lower_tex_src_plane,
               ~fp->SamplersUsed,
               key->external.lower_nv12 | key->external.lower_nv21 |
                  key->external.lower_xy_uxvx | key->external.lower_xy_vxux |
                  key->external.lower_yx_xuxv | key->external.lower_yx_xvxu,
               key->external.lower_iyuv);
      finalize = true;
   }

   /* It is undefined behavior when an ARB assembly program uses a SHADOW2D
    * target with a texture that is not in a depth format.  In this case
    * NVIDIA silently replaces the SHADOW sampler with a normal sampler, and
    * some games, like Penumbra Overture, abuse this UB (issues/8425): they
    * work fine there but break with Mesa.  Replace the shadow sampler with
    * a normal one here.
    */
   if (!fp->shader_program && ~key->depth_textures & fp->ShadowSamplers) {
      NIR_PASS(_, state.ir.nir, nir_remove_tex_shadow,
               ~key->depth_textures & fp->ShadowSamplers);
      finalize = true;
   }

   assert(state.ir.nir->info.io_lowered);

   /* This should be after all passes that touch IO. */
   if (!(state.ir.nir->options->io_options & nir_io_has_intrinsics)) {
      /* Some lowering passes can leave dead code behind, but dead IO intrinsics
       * are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);
      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);
      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice) {
      /* Some of the lowering above may have introduced new varyings */
      nir_shader_gather_info(state.ir.nir,
                             nir_shader_get_entrypoint(state.ir.nir));

      struct pipe_screen *screen = st->screen;
      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }
   }

   variant->base.driver_shader = st_create_nir_shader(st, &state);
   if (report_compile_error && state.error_message) {
      *error = state.error_message;
      FREE(variant);   /* Don't leak the variant on compile failure. */
      return NULL;
   }

   variant->key = *key;
   if (error)
      *error = NULL;
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct gl_program *fp,
                  const struct st_fp_variant_key *key,
                  bool report_compile_error, char **error)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(fp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */

      if (fp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%d)",
                          key->bitmap ? "bitmap," : "",
                          key->drawpixels ? "drawpixels," : "",
                          key->scaleAndBias ? "scale_bias," : "",
                          key->pixelMaps ? "pixel_maps," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->persample_shading ? "persample_shading," : "",
                          key->fog ? "fog," : "",
                          key->lower_two_sided_color ? "twoside," : "",
                          key->lower_flatshade ? "flatshade," : "",
                          key->lower_alpha_func != COMPARE_FUNC_ALWAYS ? "alpha_compare," : "",
                          /* skipped ATI_fs targets */
                          fp->ExternalSamplersUsed ? "external?," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "",
                          "depth_textures=", key->depth_textures);
      }

      fpv = st_create_fp_variant(st, fp, key, report_compile_error, error);
      if (fpv) {
         fpv->base.st = key->st;

         st_add_variant(&fp->variants, &fpv->base);
      }
   }

   return fpv;
}

/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *p)
{
   if (!p || p == &_mesa_DummyProgram)
      return;

   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, p->Target);
      } else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(&st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(&st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}

/**
 * Compile one shader variant.
 */
static char *
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog,
                             bool report_compile_error)
{
   char *error = NULL;

   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      if (_mesa_is_desktop_gl_compat(st->ctx) &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, prog, &key, report_compile_error, &error);
      return error;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      if (prog->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }

      /* Shadow samplers require texture in depth format, which we lower to
       * non-shadow if necessary for ARB programs
       */
      if (!prog->shader_program)
         key.depth_textures = prog->ShadowSamplers;

      st_get_fp_variant(st, prog, &key, report_compile_error, &error);
      return error;
   }

   default:
      unreachable("invalid shader stage");
   }
}

void
st_serialize_nir(struct gl_program *prog)
{
   if (!prog->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, prog->nir, false);
      blob_finish_get_buffer(&blob, &prog->serialized_nir, &size);
      prog->serialized_nir_size = size;
   }
}
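
/* Counterpart sketch: get_nir_shader() above reverses this with
 *
 *    struct blob_reader r;
 *    blob_reader_init(&r, prog->serialized_nir, prog->serialized_nir_size);
 *    nir_shader *nir = nir_deserialize(NULL, options, &r);
 *
 * so serialized_nir_size must stay in sync with the blob written here.
 */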

void
st_serialize_base_nir(struct gl_program *prog, nir_shader *nir)
{
   if (!prog->base_serialized_nir && nir->info.stage == MESA_SHADER_VERTEX) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, nir, false);
      blob_finish_get_buffer(&blob, &prog->base_serialized_nir, &size);
      prog->base_serialized_nir_size = size;
   }
}

char *
st_finalize_program(struct st_context *st, struct gl_program *prog,
                    bool report_compile_error)
{
   struct gl_context *ctx = st->ctx;
   bool is_bound = false;

   MESA_TRACE_FUNC();

   if (prog->info.stage == MESA_SHADER_VERTEX)
      is_bound = prog == ctx->VertexProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_CTRL)
      is_bound = prog == ctx->TessCtrlProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_EVAL)
      is_bound = prog == ctx->TessEvalProgram._Current;
   else if (prog->info.stage == MESA_SHADER_GEOMETRY)
      is_bound = prog == ctx->GeometryProgram._Current;
   else if (prog->info.stage == MESA_SHADER_FRAGMENT)
      is_bound = prog == ctx->FragmentProgram._Current;
   else if (prog->info.stage == MESA_SHADER_COMPUTE)
      is_bound = prog == ctx->ComputeProgram._Current;

   if (is_bound) {
      if (prog->info.stage == MESA_SHADER_VERTEX) {
         ctx->Array.NewVertexElements = true;
         ctx->NewDriverState |= ST_NEW_VERTEX_PROGRAM(ctx, prog);
      } else {
         ctx->NewDriverState |= prog->affected_states;
      }
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled. If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_base_nir(prog, prog->nir);
      st_serialize_nir(prog);
   }

   /* Always create the default variant of the program. */
   return st_precompile_shader_variant(st, prog, report_compile_error);
}

/**
 * Called when the program's text/code is changed.  We have to free
 * all shader variants and corresponding gallium shaders when this happens.
 */
GLboolean
st_program_string_notify(struct gl_context *ctx,
                         GLenum target,
                         struct gl_program *prog)
{
   struct st_context *st = st_context(ctx);

   /* GLSL-to-NIR should not end up here. */
   assert(!prog->shader_program);

   st_release_variants(st, prog);

   if (target == GL_FRAGMENT_PROGRAM_ARB ||
       target == GL_FRAGMENT_SHADER_ATI) {
      if (!st_translate_fragment_program(st, prog))
         return false;
   } else if (target == GL_VERTEX_PROGRAM_ARB) {
      if (!st_translate_vertex_program(st, prog))
         return false;
      if (st->add_point_size &&
          gl_nir_can_add_pointsize_to_program(&st->ctx->Const, prog)) {
         prog->skip_pointsize_xfb = true;
         NIR_PASS(_, prog->nir, gl_nir_add_point_size);
      }
   }

   st_finalize_program(st, prog, false);
   return GL_TRUE;
}