/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "nir_builder.h"
#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "nir/nir_xfb_info.h"
#include "nir/pipe_nir.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"

#include "util/u_dump.h"
#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}
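
/* Worked example: for a vertex shader that samples textures and reads one
 * UBO but has no parameters, the call
 *
 *    set_affected_state_flags(states, prog,
 *                             ST_NEW_VS_CONSTANTS,
 *                             ST_NEW_VS_SAMPLER_VIEWS,
 *                             ST_NEW_VS_SAMPLERS,
 *                             ST_NEW_VS_IMAGES,
 *                             ST_NEW_VS_UBOS,
 *                             ST_NEW_VS_SSBOS,
 *                             ST_NEW_VS_ATOMICS);
 *
 * ORs ST_NEW_VS_SAMPLER_VIEWS | ST_NEW_VS_SAMPLERS | ST_NEW_VS_UBOS into
 * *states, so binding the shader later dirties exactly the state groups
 * the shader consumes.
 */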

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &prog->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &prog->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &prog->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &prog->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &prog->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &prog->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   FREE(v);
}
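
/* Why the zombie list above exists: a gallium driver shader must be
 * destroyed with the pipe_context that created it, so when the calling
 * context differs (and shaders are not shareable), the delete_*_state()
 * call is deferred via st_save_zombie_shader() to the variant's owning
 * context instead of being issued cross-context here.
 */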

static void
st_unbind_program(struct st_context *st, struct gl_program *p)
{
   struct gl_context *ctx = st->ctx;

   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct gl_program *p)
{
   struct st_variant *v;

   /* If we are releasing shaders, unbind them first, because we don't
    * know which of them are currently bound in the driver.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Target);
      v = next;
   }

   p->variants = NULL;

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct gl_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, *p);
   _mesa_reference_program(st->ctx, p, NULL);
}

void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS(_, nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* st_nir_assign_vs_in_locations requires correct shader info. */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}

static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_reg_intrinsics_to_ssa");

   /* Lower outputs to temporaries to avoid reading from output variables (which
    * is permitted by the language but generally not implemented in HW).
    */
   NIR_PASS(_, nir, nir_lower_io_to_temporaries,
               nir_shader_get_entrypoint(nir),
               true, false);
   NIR_PASS(_, nir, nir_lower_global_vars_to_local);

   NIR_PASS(_, nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS(_, nir, nir_lower_system_values);

   struct nir_lower_compute_system_values_options cs_options = {
      .has_base_global_invocation_id = false,
      .has_base_workgroup_id = false,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs_options);

   /* Optimise NIR */
   NIR_PASS(_, nir, nir_opt_constant_folding);
   gl_nir_opts(nir);

   /* This must be done after optimizations to assign IO bases. */
   nir_recompute_io_bases(nir, nir_var_shader_in | nir_var_shader_out);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice) {
      st_serialize_base_nir(prog, nir);
      st_finalize_nir(st, prog, NULL, nir, true, false);

      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, nir);
         free(msg);
      }
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}

/**
 * Prepare st_vertex_program info: count the vertex inputs and compute the
 * mapping of vertex program outputs to driver output slots.
 */
void
st_prepare_vertex_program(struct gl_program *prog)
{
   struct gl_vertex_program *stvp = (struct gl_vertex_program *)prog;

   stvp->num_inputs = util_bitcount64(prog->info.inputs_read);
   stvp->vert_attrib_mask = prog->info.inputs_read;

   /* Compute mapping of vertex program outputs to slots. */
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}
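
/* Example of the mapping built above: if only VARYING_SLOT_POS and
 * VARYING_SLOT_COL0 are written, then result_to_output[VARYING_SLOT_POS]
 * is 0, result_to_output[VARYING_SLOT_COL0] is 1, all other entries keep
 * the ~0 "unused" marker, and result_to_output[VARYING_SLOT_EDGE] is
 * pre-set to 2 in case a variant later enables passthrough edge flags.
 */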

void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      /* this output was added by mesa/st and should not be tracked for xfb:
       * drivers must check var->data.explicit_location to find the original output
       * and only emit that one for xfb
       */
      if (prog->skip_pointsize_xfb && attr == VARYING_SLOT_PSIZ)
         continue;
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &prog->state.stream_output;

   if (!num_outputs) {
      so_info->num_outputs = 0;
      return;
   }

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}
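
/* Worked example: for a shader writing POS, PSIZ and COL0 where PSIZ was
 * injected by mesa/st (skip_pointsize_xfb), the loop above produces
 * output_mapping[POS] = 0 and output_mapping[COL0] = 1, while PSIZ is
 * skipped entirely; each gl_transform_feedback_info output is then
 * rewritten in terms of these driver output slots.
 */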

/**
 * Creates a driver shader from a NIR shader.  Takes ownership of the
 * passed nir_shader.
 */
void *
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
{
   struct pipe_context *pipe = st->pipe;

   assert(state->type == PIPE_SHADER_IR_NIR);
   nir_shader *nir = state->ir.nir;
   gl_shader_stage stage = nir->info.stage;

   /* Renumber SSA defs to make it easier to run diff on printed NIR. */
   nir_foreach_function_impl(impl, nir) {
      nir_index_ssa_defs(impl);
   }

   if (ST_DEBUG & DEBUG_PRINT_IR) {
      fprintf(stderr, "NIR before handing off to driver:\n");
      nir_print_shader(nir, stderr);
   }

   if (ST_DEBUG & DEBUG_PRINT_XFB) {
      if (nir->info.io_lowered) {
         if (nir->xfb_info && nir->xfb_info->output_count) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    nir->info.xfb_stride[0], nir->info.xfb_stride[1],
                    nir->info.xfb_stride[2], nir->info.xfb_stride[3]);
            nir_print_xfb_info(nir->xfb_info, stderr);
         }
      } else {
         struct pipe_stream_output_info *so = &state->stream_output;

         if (so->num_outputs) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    so->stride[0], so->stride[1], so->stride[2],
                    so->stride[3]);

            for (unsigned i = 0; i < so->num_outputs; i++) {
               fprintf(stderr, "output%u: buffer=%u offset=%u, location=%u, "
                               "component_offset=%u, component_mask=0x%x, "
                               "stream=%u\n",
                       i, so->output[i].output_buffer,
                       so->output[i].dst_offset * 4,
                       so->output[i].register_index,
                       so->output[i].start_component,
                       BITFIELD_RANGE(so->output[i].start_component,
                                      so->output[i].num_components),
                       so->output[i].stream);
            }
         }
      }
   }

   void *shader;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      shader = pipe->create_vs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = pipe->create_tcs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = pipe->create_tes_state(pipe, state);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = pipe->create_gs_state(pipe, state);
      break;
   case MESA_SHADER_FRAGMENT:
      shader = pipe->create_fs_state(pipe, state);
      break;
   case MESA_SHADER_COMPUTE: {
      /* We'd like to use this for all stages but we need to rework streamout in
       * gallium first.
       */
      shader = pipe_shader_from_nir(pipe, nir);
      break;
   }
   default:
      unreachable("unsupported shader stage");
      return NULL;
   }

   return shader;
}

/**
 * Translate a vertex program.
 */
static bool
st_translate_vertex_program(struct st_context *st,
                            struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    */
   prog->affected_states = ST_NEW_VS_STATE |
                           ST_NEW_RASTERIZER |
                           ST_NEW_VERTEX_ARRAYS;

   if (prog->Parameters->NumParameters)
      prog->affected_states |= ST_NEW_VS_CONSTANTS;

   if (prog->arb.Instructions && prog->nir)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }
   free(prog->base_serialized_nir);

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions)
      prog->nir = prog_to_nir(st->ctx, prog);
   st_prog_to_nir_postprocess(st, prog->nir, prog);
   prog->info = prog->nir->info;

   st_prepare_vertex_program(prog);
   return true;
}

static const struct nir_shader_compiler_options draw_nir_options = {
   .lower_scmp = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_fsat = true,
   .lower_bitfield_insert = true,
   .lower_bitfield_extract = true,
   .lower_fdph = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp16 = true,
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_split = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_half_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_2x32_64 = true,
   .lower_ifind_msb = true,
   .lower_int64_options = nir_lower_imul_2x32_64,
   .lower_doubles_options = nir_lower_dround_even,
   .max_unroll_iterations = 32,
   .lower_to_scalar = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .lower_fisnormal = true,
   .lower_fquantize2f16 = true,
   .driver_functions = true,
};

static struct nir_shader *
get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
{
   if ((!is_draw || !st->ctx->Const.PackedDriverUniformStorage) && prog->nir) {
      nir_shader *nir = prog->nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning. Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      prog->nir = NULL;
      assert(prog->serialized_nir && prog->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      is_draw ? &draw_nir_options : st_get_nir_compiler_options(st, prog->info.stage);

   if (is_draw && st->ctx->Const.PackedDriverUniformStorage &&
       (!prog->shader_program || prog->shader_program->data->LinkStatus != LINKING_SKIPPED)) {
      assert(prog->base_serialized_nir);
      blob_reader_init(&blob_reader, prog->base_serialized_nir, prog->base_serialized_nir_size);
   } else {
      assert(prog->serialized_nir);
      blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
   }
   return nir_deserialize(NULL, options, &blob_reader);
}
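
/* Resulting ownership contract (as described by the comments above): the
 * first variant consumes prog->nir directly, while every later variant,
 * and every draw (CPU fallback) variant under packed driver uniforms, is
 * deserialized from a blob; in all cases the caller receives a nir_shader
 * it owns and may freely mutate for this variant.
 */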

static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
      NIR_PASS(_, nir, nir_lower_clip_disable, ucp_enables);
   else {
      bool can_compact = nir->options->compact_arrays;
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
            clipplane_state[i][1] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_TESS_EVAL) {
         NIR_PASS(_, nir, nir_lower_clip_vs, ucp_enables,
                    true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS(_, nir, nir_lower_clip_gs, ucp_enables,
                    can_compact, clipplane_state);
      }

      NIR_PASS(_, nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS(_, nir, nir_lower_global_vars_to_local);
   }
}

static bool
force_persample_shading(struct nir_builder *b, nir_intrinsic_instr *intr,
                        void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
       intr->intrinsic == nir_intrinsic_load_barycentric_centroid) {
      intr->intrinsic = nir_intrinsic_load_barycentric_sample;
      return true;
   }

   return false;
}

static int
xfb_compare_dst_offset(const void *a, const void *b)
{
   const struct pipe_stream_output *var0 = (const struct pipe_stream_output*)a;
   const struct pipe_stream_output *var1 = (const struct pipe_stream_output*)b;

   if (var0->output_buffer != var1->output_buffer)
      return var0->output_buffer > var1->output_buffer ? 1 : -1;

   return var0->dst_offset - var1->dst_offset;
}
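
/* Sort order produced by the comparator above: first by output_buffer,
 * then by dst_offset within a buffer, e.g. (buffer 0, offset 4) sorts
 * before (buffer 1, offset 0).
 */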

static void
get_stream_output_info_from_nir(nir_shader *nir,
                                struct pipe_stream_output_info *info)
{
   /* Get pipe_stream_output_info from NIR. Only used by IO variables. */
   nir_xfb_info *xfb = nir->xfb_info;
   memset(info, 0, sizeof(*info));

   if (!xfb)
      return;

   info->num_outputs = xfb->output_count;

   for (unsigned i = 0; i < 4; i++)
      info->stride[i] = nir->info.xfb_stride[i];

   for (unsigned i = 0; i < xfb->output_count; i++) {
      struct pipe_stream_output *out = &info->output[i];

      assert(!xfb->outputs[i].high_16bits);

      out->register_index =
         util_bitcount64(nir->info.outputs_written &
                         BITFIELD64_MASK(xfb->outputs[i].location));
      out->start_component = xfb->outputs[i].component_offset;
      out->num_components = util_bitcount(xfb->outputs[i].component_mask);
      out->output_buffer = xfb->outputs[i].buffer;
      out->dst_offset = xfb->outputs[i].offset / 4;
      out->stream = xfb->buffer_to_stream[out->output_buffer];
   }

   /* Intel requires that xfb outputs are sorted by dst_offset. */
   qsort(info->output, info->num_outputs, sizeof(info->output[0]),
         xfb_compare_dst_offset);
}
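
/* Example for the register_index computation above: with outputs_written =
 * POS | COL0 | TEX0, an xfb output at location COL0 gets register_index =
 * util_bitcount64(outputs_written & BITFIELD64_MASK(COL0)) = 1, i.e. the
 * number of written outputs occupying lower slots.
 */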

static struct st_common_variant *
st_create_common_variant(struct st_context *st,
                         struct gl_program *prog,
                         const struct st_common_variant_key *key)
{
   MESA_TRACE_FUNC();

   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = prog->Parameters;

   v->key = *key;

   state.stream_output = prog->state.stream_output;

   bool finalize = false;

   state.type = PIPE_SHADER_IR_NIR;
   state.ir.nir = get_nir_shader(st, prog, key->is_draw_shader);
   const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }
   if (key->passthrough_edgeflags) {
      NIR_PASS(_, state.ir.nir, nir_lower_passthrough_edgeflags);
      finalize = true;
   }

   if (key->export_point_size) {
      /* if flag is set, shader must export psiz */
      _mesa_add_state_reference(params, point_size_state);
      NIR_PASS(_, state.ir.nir, nir_lower_point_size_mov,
                  point_size_state);

      finalize = true;
   }

   if (key->lower_ucp) {
      assert(!options->unify_interfaces);
      lower_ucp(st, state.ir.nir, key->lower_ucp, params);
      finalize = true;
   }

   if (st->emulate_gl_clamp &&
         (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      st_finalize_nir(st, prog, prog->shader_program, state.ir.nir, false,
                      key->is_draw_shader);
   }

   /* This should be after all passes that touch IO. */
   if (state.ir.nir->info.io_lowered &&
       (!(state.ir.nir->options->io_options & nir_io_has_intrinsics) ||
        key->is_draw_shader)) {
      assert(!state.stream_output.num_outputs || state.ir.nir->xfb_info);
      get_stream_output_info_from_nir(state.ir.nir, &state.stream_output);
      /* Some lowering passes can leave dead code behind, but dead IO intrinsics
       * are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);
      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);

      if (state.ir.nir->info.stage == MESA_SHADER_TESS_CTRL &&
          state.ir.nir->options->compact_arrays &&
          state.ir.nir->options->vectorize_tess_levels)
         NIR_PASS(_, state.ir.nir, nir_vectorize_tess_levels);

      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      struct pipe_screen *screen = st->screen;
      if (!key->is_draw_shader && screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }

      /* Clip lowering and edgeflags may have introduced new varyings, so
       * update the inputs_read/outputs_written. However, with
       * unify_interfaces set (aka iris) the non-SSO varyings layout is
       * decided at link time with outputs_written updated so the two line
       * up.  A driver with this flag set may not use any of the lowering
       * passes that would change the varyings, so skip to make sure we don't
       * break its linkage.
       */
      if (!options->unify_interfaces) {
         nir_shader_gather_info(state.ir.nir,
                                 nir_shader_get_entrypoint(state.ir.nir));
      }
   }

   if (key->is_draw_shader) {
      NIR_PASS(_, state.ir.nir, gl_nir_lower_images, false);
      v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   }
   else
      v->base.driver_shader = st_create_nir_shader(st, &state);

   return v;
}

static void
st_add_variant(struct st_variant **list, struct st_variant *v)
{
   struct st_variant *first = *list;

   /* Make sure that the default variant stays the first in the list, and insert
    * any later variants in as the second entry.
    */
   if (first) {
      v->next = first->next;
      first->next = v;
   } else {
      *list = v;
   }
}
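
/* Invariant kept by st_add_variant: the default (precompiled) variant
 * stays at the head of the list, so inserting C into [default, A, B]
 * yields [default, C, A, B] and lookups of the default variant remain a
 * single pointer dereference.
 */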

/**
 * Find/create a common (VS/TCS/TES/GS/CS) program variant.
 */
struct st_common_variant *
st_get_common_variant(struct st_context *st,
                      struct gl_program *prog,
                      const struct st_common_variant_key *key)
{
   struct st_common_variant *v;

   /* Search for existing variant */
   for (v = st_common_variant(prog->variants); v;
        v = st_common_variant(v->base.next)) {
      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!v) {
      if (prog->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling %s shader variant (%s%s%s%s%s%s)",
                          _mesa_shader_stage_to_string(prog->info.stage),
                          key->passthrough_edgeflags ? "edgeflags," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->export_point_size ? "point_size," : "",
                          key->lower_ucp ? "ucp," : "",
                          key->is_draw_shader ? "draw," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      /* create now */
      v = st_create_common_variant(st, prog, key);
      if (v) {
         v->base.st = key->st;

         if (prog->info.stage == MESA_SHADER_VERTEX) {
            struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;

            v->vert_attrib_mask =
               vp->vert_attrib_mask |
               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
         }

         st_add_variant(&prog->variants, &v->base);
      }
   }

   return v;
}


/**
 * Translate a non-GLSL Mesa fragment shader into a NIR shader.
 */
static bool
st_translate_fragment_program(struct st_context *st,
                              struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    *
    * fragment.position and glDrawPixels always use constants.
    */
   prog->affected_states = ST_NEW_FS_STATE |
                           ST_NEW_SAMPLE_SHADING |
                           ST_NEW_FS_CONSTANTS;

   if (prog->ati_fs) {
      /* Just set them for ATI_fs unconditionally. */
      prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                               ST_NEW_FS_SAMPLERS;
   } else {
      /* ARB_fp */
      if (prog->SamplersUsed)
         prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
   }

   /* Translate to NIR. */
   if (prog->nir && prog->arb.Instructions)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions) {
      prog->nir = prog_to_nir(st->ctx, prog);
   } else if (prog->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      assert(!prog->nir);
      prog->nir = st_translate_atifs_program(prog->ati_fs, prog, options);
   }
   st_prog_to_nir_postprocess(st, prog->nir, prog);

   prog->info = prog->nir->info;
   if (prog->ati_fs) {
      /* ATI_fs will lower fixed function fog at variant time, after the FF vertex
       * prog has been generated.  So we have to always declare a read of FOGC so
       * that FF vp feeds it to us just in case.
       */
      prog->info.inputs_read |= VARYING_BIT_FOGC;
   }

   return true;
}

static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct gl_program *fp,
                     const struct st_fp_variant_key *key)
{
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = fp->Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   MESA_TRACE_FUNC();

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   state.ir.nir = get_nir_shader(st, fp, false);
   state.type = PIPE_SHADER_IR_NIR;

   bool finalize = false;

   if (fp->ati_fs) {
      if (key->fog) {
         NIR_PASS(_, state.ir.nir, st_nir_lower_fog, key->fog, fp->Parameters);
         NIR_PASS(_, state.ir.nir, nir_lower_io_to_temporaries,
            nir_shader_get_entrypoint(state.ir.nir),
            true, false);
         nir_lower_global_vars_to_local(state.ir.nir);
      }

      NIR_PASS(_, state.ir.nir, st_nir_lower_atifs_samplers, key->texture_index);

      finalize = true;
   }

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }

   if (key->lower_flatshade) {
      NIR_PASS(_, state.ir.nir, nir_lower_flatshade);
      finalize = true;
   }

   if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
      _mesa_add_state_reference(params, alpha_ref_state);
      NIR_PASS(_, state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                  false, alpha_ref_state);
      finalize = true;
   }

   if (key->lower_two_sided_color) {
      bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
      NIR_PASS(_, state.ir.nir, nir_lower_two_sided_color, face_sysval);
      finalize = true;
   }

   if (key->persample_shading) {
      nir_shader *shader = state.ir.nir;
      if (shader->info.io_lowered) {
         nir_shader_intrinsics_pass(shader, force_persample_shading,
                                    nir_metadata_all, NULL);
      } else {
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
      }

      /* In addition to requiring per-sample interpolation, sample shading
       * changes the behaviour of gl_SampleMaskIn, so we need per-sample shading
       * even if there are no shader-in variables at all. In that case,
       * uses_sample_shading won't be set by glsl_to_nir. We need to do so here.
       */
      shader->info.fs.uses_sample_shading = true;

      finalize = true;
   }

   if (st->emulate_gl_clamp &&
         (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
      finalize = true;
   }

   assert(!(key->bitmap && key->drawpixels));

   /* glBitmap */
   if (key->bitmap) {
      nir_lower_bitmap_options options = {0};

      variant->bitmap_sampler = ffs(~fp->SamplersUsed) - 1;
      options.sampler = variant->bitmap_sampler;
      options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

      NIR_PASS(_, state.ir.nir, nir_lower_bitmap, &options);
      finalize = true;
   }
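
   /* Note on the ffs(~SamplersUsed) - 1 idiom used above and below: it
    * yields the index of the lowest sampler slot the program does not
    * already use (ffs() is 1-based, hence the -1), giving the lowered
    * bitmap/drawpixels code a free sampler binding point.
    */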

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      nir_lower_drawpixels_options options = {{0}};
      unsigned samplers_used = fp->SamplersUsed;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~samplers_used) - 1;
      options.drawpix_sampler = variant->drawpix_sampler;
      samplers_used |= (1 << variant->drawpix_sampler);

      options.pixel_maps = key->pixelMaps;
      if (key->pixelMaps) {
         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
         options.pixelmap_sampler = variant->pixelmap_sampler;
      }

      options.scale_and_bias = key->scaleAndBias;
      if (key->scaleAndBias) {
         _mesa_add_state_reference(params, scale_state);
         memcpy(options.scale_state_tokens, scale_state,
                  sizeof(options.scale_state_tokens));
         _mesa_add_state_reference(params, bias_state);
         memcpy(options.bias_state_tokens, bias_state,
                  sizeof(options.bias_state_tokens));
      }

      _mesa_add_state_reference(params, texcoord_state);
      memcpy(options.texcoord_state_tokens, texcoord_state,
               sizeof(options.texcoord_state_tokens));

      NIR_PASS(_, state.ir.nir, nir_lower_drawpixels, &options);
      finalize = true;
   }

   bool need_lower_tex_src_plane = false;

   if (unlikely(key->external.lower_nv12 || key->external.lower_nv21 ||
                  key->external.lower_iyuv ||
                  key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                  key->external.lower_yx_xvxu || key->external.lower_xy_vxux ||
                  key->external.lower_ayuv || key->external.lower_xyuv ||
                  key->external.lower_yuv || key->external.lower_yu_yv ||
                  key->external.lower_yv_yu || key->external.lower_y41x)) {

      st_nir_lower_samplers(st->screen, state.ir.nir,
                              fp->shader_program, fp);

      nir_lower_tex_options options = {0};
      options.lower_y_uv_external = key->external.lower_nv12;
      options.lower_y_vu_external = key->external.lower_nv21;
      options.lower_y_u_v_external = key->external.lower_iyuv;
      options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
      options.lower_xy_vxux_external = key->external.lower_xy_vxux;
      options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
      options.lower_yx_xvxu_external = key->external.lower_yx_xvxu;
      options.lower_ayuv_external = key->external.lower_ayuv;
      options.lower_xyuv_external = key->external.lower_xyuv;
      options.lower_yuv_external = key->external.lower_yuv;
      options.lower_yu_yv_external = key->external.lower_yu_yv;
      options.lower_yv_yu_external = key->external.lower_yv_yu;
      options.lower_y41x_external = key->external.lower_y41x;
      options.bt709_external = key->external.bt709;
      options.bt2020_external = key->external.bt2020;
      options.yuv_full_range_external = key->external.yuv_full_range;
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &options);
      finalize = true;
      need_lower_tex_src_plane = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, fp, fp->shader_program, state.ir.nir, false, false);

   /* This pass needs to happen *after* nir_lower_sampler */
   if (unlikely(need_lower_tex_src_plane)) {
      NIR_PASS(_, state.ir.nir, st_nir_lower_tex_src_plane,
                  ~fp->SamplersUsed,
                  key->external.lower_nv12 | key->external.lower_nv21 |
                     key->external.lower_xy_uxvx | key->external.lower_xy_vxux |
                     key->external.lower_yx_xuxv | key->external.lower_yx_xvxu,
                  key->external.lower_iyuv);
      finalize = true;
   }

   /* It is undefined behavior when an ARB assembly program uses a SHADOW2D
    * target with a texture that is not in a depth format.  NVIDIA silently
    * replaces the SHADOW sampler with a normal one, and games like Penumbra
    * Overture that abuse this UB (issues/8425) work there but broke with
    * Mesa.  Replace the shadow sampler with a normal one here.
    */
   if (!fp->shader_program && ~key->depth_textures & fp->ShadowSamplers) {
      NIR_PASS(_, state.ir.nir, nir_remove_tex_shadow,
                 ~key->depth_textures & fp->ShadowSamplers);
      finalize = true;
   }

   /* This should be after all passes that touch IO. */
   if (state.ir.nir->info.io_lowered &&
       !(state.ir.nir->options->io_options & nir_io_has_intrinsics)) {
      /* Some lowering passes can leave dead code behind, but dead IO intrinsics
       * are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);
      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);
      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice) {
      /* Some of the lowering above may have introduced new varyings */
      nir_shader_gather_info(state.ir.nir,
                              nir_shader_get_entrypoint(state.ir.nir));

      struct pipe_screen *screen = st->screen;
      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }
   }

   variant->base.driver_shader = st_create_nir_shader(st, &state);
   variant->key = *key;

   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct gl_program *fp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(fp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */

      if (fp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%d)",
                          key->bitmap ? "bitmap," : "",
                          key->drawpixels ? "drawpixels," : "",
                          key->scaleAndBias ? "scale_bias," : "",
                          key->pixelMaps ? "pixel_maps," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->persample_shading ? "persample_shading," : "",
                          key->fog ? "fog," : "",
                          key->lower_two_sided_color ? "twoside," : "",
                          key->lower_flatshade ? "flatshade," : "",
                          key->lower_alpha_func != COMPARE_FUNC_ALWAYS ? "alpha_compare," : "",
                          /* skipped ATI_fs targets */
                          fp->ExternalSamplersUsed ? "external?," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "",
                          "depth_textures=", key->depth_textures);
      }

      fpv = st_create_fp_variant(st, fp, key);
      if (fpv) {
         fpv->base.st = key->st;

         st_add_variant(&fp->variants, &fpv->base);
      }
   }

   return fpv;
}

/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *p)
{
   if (!p || p == &_mesa_DummyProgram)
      return;

   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, p->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(&st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(&st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}

/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      if (_mesa_is_desktop_gl_compat(st->ctx) &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, prog, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      if (prog->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }

      /* Shadow samplers require texture in depth format, which we lower to
       * non-shadow if necessary for ARB programs
       */
      if (!prog->shader_program)
         key.depth_textures = prog->ShadowSamplers;

      st_get_fp_variant(st, prog, &key);
      break;
   }

   default:
      assert(0);
   }
}

void
st_serialize_nir(struct gl_program *prog)
{
   if (!prog->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, prog->nir, false);
      blob_finish_get_buffer(&blob, &prog->serialized_nir, &size);
      prog->serialized_nir_size = size;
   }
}

void
st_serialize_base_nir(struct gl_program *prog, nir_shader *nir)
{
   if (!prog->base_serialized_nir && nir->info.stage == MESA_SHADER_VERTEX) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, nir, false);
      blob_finish_get_buffer(&blob, &prog->base_serialized_nir, &size);
      prog->base_serialized_nir_size = size;
   }
}
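
/* Why two serialized copies (a reading of the code above): serialized_nir
 * holds the shader after st/driver finalization for ordinary variants,
 * while base_serialized_nir is captured for vertex shaders before
 * finalization (see st_prog_to_nir_postprocess), so the draw module's CPU
 * fallback can re-lower the same shader with draw_nir_options in
 * get_nir_shader().
 */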

void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   struct gl_context *ctx = st->ctx;
   bool is_bound = false;

   MESA_TRACE_FUNC();

   if (prog->info.stage == MESA_SHADER_VERTEX)
      is_bound = prog == ctx->VertexProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_CTRL)
      is_bound = prog == ctx->TessCtrlProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_EVAL)
      is_bound = prog == ctx->TessEvalProgram._Current;
   else if (prog->info.stage == MESA_SHADER_GEOMETRY)
      is_bound = prog == ctx->GeometryProgram._Current;
   else if (prog->info.stage == MESA_SHADER_FRAGMENT)
      is_bound = prog == ctx->FragmentProgram._Current;
   else if (prog->info.stage == MESA_SHADER_COMPUTE)
      is_bound = prog == ctx->ComputeProgram._Current;

   if (is_bound) {
      if (prog->info.stage == MESA_SHADER_VERTEX) {
         ctx->Array.NewVertexElements = true;
         ctx->NewDriverState |= ST_NEW_VERTEX_PROGRAM(ctx, prog);
      } else {
         ctx->NewDriverState |= prog->affected_states;
      }
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled. If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_base_nir(prog, prog->nir);
      st_serialize_nir(prog);
   }

   /* Always create the default variant of the program. */
   st_precompile_shader_variant(st, prog);
}

/**
 * Called when the program's text/code is changed.  We have to free
 * all shader variants and corresponding gallium shaders when this happens.
 */
GLboolean
st_program_string_notify( struct gl_context *ctx,
                          GLenum target,
                          struct gl_program *prog )
{
   struct st_context *st = st_context(ctx);

   /* GLSL-to-NIR should not end up here. */
   assert(!prog->shader_program);

   st_release_variants(st, prog);

   if (target == GL_FRAGMENT_PROGRAM_ARB ||
       target == GL_FRAGMENT_SHADER_ATI) {
      if (!st_translate_fragment_program(st, prog))
         return false;
   } else if (target == GL_VERTEX_PROGRAM_ARB) {
      if (!st_translate_vertex_program(st, prog))
         return false;
      if (st->add_point_size &&
          gl_nir_can_add_pointsize_to_program(&st->ctx->Const, prog)) {
         prog->skip_pointsize_xfb = true;
         NIR_PASS(_, prog->nir, gl_nir_add_point_size);
      }
   }

   st_finalize_program(st, prog);
   return GL_TRUE;
}
1592