/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"

#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_atifs_to_tgsi.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant. Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it. Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}

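/**
 * Unbind the program in cso_context and flag the matching st/mesa state
 * dirty, so that a shader is re-bound on the next state validation.
 */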
static void
st_unbind_program(struct st_context *st, struct st_program *p)
{
   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   /* Unbind the shaders we are about to release: we don't know which of
    * them the driver currently has bound, and st/mesa will re-bind the
    * current program on the next state validation.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: if pipe->create_*_state was called on a pipe_shader_state whose
    * ir.nir was set, the driver took ownership of that NIR. Such callers
    * should NULL out the nir field in the pipe_shader_state to indicate
    * the ownership transfer.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct st_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, &((*p)->Base));
   st_reference_prog(st, p, NULL);
}

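/**
 * Run the NIR lowering passes that must run before shader variants are
 * created: split and lower variable copies, lower I/O arrays to elements
 * where needed, re-gather shader info, and assign VS input locations.
 */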
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* st_nir_assign_vs_in_locations requires correct shader info. */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}

/**
 * Translate ARB (asm) program to NIR
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}

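/**
 * Set up the input/output mappings of a vertex program: VERT_ATTRIB_x to
 * TGSI input index, and gl_varying_slot to output slot, including the
 * potentially unused edgeflag input and output.
 */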
void
st_prepare_vertex_program(struct st_program *stp)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   stvp->num_inputs = 0;
   memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;

         if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
            /* add placeholder for second part of a double attribute */
            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
            stvp->num_inputs++;
         }
      }
   }
   /* pre-setup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}

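/**
 * Translate the program's gl_transform_feedback_info into the
 * pipe_stream_output_info of the st_program, remapping output registers
 * to slot indices.
 */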
void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* Translate to NIR if preferred. */
      if (PIPE_SHADER_IR_NIR ==
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_VERTEX,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         assert(!stp->glsl_to_tgsi);

         if (stp->Base.nir)
            ralloc_free(stp->Base.nir);

         if (stp->serialized_nir) {
            free(stp->serialized_nir);
            stp->serialized_nir = NULL;
         }

         stp->state.type = PIPE_SHADER_IR_NIR;
         stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                                  MESA_SHADER_VERTEX);
         stp->Base.info = stp->Base.nir->info;

         /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
          * use LLVM.
          */
         if (draw_has_llvm()) {
            st_prepare_vertex_program(stp);
            return true;
         }
      }
   }

   st_prepare_vertex_program(stp);

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   return stp->state.tokens != NULL;
}

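/**
 * Return a NIR shader for creating a variant. The first call returns the
 * program's own NIR and transfers ownership of it; subsequent calls
 * deserialize a fresh copy from the serialized NIR blob.
 */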
static struct nir_shader *
get_nir_shader(struct st_context *st, struct st_program *stp)
{
   if (stp->Base.nir) {
      nir_shader *nir = stp->Base.nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning. Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      stp->Base.nir = NULL;
      assert(stp->serialized_nir && stp->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;

   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
   return nir_deserialize(NULL, options, &blob_reader);
}

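/**
 * Lower user clip planes. If the shader already writes gl_ClipDistance,
 * only the unused planes are disabled; otherwise clip-plane state
 * references are added and nir_lower_clip_vs/gs does the lowering.
 */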
static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
      NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
   else {
      struct pipe_screen *screen = st->pipe->screen;
      bool can_compact = screen->get_param(screen,
                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_INTERNAL;
            clipplane_state[i][1] = STATE_CLIP_INTERNAL;
            clipplane_state[i][2] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX) {
         NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
                    true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
                    can_compact, clipplane_state);
      }

      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }
}

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

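/**
 * Create a new vertex program variant for the given key, applying the
 * requested NIR or TGSI lowerings before the shader is handed to the
 * driver (or to the draw module for feedback mode).
 */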
static struct st_common_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;

   state.stream_output = stvp->state.stream_output;

   if (stvp->state.type == PIPE_SHADER_IR_NIR &&
       (!key->is_draw_shader || draw_has_llvm())) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stvp);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         lower_ucp(st, state.ir.nir, key->lower_ucp, params);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return vpv;
}


/**
 * Find/create a vertex program variant.
 */
struct st_common_variant *
st_get_vp_variant(struct st_context *st,
                  struct st_program *stp,
                  const struct st_common_variant_key *key)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   struct st_common_variant *vpv;

   /* Search for existing variant */
   for (vpv = st_common_variant(stp->variants); vpv;
        vpv = st_common_variant(vpv->base.next)) {
      if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!vpv) {
      /* create now */
      vpv = st_create_vp_variant(st, stp, key);
      if (vpv) {
         vpv->base.st = key->st;

         unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
         for (unsigned index = 0; index < num_inputs; ++index) {
            unsigned attr = stvp->index_to_input[index];
            if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
               continue;
            vpv->vert_attrib_mask |= 1u << attr;
         }

         /* insert into list */
         vpv->base.next = stp->variants;
         stp->variants = &vpv->base;
      }
   }

   return vpv;
}


/**
 * Translate a Mesa fragment shader into a TGSI shader.
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR. */
      if (!stfp->ati_fs &&
          PIPE_SHADER_IR_NIR ==
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_FRAGMENT,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         if (stfp->serialized_nir) {
            free(stfp->serialized_nir);
            stfp->serialized_nir = NULL;
         }
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
         return true;
      }
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
         /* In most cases, there is nothing special about these
          * inputs, so adopt a convention to use the generic
          * semantic name and the mesa VARYING_SLOT_ number as the
          * index.
          *
          * All that is required is that the vertex shader labels
          * its own outputs similarly, and that the vertex shader
          * generates at least every output required by the
          * fragment shader plus fixed-function hardware (such as
          * BFC).
          *
          * However, some drivers may need us to identify the PNTC and TEXi
          * varyings if, for example, their capability to replace them with
          * sprite coordinates is limited.
          */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            /* fall through */
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE; /* fallthrough */
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stfp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   } else if (stfp->ati_fs)
      st_translate_atifs_program(ureg,
                                 stfp->ati_fs,
                                 &stfp->Base,
                                 /* inputs */
                                 fs_num_inputs,
                                 inputMapping,
                                 input_semantic_name,
                                 input_semantic_index,
                                 interpMode,
                                 /* outputs */
                                 fs_num_outputs,
                                 outputMapping,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index);
   else
      st_translate_mesa_program(st->ctx,
                                PIPE_SHADER_FRAGMENT,
                                ureg,
                                &stfp->Base,
                                /* inputs */
                                fs_num_inputs,
                                inputMapping,
                                input_semantic_name,
                                input_semantic_index,
                                interpMode,
                                /* outputs */
                                fs_num_outputs,
                                outputMapping,
                                fs_output_semantic_name,
                                fs_output_semantic_index);

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

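/**
 * Create a new fragment program variant for the given key, applying
 * variant-specific lowerings (clamp color, alpha test, glBitmap,
 * glDrawPixels, external YUV samplers, ...) before the shader is handed
 * to the driver.
 */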
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv ||
                   key->external.lower_yuv)) {

         st_nir_lower_samplers(pipe->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         options.lower_yuv_external = key->external.lower_yuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* st_nir_lower_samplers */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv ||
                   key->external.lower_yuv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                    key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                 key->external.lower_xy_uxvx ||
                                 key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */
      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         if (key->bitmap || key->drawpixels) {
            /* Regular variants should always come before the
             * bitmap & drawpixels variants (unless there are no
             * regular variants), so that st_update_fp can take a
             * fast path when shader_has_one_variant is set.
             */
            if (!stfp->variants) {
               stfp->variants = &fpv->base;
            } else {
               /* insert into list after the first one */
               fpv->base.next = stfp->variants->next;
               stfp->variants->next = &fpv->base;
            }
         } else {
            /* insert into list */
            fpv->base.next = stfp->variants;
            stfp->variants = &fpv->base;
         }
      }
   }

   return fpv;
}

/**
 * Translate a program. This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if ((ST_DEBUG & DEBUG_PRINT_IR) && (ST_DEBUG & DEBUG_MESA))
      _mesa_print_program(prog);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}


/**
 * Get/create a basic program variant.
 */
struct st_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *prog,
                      const struct st_common_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_variant *v;
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = prog->Base.Parameters;

   /* Search for existing variant */
   for (v = prog->variants; v; v = v->next) {
      if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
         break;
   }

   if (!v) {
      /* create new */
      v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
      if (v) {
         if (prog->state.type == PIPE_SHADER_IR_NIR) {
            bool finalize = false;

            state.type = PIPE_SHADER_IR_NIR;
            state.ir.nir = get_nir_shader(st, prog);

            if (key->clamp_color) {
               NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
               finalize = true;
            }

            if (key->lower_ucp) {
               lower_ucp(st, state.ir.nir, key->lower_ucp, params);
               finalize = true;
            }

            state.stream_output = prog->state.stream_output;

            if (finalize || !st->allow_st_finalize_nir_twice) {
               st_finalize_nir(st, &prog->Base, prog->shader_program,
                               state.ir.nir, true);
            }

            if (ST_DEBUG & DEBUG_PRINT_IR)
               nir_print_shader(state.ir.nir, stderr);
         } else {
            if (key->lower_depth_clamp) {
               struct gl_program_parameter_list *params = prog->Base.Parameters;

               unsigned depth_range_const =
                  _mesa_add_state_reference(params, depth_range_state);

               const struct tgsi_token *tokens;
               tokens =
                  st_tgsi_lower_depth_clamp(prog->state.tokens,
                                            depth_range_const,
                                            key->clip_negative_one_to_one);

               if (tokens != prog->state.tokens)
                  tgsi_free_tokens(prog->state.tokens);

               prog->state.tokens = tokens;
            }
            state = prog->state;

            if (ST_DEBUG & DEBUG_PRINT_IR)
               tgsi_dump(state.tokens, 0);
         }
         /* fill in new variant */
         switch (prog->Base.info.stage) {
         case MESA_SHADER_TESS_CTRL:
            v->driver_shader = pipe->create_tcs_state(pipe, &state);
            break;
         case MESA_SHADER_TESS_EVAL:
            v->driver_shader = pipe->create_tes_state(pipe, &state);
            break;
         case MESA_SHADER_GEOMETRY:
            v->driver_shader = pipe->create_gs_state(pipe, &state);
            break;
         case MESA_SHADER_COMPUTE: {
            struct pipe_compute_state cs = {0};
            cs.ir_type = state.type;
            cs.req_local_mem = prog->Base.info.cs.shared_size;

            if (state.type == PIPE_SHADER_IR_NIR)
               cs.prog = state.ir.nir;
            else
               cs.prog = state.tokens;

            v->driver_shader = pipe->create_compute_state(pipe, &cs);
            break;
         }
         default:
            assert(!"unhandled shader type");
            free(v);
            return NULL;
         }

         st_common_variant(v)->key = *key;
         v->st = key->st;

         /* insert into list */
         v->next = prog->variants;
         prog->variants = v;
      }
   }

   return v;
}


/**
 * Vert/Geom/Frag programs have per-context variants. Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target)
{
   if (!target || target == &_mesa_DummyProgram)
      return;

   struct st_program *p = st_program(target);
   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, target->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk. Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk. Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}


/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_vp_variant(st, p, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      st_get_fp_variant(st, p, &key);
      break;
   }

   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_program *p = st_program(prog);
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, p, &key);
      break;
   }

   default:
      assert(0);
   }
}

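/**
 * Serialize the program's NIR into stp->serialized_nir (if not already
 * done), so that later variants can be deserialized from it instead of
 * keeping a live NIR copy around.
 */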
void
st_serialize_nir(struct st_program *stp)
{
   if (!stp->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, stp->Base.nir, false);
      blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
      stp->serialized_nir_size = size;
   }
}

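/**
 * Finalize a program after translation: flag dependent state dirty if the
 * program is currently bound, serialize its NIR if present, and precompile
 * a Gallium shader when precompilation is enabled or only one variant is
 * expected.
 */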
void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX)
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
      else
         st->dirty |= ((struct st_program *)prog)->affected_states;
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled. If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_nir(st_program(prog));
   }

   /* Create Gallium shaders now instead of on demand. */
   if ((ST_DEBUG & DEBUG_PRECOMPILE) ||
       st->shader_has_one_variant[prog->info.stage])
      st_precompile_shader_variant(st, prog);
}