/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "nir/nir_to_tgsi.h"

#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}
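
/* Illustrative example: a vertex shader with two textures and one UBO gets
 * new_sampler_views | new_samplers | new_ubos OR'ed into *states, which for
 * the MESA_SHADER_VERTEX case below means ST_NEW_VS_SAMPLER_VIEWS |
 * ST_NEW_VS_SAMPLERS | ST_NEW_VS_UBOS.
 */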

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}
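
/* Sketch of how affected_states is consumed (assumed -- the binding code
 * lives outside this file): when a program becomes current, st/mesa only
 * needs to flag the states the shader can actually observe, roughly:
 *
 *    st->dirty |= ((struct st_program *)prog)->affected_states;
 *
 * so a shader with no SSBOs never triggers SSBO revalidation.
 */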


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}

static void
st_unbind_program(struct st_context *st, struct st_program *p)
{
   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   /* If we are releasing shaders, unbind them now and flag the state as
    * dirty so st/mesa re-binds them later, because we don't know which
    * shaders are currently bound in the driver.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct st_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, &((*p)->Base));
   st_reference_prog(st, p, NULL);
}

void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* st_nir_assign_vs_in_locations requires correct shader info. */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}

static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice) {
      char *msg = st_finalize_nir(st, prog, NULL, nir, true, true);
      free(msg);
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}

/**
 * Translate ARB (asm) program to NIR
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   const struct nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, prog->info.stage);

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options);

   st_prog_to_nir_postprocess(st, nir, prog);

   return nir;
}

/**
 * Prepare st_vertex_program info.
 *
 * out_attrib_to_index, if non-NULL, receives the mapping from each vertex
 * attrib (VERT_ATTRIB_x) to its shader input index.
 */
void
st_prepare_vertex_program(struct st_program *stp, uint8_t *out_attrib_to_index)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   uint8_t attrib_to_index[VERT_ATTRIB_MAX] = {0};

   stvp->num_inputs = 0;
   stvp->vert_attrib_mask = 0;
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         attrib_to_index[attr] = stvp->num_inputs;
         stvp->vert_attrib_mask |= BITFIELD_BIT(attr);
         stvp->num_inputs++;
      }
   }

   /* pre-setup potentially unused edgeflag input */
   attrib_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;

   if (out_attrib_to_index)
      memcpy(out_attrib_to_index, attrib_to_index, sizeof(attrib_to_index));
}
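
/* Worked example for st_prepare_vertex_program: a program whose inputs_read
 * is VERT_BIT_POS | VERT_BIT_TEX0 gets num_inputs = 2 with
 * attrib_to_index[VERT_ATTRIB_POS] = 0 and
 * attrib_to_index[VERT_ATTRIB_TEX0] = 1; the potentially unused edgeflag
 * input is pre-assigned index 2 in case a variant enables it later.
 */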

void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}
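
/* Example: if a shader writes only VARYING_SLOT_POS and VARYING_SLOT_VAR0
 * and transform feedback captures VAR0, output_mapping[VARYING_SLOT_VAR0]
 * is 1 (POS occupies register 0), so so_info->output[0].register_index
 * ends up as 1.
 */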

/**
 * Creates a driver shader from a NIR shader.  Takes ownership of the
 * passed nir_shader.
 */
struct pipe_shader_state *
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = st->screen;

   assert(state->type == PIPE_SHADER_IR_NIR);
   nir_shader *nir = state->ir.nir;
   gl_shader_stage stage = nir->info.stage;
   enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage);

   if (ST_DEBUG & DEBUG_PRINT_IR) {
      fprintf(stderr, "NIR before handing off to driver:\n");
      nir_print_shader(nir, stderr);
   }

   if (PIPE_SHADER_IR_NIR !=
       screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_PREFERRED_IR)) {
      /* u_screen.c defaults to images as deref enabled for some reason (which
       * is what radeonsi wants), but nir-to-tgsi requires lowered images.
       */
      if (screen->get_param(screen, PIPE_CAP_NIR_IMAGES_AS_DEREF))
         NIR_PASS_V(nir, gl_nir_lower_images, false);

      state->type = PIPE_SHADER_IR_TGSI;
      state->tokens = nir_to_tgsi(nir, screen);

      if (ST_DEBUG & DEBUG_PRINT_IR) {
         fprintf(stderr, "TGSI for driver after nir-to-tgsi:\n");
         tgsi_dump(state->tokens, 0);
         fprintf(stderr, "\n");
      }
   }

   struct pipe_shader_state *shader;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      shader = pipe->create_vs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = pipe->create_tcs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = pipe->create_tes_state(pipe, state);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = pipe->create_gs_state(pipe, state);
      break;
   case MESA_SHADER_FRAGMENT:
      shader = pipe->create_fs_state(pipe, state);
      break;
   case MESA_SHADER_COMPUTE: {
      struct pipe_compute_state cs = {0};
      cs.ir_type = state->type;
      cs.req_local_mem = nir->info.shared_size;

      if (state->type == PIPE_SHADER_IR_NIR)
         cs.prog = state->ir.nir;
      else
         cs.prog = state->tokens;

      shader = pipe->create_compute_state(pipe, &cs);
      break;
   }
   default:
      unreachable("unsupported shader stage");
      return NULL;
   }

   if (state->type == PIPE_SHADER_IR_TGSI)
      tgsi_free_tokens(state->tokens);

   return shader;
}
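
/* Minimal usage sketch for st_create_nir_shader (it mirrors the NIR path of
 * st_create_common_variant below); the caller gives up ownership of the
 * nir_shader, and the TGSI tokens are freed here if nir-to-tgsi was taken:
 *
 *    struct pipe_shader_state state = {0};
 *    state.type = PIPE_SHADER_IR_NIR;
 *    state.ir.nir = nir;
 *    void *driver_shader = st_create_nir_shader(st, &state);
 */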

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      if (stp->Base.nir)
         ralloc_free(stp->Base.nir);

      if (stp->serialized_nir) {
         free(stp->serialized_nir);
         stp->serialized_nir = NULL;
      }

      stp->state.type = PIPE_SHADER_IR_NIR;
      stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                               MESA_SHADER_VERTEX);
      stp->Base.info = stp->Base.nir->info;

      st_prepare_vertex_program(stp, NULL);
      return true;
   }

   uint8_t input_to_index[VERT_ATTRIB_MAX];
   st_prepare_vertex_program(stp, input_to_index);

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   error = st_translate_program(st->ctx,
                                PIPE_SHADER_VERTEX,
                                ureg,
                                stp->glsl_to_tgsi,
                                &stp->Base,
                                /* inputs */
                                stvp->num_inputs,
                                input_to_index,
                                NULL, /* inputSlotToAttr */
                                NULL, /* input semantic name */
                                NULL, /* input semantic index */
                                NULL, /* interp mode */
                                /* outputs */
                                num_outputs,
                                stvp->result_to_output,
                                output_semantic_name,
                                output_semantic_index);

   st_translate_stream_output_info(&stp->Base);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);

   if (error) {
      debug_printf("%s: failed to translate GLSL IR program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   stp->glsl_to_tgsi = NULL;
   st_store_ir_in_disk_cache(st, &stp->Base, false);

   return stp->state.tokens != NULL;
}

static struct nir_shader *
get_nir_shader(struct st_context *st, struct st_program *stp)
{
   if (stp->Base.nir) {
      nir_shader *nir = stp->Base.nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning.  Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      stp->Base.nir = NULL;
      assert(stp->serialized_nir && stp->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      st_get_nir_compiler_options(st, stp->Base.info.stage);

   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
   return nir_deserialize(NULL, options, &blob_reader);
}
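
/* The contract above: program translation leaves the freshly built NIR in
 * stp->Base.nir together with a serialized copy (produced elsewhere, e.g.
 * by an st_serialize_nir helper outside this section).  The first variant
 * consumes Base.nir directly; every later variant deserializes a private
 * copy, so callers of get_nir_shader may mutate the returned shader freely.
 */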

static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
      NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
   else {
      struct pipe_screen *screen = st->screen;
      bool can_compact = screen->get_param(screen,
                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
            clipplane_state[i][1] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX) {
         NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
                    true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
                    can_compact, clipplane_state);
      }

      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }
}
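
/* Note on the two state tokens above (assumed semantics): STATE_CLIPPLANE
 * fetches the application's eye-space plane equations, appropriate when a
 * vertex shader program is bound (use_eye); STATE_CLIP_INTERNAL fetches
 * planes core Mesa has already transformed for the fixed-function path.
 */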

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

static struct st_common_variant *
st_create_common_variant(struct st_context *st,
                         struct st_program *stp,
                         const struct st_common_variant_key *key)
{
   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stp->Base.Parameters;

   v->key = *key;

   state.stream_output = stp->state.stream_output;

   if (stp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stp);
      const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);

         switch (stp->Base.info.stage) {
         case MESA_SHADER_VERTEX:
            stp->affected_states |= ST_NEW_VS_CONSTANTS;
            break;
         case MESA_SHADER_TESS_EVAL:
            stp->affected_states |= ST_NEW_TES_CONSTANTS;
            break;
         case MESA_SHADER_GEOMETRY:
            stp->affected_states |= ST_NEW_GS_CONSTANTS;
            break;
         default:
            unreachable("bad shader stage");
         }

         finalize = true;
      }

      if (key->lower_ucp) {
         assert(!options->unify_interfaces);
         lower_ucp(st, state.ir.nir, key->lower_ucp, params);
         finalize = true;
      }

      if (st->emulate_gl_clamp &&
          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
         nir_lower_tex_options tex_opts = {0};
         tex_opts.saturate_s = key->gl_clamp[0];
         tex_opts.saturate_t = key->gl_clamp[1];
         tex_opts.saturate_r = key->gl_clamp[2];
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         char *msg = st_finalize_nir(st, &stp->Base, stp->shader_program, state.ir.nir,
                                     true, false);
         free(msg);

         /* Clip lowering and edgeflags may have introduced new varyings, so
          * update the inputs_read/outputs_written.  However, with
          * unify_interfaces set (aka iris) the non-SSO varyings layout is
          * decided at link time with outputs_written updated so the two line
          * up.  A driver with this flag set may not use any of the lowering
          * passes that would change the varyings, so skip to make sure we
          * don't break its linkage.
          */
         if (!options->unify_interfaces) {
            nir_shader_gather_info(state.ir.nir,
                                   nir_shader_get_entrypoint(state.ir.nir));
         }
      }

      if (key->is_draw_shader)
         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         v->base.driver_shader = st_create_nir_shader(st, &state);

      return v;
   }

   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   switch (stp->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      if (key->is_draw_shader)
         v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         v->base.driver_shader = pipe->create_vs_state(pipe, &state);
      break;
   case MESA_SHADER_TESS_CTRL:
      v->base.driver_shader = pipe->create_tcs_state(pipe, &state);
      break;
   case MESA_SHADER_TESS_EVAL:
      v->base.driver_shader = pipe->create_tes_state(pipe, &state);
      break;
   case MESA_SHADER_GEOMETRY:
      v->base.driver_shader = pipe->create_gs_state(pipe, &state);
      break;
   case MESA_SHADER_COMPUTE: {
      struct pipe_compute_state cs = {0};
      cs.ir_type = state.type;
      cs.req_local_mem = stp->Base.info.shared_size;

      if (state.type == PIPE_SHADER_IR_NIR)
         cs.prog = state.ir.nir;
      else
         cs.prog = state.tokens;

      v->base.driver_shader = pipe->create_compute_state(pipe, &cs);
      break;
   }
   default:
      assert(!"unhandled shader type");
      free(v);
      return NULL;
   }

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return v;
}

static void
st_add_variant(struct st_variant **list, struct st_variant *v)
{
   struct st_variant *first = *list;

   /* Make sure that the default variant stays first in the list, and insert
    * any new variant right after it, as the second entry.
    */
   if (first) {
      v->next = first->next;
      first->next = v;
   } else {
      *list = v;
   }
}
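
/* Example of the resulting order: starting from [default], adding B gives
 * [default, B]; then adding C gives [default, C, B].  The default variant,
 * compiled at link/translate time, is usually the one looked up, so it
 * stays at the head.
 */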

/**
 * Find/create a common (VS/TCS/TES/GS/CS) program variant.
 */
struct st_common_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *stp,
                      const struct st_common_variant_key *key)
{
   struct st_common_variant *v;

   /* Search for existing variant */
   for (v = st_common_variant(stp->variants); v;
        v = st_common_variant(v->base.next)) {
      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!v) {
      if (stp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling %s shader variant (%s%s%s%s%s%s%s%s)",
                          _mesa_shader_stage_to_string(stp->Base.info.stage),
                          key->passthrough_edgeflags ? "edgeflags," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->lower_depth_clamp ? "depth_clamp," : "",
                          key->clip_negative_one_to_one ? "clip_negative_one," : "",
                          key->lower_point_size ? "point_size," : "",
                          key->lower_ucp ? "ucp," : "",
                          key->is_draw_shader ? "draw," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      /* create now */
      v = st_create_common_variant(st, stp, key);
      if (v) {
         v->base.st = key->st;

         if (stp->Base.info.stage == MESA_SHADER_VERTEX) {
            struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

            v->vert_attrib_mask =
               stvp->vert_attrib_mask |
               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
         }

         st_add_variant(&stp->variants, &v->base);
      }
   }

   return v;
}
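
/* Hypothetical call-site sketch for st_get_common_variant: because variants
 * are matched with memcmp(), the key must be zeroed (padding included)
 * before any fields are set:
 *
 *    struct st_common_variant_key key;
 *    memset(&key, 0, sizeof(key));
 *    key.st = st->has_shareable_shaders ? NULL : st;
 *    key.clamp_color = ...;   // per current GL state
 *    struct st_common_variant *v = st_get_common_variant(st, stp, &key);
 *    cso_set_vertex_shader_handle(st->cso_context, v->base.driver_shader);
 */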


/**
 * Translate a Mesa fragment shader into a TGSI shader.
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR.  ATI_fs translates at variant time. */
      if (!stfp->ati_fs) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         if (stfp->serialized_nir) {
            free(stfp->serialized_nir);
            stfp->serialized_nir = NULL;
         }
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
      }

      return true;
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
            /* In most cases, there is nothing special about these
             * inputs, so adopt a convention to use the generic
             * semantic name and the mesa VARYING_SLOT_ number as the
             * index.
             *
             * All that is required is that the vertex shader labels
             * its own outputs similarly, and that the vertex shader
             * generates at least every output required by the
             * fragment shader plus fixed-function hardware (such as
             * BFC).
             *
             * However, some drivers may need us to identify the PNTC and TEXi
             * varyings if, for example, their capability to replace them with
             * sprite coordinates is limited.
             */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            FALLTHROUGH;
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            FALLTHROUGH;
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE;
            FALLTHROUGH;
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->screen);
   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stfp->Base.info);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   }

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   if (stfp->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      nir_shader *s = st_translate_atifs_program(stfp->ati_fs, key, &stfp->Base, options);

      st_prog_to_nir_postprocess(st, s, &stfp->Base);

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = s;
   } else if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);
   }

   if (state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
         finalize = true;
      }

      if (key->lower_texcoord_replace) {
         bool point_coord_is_sysval = st->ctx->Const.GLSLPointCoordIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_texcoord_replace,
                    key->lower_texcoord_replace, point_coord_is_sysval, false);
         finalize = true;
      }

      if (st->emulate_gl_clamp &&
          (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
         nir_lower_tex_options tex_opts = {0};
         tex_opts.saturate_s = key->gl_clamp[0];
         tex_opts.saturate_t = key->gl_clamp[1];
         tex_opts.saturate_r = key->gl_clamp[2];
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts);
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      bool need_lower_tex_src_plane = false;

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv ||
                   key->external.lower_yuv || key->external.lower_yu_yv ||
                   key->external.lower_y41x)) {

         st_nir_lower_samplers(st->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         options.lower_yuv_external = key->external.lower_yuv;
         options.lower_yu_yv_external = key->external.lower_yu_yv;
         options.lower_y41x_external = key->external.lower_y41x;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
         need_lower_tex_src_plane = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         char *msg = st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                                     false, false);
         free(msg);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(need_lower_tex_src_plane)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 | key->external.lower_xy_uxvx |
                    key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = st->screen;
         if (screen->finalize_nir) {
            char *msg = screen->finalize_nir(screen, state.ir.nir);
            free(msg);
         }
      }

      variant->base.driver_shader = st_create_nir_shader(st, &state);
      variant->key = *key;

      return variant;
   }

   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                 key->external.lower_xy_uxvx ||
                                 key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant, translating if needed.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */

      if (stfp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
                          key->bitmap ? "bitmap," : "",
                          key->drawpixels ? "drawpixels," : "",
                          key->scaleAndBias ? "scale_bias," : "",
                          key->pixelMaps ? "pixel_maps," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->persample_shading ? "persample_shading," : "",
                          key->fog ? "fog," : "",
                          key->lower_depth_clamp ? "depth_clamp," : "",
                          key->lower_two_sided_color ? "twoside," : "",
                          key->lower_flatshade ? "flatshade," : "",
                          key->lower_texcoord_replace ? "texcoord_replace," : "",
                          key->lower_alpha_func ? "alpha_compare," : "",
                          /* skipped ATI_fs targets */
                          stfp->Base.ExternalSamplersUsed ? "external?," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         st_add_variant(&stfp->variants, &fpv->base);
      }
   }

   return fpv;
}
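
/* st_get_fp_variant follows the same lookup/insert protocol as
 * st_get_common_variant above: keys are compared with memcmp(), so callers
 * must zero-initialize the whole st_fp_variant_key, padding included,
 * before filling in fields.
 */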
1705
/**
 * Translate a program. This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->screen);

   if (ureg == NULL)
      return false;

   ureg_setup_shader_info(ureg, &stp->Base.info);

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
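   /* Tessellation patch varyings are tracked in a separate 32-bit mask
    * (patch_inputs_read) and use TGSI_SEMANTIC_PATCH instead of the
    * per-slot varying semantics above.
    */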
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if ((ST_DEBUG & DEBUG_PRINT_IR) && (ST_DEBUG & DEBUG_MESA))
      _mesa_print_program(prog);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}


/**
 * Vert/Geom/Frag programs have per-context variants. Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target)
{
   if (!target || target == &_mesa_DummyProgram)
      return;

   struct st_program *p = st_program(target);
   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, target->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk. Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

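   /* The ShaderObjects hash table stores both gl_shader and
    * gl_shader_program objects (they share one name space), so check the
    * Type field before deciding how to walk the object.
    */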
   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk. Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}


/**
 * Precompile the default variant of a shader, so it's ready on first use.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_program *p = (struct st_program *)prog;
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

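      /* In a compatibility profile, vertex colors may have to be clamped
       * in the shader if the driver can't clamp them in fixed function.
       */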
      if (st->ctx->API == API_OPENGL_COMPAT &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, p, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
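      /* ATI fragment shaders don't know their sampler targets until draw
       * time, so assume 2D textures for the precompiled default variant.
       */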
      if (p->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }
      st_get_fp_variant(st, p, &key);
      break;
   }

   default:
      assert(0);
   }
}

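/**
 * Serialize the program's NIR to a memory blob, so that it can be
 * deserialized later when compiling additional variants, even after the
 * original nir_shader has been consumed or freed.
 */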
void
st_serialize_nir(struct st_program *stp)
{
   if (!stp->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, stp->Base.nir, false);
      blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
      stp->serialized_nir_size = size;
   }
}

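/**
 * Called when a program is done being changed (compiled or relinked):
 * flag the affected dirty state, compact and serialize the NIR, and
 * build the default variant.
 */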
void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX)
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
      else
         st->dirty |= ((struct st_program *)prog)->affected_states;
   }

   if (prog->nir) {
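      /* Reclaim ralloc'ed memory that optimization passes left unused. */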
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled. If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_nir(st_program(prog));
   }

   /* Always create the default variant of the program. */
   st_precompile_shader_variant(st, prog);
}