1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 * Brian Paul
31 */
32
33
34 #include "nir_builder.h"
35 #include "main/errors.h"
36
37 #include "main/hash.h"
38 #include "main/mtypes.h"
39 #include "nir/nir_xfb_info.h"
40 #include "nir/pipe_nir.h"
41 #include "program/prog_parameter.h"
42 #include "program/prog_print.h"
43 #include "program/prog_to_nir.h"
44
45 #include "compiler/glsl/gl_nir.h"
46 #include "compiler/glsl/gl_nir_linker.h"
47 #include "compiler/nir/nir.h"
48 #include "compiler/nir/nir_serialize.h"
49 #include "draw/draw_context.h"
50
51 #include "pipe/p_context.h"
52 #include "pipe/p_defines.h"
53 #include "pipe/p_shader_tokens.h"
54 #include "draw/draw_context.h"
55
56 #include "util/u_dump.h"
57 #include "util/u_memory.h"
58
59 #include "st_debug.h"
60 #include "st_cb_bitmap.h"
61 #include "st_cb_drawpixels.h"
62 #include "st_context.h"
63 #include "st_program.h"
64 #include "st_atifs_to_nir.h"
65 #include "st_nir.h"
66 #include "st_shader_cache.h"
67 #include "st_util.h"
68 #include "cso_cache/cso_context.h"
69
70
71 static void
72 destroy_program_variants(struct st_context *st, struct gl_program *target);
73
74 static void
set_affected_state_flags(uint64_t * states,struct gl_program * prog,uint64_t new_constants,uint64_t new_sampler_views,uint64_t new_samplers,uint64_t new_images,uint64_t new_ubos,uint64_t new_ssbos,uint64_t new_atomics)75 set_affected_state_flags(uint64_t *states,
76 struct gl_program *prog,
77 uint64_t new_constants,
78 uint64_t new_sampler_views,
79 uint64_t new_samplers,
80 uint64_t new_images,
81 uint64_t new_ubos,
82 uint64_t new_ssbos,
83 uint64_t new_atomics)
84 {
85 if (prog->Parameters->NumParameters)
86 *states |= new_constants;
87
88 if (prog->info.num_textures)
89 *states |= new_sampler_views | new_samplers;
90
91 if (prog->info.num_images)
92 *states |= new_images;
93
94 if (prog->info.num_ubos)
95 *states |= new_ubos;
96
97 if (prog->info.num_ssbos)
98 *states |= new_ssbos;
99
100 if (prog->info.num_abos)
101 *states |= new_atomics;
102 }
103
104 /**
105 * This determines which states will be updated when the shader is bound.
106 */
107 void
st_set_prog_affected_state_flags(struct gl_program * prog)108 st_set_prog_affected_state_flags(struct gl_program *prog)
109 {
110 uint64_t *states;
111
112 switch (prog->info.stage) {
113 case MESA_SHADER_VERTEX:
114 states = &prog->affected_states;
115
116 *states = ST_NEW_VS_STATE |
117 ST_NEW_RASTERIZER |
118 ST_NEW_VERTEX_ARRAYS;
119
120 set_affected_state_flags(states, prog,
121 ST_NEW_VS_CONSTANTS,
122 ST_NEW_VS_SAMPLER_VIEWS,
123 ST_NEW_VS_SAMPLERS,
124 ST_NEW_VS_IMAGES,
125 ST_NEW_VS_UBOS,
126 ST_NEW_VS_SSBOS,
127 ST_NEW_VS_ATOMICS);
128 break;
129
130 case MESA_SHADER_TESS_CTRL:
131 states = &prog->affected_states;
132
133 *states = ST_NEW_TCS_STATE;
134
135 set_affected_state_flags(states, prog,
136 ST_NEW_TCS_CONSTANTS,
137 ST_NEW_TCS_SAMPLER_VIEWS,
138 ST_NEW_TCS_SAMPLERS,
139 ST_NEW_TCS_IMAGES,
140 ST_NEW_TCS_UBOS,
141 ST_NEW_TCS_SSBOS,
142 ST_NEW_TCS_ATOMICS);
143 break;
144
145 case MESA_SHADER_TESS_EVAL:
146 states = &prog->affected_states;
147
148 *states = ST_NEW_TES_STATE |
149 ST_NEW_RASTERIZER;
150
151 set_affected_state_flags(states, prog,
152 ST_NEW_TES_CONSTANTS,
153 ST_NEW_TES_SAMPLER_VIEWS,
154 ST_NEW_TES_SAMPLERS,
155 ST_NEW_TES_IMAGES,
156 ST_NEW_TES_UBOS,
157 ST_NEW_TES_SSBOS,
158 ST_NEW_TES_ATOMICS);
159 break;
160
161 case MESA_SHADER_GEOMETRY:
162 states = &prog->affected_states;
163
164 *states = ST_NEW_GS_STATE |
165 ST_NEW_RASTERIZER;
166
167 set_affected_state_flags(states, prog,
168 ST_NEW_GS_CONSTANTS,
169 ST_NEW_GS_SAMPLER_VIEWS,
170 ST_NEW_GS_SAMPLERS,
171 ST_NEW_GS_IMAGES,
172 ST_NEW_GS_UBOS,
173 ST_NEW_GS_SSBOS,
174 ST_NEW_GS_ATOMICS);
175 break;
176
177 case MESA_SHADER_FRAGMENT:
178 states = &prog->affected_states;
179
180 /* gl_FragCoord and glDrawPixels always use constants. */
181 *states = ST_NEW_FS_STATE |
182 ST_NEW_SAMPLE_SHADING |
183 ST_NEW_FS_CONSTANTS;
184
185 set_affected_state_flags(states, prog,
186 ST_NEW_FS_CONSTANTS,
187 ST_NEW_FS_SAMPLER_VIEWS,
188 ST_NEW_FS_SAMPLERS,
189 ST_NEW_FS_IMAGES,
190 ST_NEW_FS_UBOS,
191 ST_NEW_FS_SSBOS,
192 ST_NEW_FS_ATOMICS);
193 break;
194
195 case MESA_SHADER_COMPUTE:
196 states = &prog->affected_states;
197
198 *states = ST_NEW_CS_STATE;
199
200 set_affected_state_flags(states, prog,
201 ST_NEW_CS_CONSTANTS,
202 ST_NEW_CS_SAMPLER_VIEWS,
203 ST_NEW_CS_SAMPLERS,
204 ST_NEW_CS_IMAGES,
205 ST_NEW_CS_UBOS,
206 ST_NEW_CS_SSBOS,
207 ST_NEW_CS_ATOMICS);
208 break;
209
210 default:
211 unreachable("unhandled shader stage");
212 }
213 }
214
215
216 /**
217 * Delete a shader variant. Note the caller must unlink the variant from
218 * the linked list.
219 */
220 static void
delete_variant(struct st_context * st,struct st_variant * v,GLenum target)221 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
222 {
223 if (v->driver_shader) {
224 if (target == GL_VERTEX_PROGRAM_ARB &&
225 ((struct st_common_variant*)v)->key.is_draw_shader) {
226 /* Draw shader. */
227 draw_delete_vertex_shader(st->draw, v->driver_shader);
228 } else if (st->has_shareable_shaders || v->st == st) {
229 /* The shader's context matches the calling context, or we
230 * don't care.
231 */
232 switch (target) {
233 case GL_VERTEX_PROGRAM_ARB:
234 st->pipe->delete_vs_state(st->pipe, v->driver_shader);
235 break;
236 case GL_TESS_CONTROL_PROGRAM_NV:
237 st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
238 break;
239 case GL_TESS_EVALUATION_PROGRAM_NV:
240 st->pipe->delete_tes_state(st->pipe, v->driver_shader);
241 break;
242 case GL_GEOMETRY_PROGRAM_NV:
243 st->pipe->delete_gs_state(st->pipe, v->driver_shader);
244 break;
245 case GL_FRAGMENT_PROGRAM_ARB:
246 st->pipe->delete_fs_state(st->pipe, v->driver_shader);
247 break;
248 case GL_COMPUTE_PROGRAM_NV:
249 st->pipe->delete_compute_state(st->pipe, v->driver_shader);
250 break;
251 default:
252 unreachable("bad shader type in delete_basic_variant");
253 }
254 } else {
255 /* We can't delete a shader with a context different from the one
256 * that created it. Add it to the creating context's zombie list.
257 */
258 enum pipe_shader_type type =
259 pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
260
261 st_save_zombie_shader(v->st, type, v->driver_shader);
262 }
263 }
264
265 FREE(v);
266 }
267
268 static void
st_unbind_program(struct st_context * st,struct gl_program * p)269 st_unbind_program(struct st_context *st, struct gl_program *p)
270 {
271 struct gl_context *ctx = st->ctx;
272
273 /* Unbind the shader in cso_context and re-bind in st/mesa. */
274 switch (p->info.stage) {
275 case MESA_SHADER_VERTEX:
276 cso_set_vertex_shader_handle(st->cso_context, NULL);
277 ctx->NewDriverState |= ST_NEW_VS_STATE;
278 break;
279 case MESA_SHADER_TESS_CTRL:
280 cso_set_tessctrl_shader_handle(st->cso_context, NULL);
281 ctx->NewDriverState |= ST_NEW_TCS_STATE;
282 break;
283 case MESA_SHADER_TESS_EVAL:
284 cso_set_tesseval_shader_handle(st->cso_context, NULL);
285 ctx->NewDriverState |= ST_NEW_TES_STATE;
286 break;
287 case MESA_SHADER_GEOMETRY:
288 cso_set_geometry_shader_handle(st->cso_context, NULL);
289 ctx->NewDriverState |= ST_NEW_GS_STATE;
290 break;
291 case MESA_SHADER_FRAGMENT:
292 cso_set_fragment_shader_handle(st->cso_context, NULL);
293 ctx->NewDriverState |= ST_NEW_FS_STATE;
294 break;
295 case MESA_SHADER_COMPUTE:
296 cso_set_compute_shader_handle(st->cso_context, NULL);
297 ctx->NewDriverState |= ST_NEW_CS_STATE;
298 break;
299 default:
300 unreachable("invalid shader type");
301 }
302 }
303
304 /**
305 * Free all basic program variants.
306 */
307 void
st_release_variants(struct st_context * st,struct gl_program * p)308 st_release_variants(struct st_context *st, struct gl_program *p)
309 {
310 struct st_variant *v;
311
312 /* If we are releasing shaders, re-bind them, because we don't
313 * know which shaders are bound in the driver.
314 */
315 if (p->variants)
316 st_unbind_program(st, p);
317
318 for (v = p->variants; v; ) {
319 struct st_variant *next = v->next;
320 delete_variant(st, v, p->Target);
321 v = next;
322 }
323
324 p->variants = NULL;
325
326 /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
327 * it has resulted in the driver taking ownership of the NIR. Those
328 * callers should be NULLing out the nir field in any pipe_shader_state
329 * that might have this called in order to indicate that.
330 *
331 * GLSL IR and ARB programs will have set gl_program->nir to the same
332 * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
333 */
334 }
335
336 /**
337 * Free all basic program variants and unref program.
338 */
339 void
st_release_program(struct st_context * st,struct gl_program ** p)340 st_release_program(struct st_context *st, struct gl_program **p)
341 {
342 if (!*p)
343 return;
344
345 destroy_program_variants(st, *p);
346 _mesa_reference_program(st->ctx, p, NULL);
347 }
348
/**
 * Common post-processing for NIR that was just translated from a fixed
 * function / ARB assembly program (prog_to_nir or ATI_fs translation):
 * SSA conversion, system-value lowering, optimization and (optionally)
 * serialization plus driver finalization.
 *
 * Pass order matters here; see the inline notes.
 */
static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   /* prog_to_nir output uses register intrinsics; convert to SSA first. */
   NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_reg_intrinsics_to_ssa");

   NIR_PASS(_, nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS(_, nir, nir_lower_system_values);

   struct nir_lower_compute_system_values_options cs_options = {
      .has_base_global_invocation_id = false,
      .has_base_workgroup_id = false,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs_options);

   /* Optimise NIR */
   NIR_PASS(_, nir, nir_opt_constant_folding);
   gl_nir_opts(nir);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   /* This must be done after optimizations to assign IO bases. */
   nir_recompute_io_bases(nir, nir_var_shader_in | nir_var_shader_out);

   if (st->allow_st_finalize_nir_twice) {
      /* Serialize before st_finalize_nir so variants can later be rebuilt
       * from the pre-finalized form (see get_nir_shader()).
       */
      st_serialize_base_nir(prog, nir);
      st_finalize_nir(st, prog, NULL, nir, true, false);

      if (screen->finalize_nir) {
         /* Any driver diagnostic string is intentionally discarded here. */
         char *msg = screen->finalize_nir(screen, nir);
         free(msg);
      }
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}
387
388 /**
389 * Prepare st_vertex_program info.
390 *
391 * attrib_to_index is an optional mapping from a vertex attrib to a shader
392 * input index.
393 */
394 void
st_prepare_vertex_program(struct gl_program * prog)395 st_prepare_vertex_program(struct gl_program *prog)
396 {
397 struct gl_vertex_program *stvp = (struct gl_vertex_program *)prog;
398
399 stvp->num_inputs = util_bitcount64(prog->info.inputs_read);
400 stvp->vert_attrib_mask = prog->info.inputs_read;
401
402 /* Compute mapping of vertex program outputs to slots. */
403 memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
404 unsigned num_outputs = 0;
405 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
406 if (prog->info.outputs_written & BITFIELD64_BIT(attr))
407 stvp->result_to_output[attr] = num_outputs++;
408 }
409 /* pre-setup potentially unused edgeflag output */
410 stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
411 }
412
413 void
st_translate_stream_output_info(struct gl_program * prog)414 st_translate_stream_output_info(struct gl_program *prog)
415 {
416 struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
417 if (!info)
418 return;
419
420 /* Determine the (default) output register mapping for each output. */
421 unsigned num_outputs = 0;
422 uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
423 memset(output_mapping, 0, sizeof(output_mapping));
424
425 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
426 /* this output was added by mesa/st and should not be tracked for xfb:
427 * drivers must check var->data.explicit_location to find the original output
428 * and only emit that one for xfb
429 */
430 if (prog->skip_pointsize_xfb && attr == VARYING_SLOT_PSIZ)
431 continue;
432 if (prog->info.outputs_written & BITFIELD64_BIT(attr))
433 output_mapping[attr] = num_outputs++;
434 }
435
436 /* Translate stream output info. */
437 struct pipe_stream_output_info *so_info =
438 &prog->state.stream_output;
439
440 if (!num_outputs) {
441 so_info->num_outputs = 0;
442 return;
443 }
444
445 for (unsigned i = 0; i < info->NumOutputs; i++) {
446 so_info->output[i].register_index =
447 output_mapping[info->Outputs[i].OutputRegister];
448 so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
449 so_info->output[i].num_components = info->Outputs[i].NumComponents;
450 so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
451 so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
452 so_info->output[i].stream = info->Outputs[i].StreamId;
453 }
454
455 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
456 so_info->stride[i] = info->Buffers[i].Stride;
457 }
458 so_info->num_outputs = info->NumOutputs;
459 }
460
461 /**
462 * Creates a driver shader from a NIR shader. Takes ownership of the
463 * passed nir_shader.
464 */
465 void *
st_create_nir_shader(struct st_context * st,struct pipe_shader_state * state)466 st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
467 {
468 struct pipe_context *pipe = st->pipe;
469
470 assert(state->type == PIPE_SHADER_IR_NIR);
471 nir_shader *nir = state->ir.nir;
472 gl_shader_stage stage = nir->info.stage;
473
474 /* Renumber SSA defs to make it easier to run diff on printed NIR. */
475 nir_foreach_function_impl(impl, nir) {
476 nir_index_ssa_defs(impl);
477 }
478
479 if (ST_DEBUG & DEBUG_PRINT_IR) {
480 fprintf(stderr, "NIR before handing off to driver:\n");
481 nir_print_shader(nir, stderr);
482 }
483
484 if (ST_DEBUG & DEBUG_PRINT_XFB) {
485 if (nir->info.io_lowered) {
486 if (nir->xfb_info && nir->xfb_info->output_count) {
487 fprintf(stderr, "XFB info before handing off to driver:\n");
488 fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
489 nir->info.xfb_stride[0], nir->info.xfb_stride[1],
490 nir->info.xfb_stride[2], nir->info.xfb_stride[3]);
491 nir_print_xfb_info(nir->xfb_info, stderr);
492 }
493 } else {
494 struct pipe_stream_output_info *so = &state->stream_output;
495
496 if (so->num_outputs) {
497 fprintf(stderr, "XFB info before handing off to driver:\n");
498 fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
499 so->stride[0], so->stride[1], so->stride[2],
500 so->stride[3]);
501
502 for (unsigned i = 0; i < so->num_outputs; i++) {
503 fprintf(stderr, "output%u: buffer=%u offset=%u, location=%u, "
504 "component_offset=%u, component_mask=0x%x, "
505 "stream=%u\n",
506 i, so->output[i].output_buffer,
507 so->output[i].dst_offset * 4,
508 so->output[i].register_index,
509 so->output[i].start_component,
510 BITFIELD_RANGE(so->output[i].start_component,
511 so->output[i].num_components),
512 so->output[i].stream);
513 }
514 }
515 }
516 }
517
518 void *shader;
519 switch (stage) {
520 case MESA_SHADER_VERTEX:
521 shader = pipe->create_vs_state(pipe, state);
522 break;
523 case MESA_SHADER_TESS_CTRL:
524 shader = pipe->create_tcs_state(pipe, state);
525 break;
526 case MESA_SHADER_TESS_EVAL:
527 shader = pipe->create_tes_state(pipe, state);
528 break;
529 case MESA_SHADER_GEOMETRY:
530 shader = pipe->create_gs_state(pipe, state);
531 break;
532 case MESA_SHADER_FRAGMENT:
533 shader = pipe->create_fs_state(pipe, state);
534 break;
535 case MESA_SHADER_COMPUTE: {
536 /* We'd like to use this for all stages but we need to rework streamout in
537 * gallium first.
538 */
539 shader = pipe_shader_from_nir(pipe, nir);
540 break;
541 }
542 default:
543 unreachable("unsupported shader stage");
544 return NULL;
545 }
546
547 return shader;
548 }
549
550 /**
551 * Translate a vertex program.
552 */
553 static bool
st_translate_vertex_program(struct st_context * st,struct gl_program * prog)554 st_translate_vertex_program(struct st_context *st,
555 struct gl_program *prog)
556 {
557 /* This determines which states will be updated when the assembly
558 * shader is bound.
559 */
560 prog->affected_states = ST_NEW_VS_STATE |
561 ST_NEW_RASTERIZER |
562 ST_NEW_VERTEX_ARRAYS;
563
564 if (prog->Parameters->NumParameters)
565 prog->affected_states |= ST_NEW_VS_CONSTANTS;
566
567 if (prog->arb.Instructions && prog->nir)
568 ralloc_free(prog->nir);
569
570 if (prog->serialized_nir) {
571 free(prog->serialized_nir);
572 prog->serialized_nir = NULL;
573 }
574 free(prog->base_serialized_nir);
575
576 prog->state.type = PIPE_SHADER_IR_NIR;
577 if (prog->arb.Instructions)
578 prog->nir = prog_to_nir(st->ctx, prog);
579 st_prog_to_nir_postprocess(st, prog->nir, prog);
580 prog->info = prog->nir->info;
581
582 st_prepare_vertex_program(prog);
583 return true;
584 }
585
/* NIR compiler options used when building vertex shaders for the draw
 * module (software vertex processing fallback, key.is_draw_shader).
 * These lower everything the draw paths can't consume natively; see
 * get_nir_shader(), which selects these options for draw shaders.
 */
static const struct nir_shader_compiler_options draw_nir_options = {
   .lower_scmp = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_fsat = true,
   .lower_bitfield_insert = true,
   .lower_bitfield_extract = true,
   .lower_fdph = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp16 = true,
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   /* Pack/unpack conversions are all done in lowered form. */
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_split = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_half_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_2x32_64 = true,
   .lower_ifind_msb = true,
   .lower_int64_options = nir_lower_imul_2x32_64,
   .lower_doubles_options = nir_lower_dround_even,
   .max_unroll_iterations = 32,
   .lower_to_scalar = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .lower_fisnormal = true,
   .lower_fquantize2f16 = true,
   .driver_functions = true,
};
635
/**
 * Return a nir_shader to build a variant of \p prog from.
 *
 * The first (non-draw) variant takes ownership of prog->nir so no clone is
 * needed; any later variant is deserialized from the blob saved earlier
 * (serialized_nir, or base_serialized_nir for draw shaders that need the
 * pre-finalized form).  The returned shader is owned by the caller.
 */
static struct nir_shader *
get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
{
   if ((!is_draw || !st->ctx->Const.PackedDriverUniformStorage) && prog->nir) {
      nir_shader *nir = prog->nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning. Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      prog->nir = NULL;
      /* The serialized copy must already exist or later variants would
       * have nothing to deserialize from.
       */
      assert(prog->serialized_nir && prog->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   /* Draw shaders are compiled against the draw module's options, not the
    * driver's.
    */
   const struct nir_shader_compiler_options *options =
      is_draw ? &draw_nir_options : st_get_nir_compiler_options(st, prog->info.stage);

   if (is_draw && st->ctx->Const.PackedDriverUniformStorage &&
       (!prog->shader_program || prog->shader_program->data->LinkStatus != LINKING_SKIPPED)) {
      /* Use the pre-finalized base NIR so driver-specific uniform packing
       * doesn't leak into the draw fallback.
       */
      assert(prog->base_serialized_nir);
      blob_reader_init(&blob_reader, prog->base_serialized_nir, prog->base_serialized_nir_size);
   } else {
      assert(prog->serialized_nir);
      blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
   }
   return nir_deserialize(NULL, options, &blob_reader);
}
665
666 static void
lower_ucp(struct st_context * st,struct nir_shader * nir,unsigned ucp_enables,struct gl_program_parameter_list * params)667 lower_ucp(struct st_context *st,
668 struct nir_shader *nir,
669 unsigned ucp_enables,
670 struct gl_program_parameter_list *params)
671 {
672 if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
673 NIR_PASS(_, nir, nir_lower_clip_disable, ucp_enables);
674 else {
675 bool can_compact = nir->options->compact_arrays;
676 bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
677
678 gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
679 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
680 if (use_eye) {
681 clipplane_state[i][0] = STATE_CLIPPLANE;
682 clipplane_state[i][1] = i;
683 } else {
684 clipplane_state[i][0] = STATE_CLIP_INTERNAL;
685 clipplane_state[i][1] = i;
686 }
687 _mesa_add_state_reference(params, clipplane_state[i]);
688 }
689
690 if (nir->info.stage == MESA_SHADER_VERTEX ||
691 nir->info.stage == MESA_SHADER_TESS_EVAL) {
692 NIR_PASS(_, nir, nir_lower_clip_vs, ucp_enables,
693 true, can_compact, clipplane_state);
694 } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
695 NIR_PASS(_, nir, nir_lower_clip_gs, ucp_enables,
696 can_compact, clipplane_state);
697 }
698 }
699 }
700
701 static bool
force_persample_shading(struct nir_builder * b,nir_intrinsic_instr * intr,void * data)702 force_persample_shading(struct nir_builder *b, nir_intrinsic_instr *intr,
703 void *data)
704 {
705 if (intr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
706 intr->intrinsic == nir_intrinsic_load_barycentric_centroid) {
707 intr->intrinsic = nir_intrinsic_load_barycentric_sample;
708 return true;
709 }
710
711 return false;
712 }
713
714 static int
xfb_compare_dst_offset(const void * a,const void * b)715 xfb_compare_dst_offset(const void *a, const void *b)
716 {
717 const struct pipe_stream_output *var0 = (const struct pipe_stream_output*)a;
718 const struct pipe_stream_output *var1 = (const struct pipe_stream_output*)b;
719
720 if (var0->output_buffer != var1->output_buffer)
721 return var0->output_buffer > var1->output_buffer ? 1 : -1;
722
723 return var0->dst_offset - var1->dst_offset;
724 }
725
/**
 * Fill a pipe_stream_output_info from a shader's nir_xfb_info.
 * \p info is zeroed first; if the shader has no xfb_info it stays empty.
 */
static void
get_stream_output_info_from_nir(nir_shader *nir,
                                struct pipe_stream_output_info *info)
{
   /* Get pipe_stream_output_info from NIR. Only used by IO variables. */
   nir_xfb_info *xfb = nir->xfb_info;
   memset(info, 0, sizeof(*info));

   if (!xfb)
      return;

   info->num_outputs = xfb->output_count;

   for (unsigned i = 0; i < 4; i++)
      info->stride[i] = nir->info.xfb_stride[i];

   for (unsigned i = 0; i < xfb->output_count; i++) {
      struct pipe_stream_output *out = &info->output[i];

      assert(!xfb->outputs[i].high_16bits);

      /* register_index is the compacted slot: the number of written
       * outputs below this output's location.
       */
      out->register_index =
         util_bitcount64(nir->info.outputs_written &
                         BITFIELD64_MASK(xfb->outputs[i].location));
      out->start_component = xfb->outputs[i].component_offset;
      out->num_components = util_bitcount(xfb->outputs[i].component_mask);
      out->output_buffer = xfb->outputs[i].buffer;
      /* xfb offsets are in bytes; pipe dst_offset is in dwords. */
      out->dst_offset = xfb->outputs[i].offset / 4;
      out->stream = xfb->buffer_to_stream[out->output_buffer];
   }

   /* Intel requires that xfb outputs are sorted by dst_offset. */
   qsort(info->output, info->num_outputs, sizeof(info->output[0]),
         xfb_compare_dst_offset);
}
761
762 static struct st_common_variant *
st_create_common_variant(struct st_context * st,struct gl_program * prog,const struct st_common_variant_key * key,bool report_compile_error,char ** error)763 st_create_common_variant(struct st_context *st,
764 struct gl_program *prog,
765 const struct st_common_variant_key *key,
766 bool report_compile_error, char **error)
767 {
768 MESA_TRACE_FUNC();
769
770 struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
771 struct pipe_shader_state state = {0};
772
773 static const gl_state_index16 point_size_state[STATE_LENGTH] =
774 { STATE_POINT_SIZE_CLAMPED, 0 };
775 struct gl_program_parameter_list *params = prog->Parameters;
776
777 v->key = *key;
778
779 state.stream_output = prog->state.stream_output;
780
781 bool finalize = false;
782
783 state.type = PIPE_SHADER_IR_NIR;
784 state.ir.nir = get_nir_shader(st, prog, key->is_draw_shader);
785 const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options;
786
787 if (key->clamp_color) {
788 NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
789 finalize = true;
790 }
791 if (key->passthrough_edgeflags) {
792 NIR_PASS(_, state.ir.nir, nir_lower_passthrough_edgeflags);
793 finalize = true;
794 }
795
796 if (key->export_point_size) {
797 /* if flag is set, shader must export psiz */
798 _mesa_add_state_reference(params, point_size_state);
799 NIR_PASS(_, state.ir.nir, nir_lower_point_size_mov,
800 point_size_state);
801
802 finalize = true;
803 }
804
805 if (key->lower_ucp) {
806 assert(!options->unify_interfaces);
807 lower_ucp(st, state.ir.nir, key->lower_ucp, params);
808 finalize = true;
809 }
810
811 if (st->emulate_gl_clamp &&
812 (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
813 nir_lower_tex_options tex_opts = {0};
814 tex_opts.saturate_s = key->gl_clamp[0];
815 tex_opts.saturate_t = key->gl_clamp[1];
816 tex_opts.saturate_r = key->gl_clamp[2];
817 NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
818 }
819
820 if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
821 st_finalize_nir(st, prog, prog->shader_program, state.ir.nir, false,
822 key->is_draw_shader);
823 }
824
825 assert(state.ir.nir->info.stage == MESA_SHADER_COMPUTE ||
826 state.ir.nir->info.io_lowered);
827
828 /* This should be after all passes that touch IO. */
829 if (state.ir.nir->info.io_lowered &&
830 (!(state.ir.nir->options->io_options & nir_io_has_intrinsics) ||
831 key->is_draw_shader)) {
832 assert(!state.stream_output.num_outputs || state.ir.nir->xfb_info);
833 get_stream_output_info_from_nir(state.ir.nir, &state.stream_output);
834 /* Some lowering passes can leave dead code behind, but dead IO intrinsics
835 * are still counted as enabled IO, which breaks things.
836 */
837 NIR_PASS(_, state.ir.nir, nir_opt_dce);
838
839 /* vc4, vc5 require this. */
840 if (state.ir.nir->info.stage == MESA_SHADER_VERTEX ||
841 state.ir.nir->info.stage == MESA_SHADER_TESS_EVAL)
842 NIR_PASS(_, state.ir.nir, nir_move_output_stores_to_end);
843
844 NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);
845
846 if (state.ir.nir->info.stage == MESA_SHADER_TESS_CTRL &&
847 state.ir.nir->options->compact_arrays &&
848 state.ir.nir->options->vectorize_tess_levels)
849 NIR_PASS(_, state.ir.nir, nir_vectorize_tess_levels);
850
851 gl_nir_opts(state.ir.nir);
852 finalize = true;
853 }
854
855 if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
856 struct pipe_screen *screen = st->screen;
857 if (!key->is_draw_shader && screen->finalize_nir) {
858 char *msg = screen->finalize_nir(screen, state.ir.nir);
859 free(msg);
860 }
861
862 /* Clip lowering and edgeflags may have introduced new varyings, so
863 * update the inputs_read/outputs_written. However, with
864 * unify_interfaces set (aka iris) the non-SSO varyings layout is
865 * decided at link time with outputs_written updated so the two line
866 * up. A driver with this flag set may not use any of the lowering
867 * passes that would change the varyings, so skip to make sure we don't
868 * break its linkage.
869 */
870 if (!options->unify_interfaces) {
871 nir_shader_gather_info(state.ir.nir,
872 nir_shader_get_entrypoint(state.ir.nir));
873 }
874 }
875
876 if (key->is_draw_shader) {
877 NIR_PASS(_, state.ir.nir, gl_nir_lower_images, false);
878 v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
879 }
880 else
881 v->base.driver_shader = st_create_nir_shader(st, &state);
882
883 if (report_compile_error && state.error_message) {
884 *error = state.error_message;
885 return NULL;
886 }
887
888 if (error)
889 *error = NULL;
890 return v;
891 }
892
893 static void
st_add_variant(struct st_variant ** list,struct st_variant * v)894 st_add_variant(struct st_variant **list, struct st_variant *v)
895 {
896 struct st_variant *first = *list;
897
898 /* Make sure that the default variant stays the first in the list, and insert
899 * any later variants in as the second entry.
900 */
901 if (first) {
902 v->next = first->next;
903 first->next = v;
904 } else {
905 *list = v;
906 }
907 }
908
909 /**
910 * Find/create a vertex program variant.
911 */
912 struct st_common_variant *
st_get_common_variant(struct st_context * st,struct gl_program * prog,const struct st_common_variant_key * key,bool report_compile_error,char ** error)913 st_get_common_variant(struct st_context *st,
914 struct gl_program *prog,
915 const struct st_common_variant_key *key,
916 bool report_compile_error, char **error)
917 {
918 struct st_common_variant *v;
919
920 /* Search for existing variant */
921 for (v = st_common_variant(prog->variants); v;
922 v = st_common_variant(v->base.next)) {
923 if (memcmp(&v->key, key, sizeof(*key)) == 0) {
924 break;
925 }
926 }
927
928 if (!v) {
929 if (prog->variants != NULL) {
930 _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
931 "Compiling %s shader variant (%s%s%s%s%s%s)",
932 _mesa_shader_stage_to_string(prog->info.stage),
933 key->passthrough_edgeflags ? "edgeflags," : "",
934 key->clamp_color ? "clamp_color," : "",
935 key->export_point_size ? "point_size," : "",
936 key->lower_ucp ? "ucp," : "",
937 key->is_draw_shader ? "draw," : "",
938 key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "");
939 }
940
941 /* create now */
942 v = st_create_common_variant(st, prog, key, report_compile_error, error);
943 if (v) {
944 v->base.st = key->st;
945
946 if (prog->info.stage == MESA_SHADER_VERTEX) {
947 struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;
948
949 v->vert_attrib_mask =
950 vp->vert_attrib_mask |
951 (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
952 }
953
954 st_add_variant(&prog->variants, &v->base);
955 }
956 }
957
958 return v;
959 }
960
961
962 /**
963 * Translate a non-GLSL Mesa fragment shader into a NIR shader.
964 */
965 static bool
st_translate_fragment_program(struct st_context * st,struct gl_program * prog)966 st_translate_fragment_program(struct st_context *st,
967 struct gl_program *prog)
968 {
969 /* This determines which states will be updated when the assembly
970 * shader is bound.
971 *
972 * fragment.position and glDrawPixels always use constants.
973 */
974 prog->affected_states = ST_NEW_FS_STATE |
975 ST_NEW_SAMPLE_SHADING |
976 ST_NEW_FS_CONSTANTS;
977
978 if (prog->ati_fs) {
979 /* Just set them for ATI_fs unconditionally. */
980 prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
981 ST_NEW_FS_SAMPLERS;
982 } else {
983 /* ARB_fp */
984 if (prog->SamplersUsed)
985 prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
986 ST_NEW_FS_SAMPLERS;
987 }
988
989 /* Translate to NIR. */
990 if (prog->nir && prog->arb.Instructions)
991 ralloc_free(prog->nir);
992
993 if (prog->serialized_nir) {
994 free(prog->serialized_nir);
995 prog->serialized_nir = NULL;
996 }
997
998 prog->state.type = PIPE_SHADER_IR_NIR;
999 if (prog->arb.Instructions) {
1000 prog->nir = prog_to_nir(st->ctx, prog);
1001 } else if (prog->ati_fs) {
1002 const struct nir_shader_compiler_options *options =
1003 st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);
1004
1005 assert(!prog->nir);
1006 prog->nir = st_translate_atifs_program(prog->ati_fs, prog, options);
1007 }
1008 st_prog_to_nir_postprocess(st, prog->nir, prog);
1009
1010 prog->info = prog->nir->info;
1011 if (prog->ati_fs) {
1012 /* ATI_fs will lower fixed function fog at variant time, after the FF vertex
1013 * prog has been generated. So we have to always declare a read of FOGC so
1014 * that FF vp feeds it to us just in case.
1015 */
1016 prog->info.inputs_read |= VARYING_BIT_FOGC;
1017 }
1018
1019 return true;
1020 }
1021
1022 static struct st_fp_variant *
st_create_fp_variant(struct st_context * st,struct gl_program * fp,const struct st_fp_variant_key * key,bool report_compile_error,char ** error)1023 st_create_fp_variant(struct st_context *st,
1024 struct gl_program *fp,
1025 const struct st_fp_variant_key *key,
1026 bool report_compile_error, char **error)
1027 {
1028 struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1029 struct pipe_shader_state state = {0};
1030 struct gl_program_parameter_list *params = fp->Parameters;
1031 static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1032 { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1033 static const gl_state_index16 scale_state[STATE_LENGTH] =
1034 { STATE_PT_SCALE };
1035 static const gl_state_index16 bias_state[STATE_LENGTH] =
1036 { STATE_PT_BIAS };
1037 static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1038 { STATE_ALPHA_REF };
1039
1040 if (!variant)
1041 return NULL;
1042
1043 MESA_TRACE_FUNC();
1044
1045 /* Translate ATI_fs to NIR at variant time because that's when we have the
1046 * texture types.
1047 */
1048 state.ir.nir = get_nir_shader(st, fp, false);
1049 state.type = PIPE_SHADER_IR_NIR;
1050 state.report_compile_error = report_compile_error;
1051
1052 bool finalize = false;
1053
1054 if (fp->ati_fs) {
1055 if (key->fog) {
1056 NIR_PASS(_, state.ir.nir, st_nir_lower_fog, key->fog, fp->Parameters);
1057 }
1058
1059 NIR_PASS(_, state.ir.nir, st_nir_lower_atifs_samplers, key->texture_index);
1060
1061 finalize = true;
1062 }
1063
1064 if (key->clamp_color) {
1065 NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
1066 finalize = true;
1067 }
1068
1069 if (key->lower_flatshade) {
1070 NIR_PASS(_, state.ir.nir, nir_lower_flatshade);
1071 finalize = true;
1072 }
1073
1074 if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
1075 _mesa_add_state_reference(params, alpha_ref_state);
1076 NIR_PASS(_, state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1077 false, alpha_ref_state);
1078 finalize = true;
1079 }
1080
1081 if (key->lower_two_sided_color) {
1082 bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
1083 NIR_PASS(_, state.ir.nir, nir_lower_two_sided_color, face_sysval);
1084 finalize = true;
1085 }
1086
1087 if (key->persample_shading) {
1088 nir_shader *shader = state.ir.nir;
1089 nir_shader_intrinsics_pass(shader, force_persample_shading,
1090 nir_metadata_all, NULL);
1091
1092 /* In addition to requiring per-sample interpolation, sample shading
1093 * changes the behaviour of gl_SampleMaskIn, so we need per-sample shading
1094 * even if there are no shader-in variables at all. In that case,
1095 * uses_sample_shading won't be set by glsl_to_nir. We need to do so here.
1096 */
1097 shader->info.fs.uses_sample_shading = true;
1098
1099 finalize = true;
1100 }
1101
1102 if (st->emulate_gl_clamp &&
1103 (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
1104 nir_lower_tex_options tex_opts = {0};
1105 tex_opts.saturate_s = key->gl_clamp[0];
1106 tex_opts.saturate_t = key->gl_clamp[1];
1107 tex_opts.saturate_r = key->gl_clamp[2];
1108 NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
1109 finalize = true;
1110 }
1111
1112 assert(!(key->bitmap && key->drawpixels));
1113
1114 /* glBitmap */
1115 if (key->bitmap) {
1116 nir_lower_bitmap_options options = {0};
1117
1118 variant->bitmap_sampler = ffs(~fp->SamplersUsed) - 1;
1119 options.sampler = variant->bitmap_sampler;
1120 options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1121
1122 NIR_PASS(_, state.ir.nir, nir_lower_bitmap, &options);
1123 finalize = true;
1124 }
1125
1126 /* glDrawPixels (color only) */
1127 if (key->drawpixels) {
1128 nir_lower_drawpixels_options options = {{0}};
1129 unsigned samplers_used = fp->SamplersUsed;
1130
1131 /* Find the first unused slot. */
1132 variant->drawpix_sampler = ffs(~samplers_used) - 1;
1133 options.drawpix_sampler = variant->drawpix_sampler;
1134 samplers_used |= (1 << variant->drawpix_sampler);
1135
1136 options.pixel_maps = key->pixelMaps;
1137 if (key->pixelMaps) {
1138 variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1139 options.pixelmap_sampler = variant->pixelmap_sampler;
1140 }
1141
1142 options.scale_and_bias = key->scaleAndBias;
1143 if (key->scaleAndBias) {
1144 _mesa_add_state_reference(params, scale_state);
1145 memcpy(options.scale_state_tokens, scale_state,
1146 sizeof(options.scale_state_tokens));
1147 _mesa_add_state_reference(params, bias_state);
1148 memcpy(options.bias_state_tokens, bias_state,
1149 sizeof(options.bias_state_tokens));
1150 }
1151
1152 _mesa_add_state_reference(params, texcoord_state);
1153 memcpy(options.texcoord_state_tokens, texcoord_state,
1154 sizeof(options.texcoord_state_tokens));
1155
1156 NIR_PASS(_, state.ir.nir, nir_lower_drawpixels, &options);
1157 finalize = true;
1158 }
1159
1160 bool need_lower_tex_src_plane = false;
1161
1162 if (unlikely(key->external.lower_nv12 || key->external.lower_nv21 ||
1163 key->external.lower_iyuv ||
1164 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1165 key->external.lower_yx_xvxu || key->external.lower_xy_vxux ||
1166 key->external.lower_ayuv || key->external.lower_xyuv ||
1167 key->external.lower_yuv || key->external.lower_yu_yv ||
1168 key->external.lower_yv_yu || key->external.lower_y41x)) {
1169
1170 st_nir_lower_samplers(st->screen, state.ir.nir,
1171 fp->shader_program, fp);
1172
1173 nir_lower_tex_options options = {0};
1174 options.lower_y_uv_external = key->external.lower_nv12;
1175 options.lower_y_vu_external = key->external.lower_nv21;
1176 options.lower_y_u_v_external = key->external.lower_iyuv;
1177 options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1178 options.lower_xy_vxux_external = key->external.lower_xy_vxux;
1179 options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1180 options.lower_yx_xvxu_external = key->external.lower_yx_xvxu;
1181 options.lower_ayuv_external = key->external.lower_ayuv;
1182 options.lower_xyuv_external = key->external.lower_xyuv;
1183 options.lower_yuv_external = key->external.lower_yuv;
1184 options.lower_yu_yv_external = key->external.lower_yu_yv;
1185 options.lower_yv_yu_external = key->external.lower_yv_yu;
1186 options.lower_y41x_external = key->external.lower_y41x;
1187 options.bt709_external = key->external.bt709;
1188 options.bt2020_external = key->external.bt2020;
1189 options.yuv_full_range_external = key->external.yuv_full_range;
1190 NIR_PASS(_, state.ir.nir, nir_lower_tex, &options);
1191 finalize = true;
1192 need_lower_tex_src_plane = true;
1193 }
1194
1195 if (finalize || !st->allow_st_finalize_nir_twice)
1196 st_finalize_nir(st, fp, fp->shader_program, state.ir.nir, false, false);
1197
1198 /* This pass needs to happen *after* nir_lower_sampler */
1199 if (unlikely(need_lower_tex_src_plane)) {
1200 NIR_PASS(_, state.ir.nir, st_nir_lower_tex_src_plane,
1201 ~fp->SamplersUsed,
1202 key->external.lower_nv12 | key->external.lower_nv21 |
1203 key->external.lower_xy_uxvx | key->external.lower_xy_vxux |
1204 key->external.lower_yx_xuxv | key->external.lower_yx_xvxu,
1205 key->external.lower_iyuv);
1206 finalize = true;
1207 }
1208
1209 /* It is undefined behavior when an ARB assembly uses SHADOW2D target
1210 * with a texture in not depth format. In this case NVIDIA automatically
1211 * replaces SHADOW sampler with a normal sampler and some games like
1212 * Penumbra Overture which abuses this UB (issues/8425) works fine but
1213 * breaks with mesa. Replace the shadow sampler with a normal one here
1214 */
1215 if (!fp->shader_program && ~key->depth_textures & fp->ShadowSamplers) {
1216 NIR_PASS(_, state.ir.nir, nir_remove_tex_shadow,
1217 ~key->depth_textures & fp->ShadowSamplers);
1218 finalize = true;
1219 }
1220
1221 assert(state.ir.nir->info.io_lowered);
1222
1223 /* This should be after all passes that touch IO. */
1224 if (!(state.ir.nir->options->io_options & nir_io_has_intrinsics)) {
1225 /* Some lowering passes can leave dead code behind, but dead IO intrinsics
1226 * are still counted as enabled IO, which breaks things.
1227 */
1228 NIR_PASS(_, state.ir.nir, nir_opt_dce);
1229 NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);
1230 gl_nir_opts(state.ir.nir);
1231 finalize = true;
1232 }
1233
1234 if (finalize || !st->allow_st_finalize_nir_twice) {
1235 /* Some of the lowering above may have introduced new varyings */
1236 nir_shader_gather_info(state.ir.nir,
1237 nir_shader_get_entrypoint(state.ir.nir));
1238
1239 struct pipe_screen *screen = st->screen;
1240 if (screen->finalize_nir) {
1241 char *msg = screen->finalize_nir(screen, state.ir.nir);
1242 free(msg);
1243 }
1244 }
1245
1246 variant->base.driver_shader = st_create_nir_shader(st, &state);
1247 if (report_compile_error && state.error_message) {
1248 *error = state.error_message;
1249 return NULL;
1250 }
1251
1252 variant->key = *key;
1253 if (error)
1254 *error = NULL;
1255 return variant;
1256 }
1257
1258 /**
1259 * Translate fragment program if needed.
1260 */
1261 struct st_fp_variant *
st_get_fp_variant(struct st_context * st,struct gl_program * fp,const struct st_fp_variant_key * key,bool report_compile_error,char ** error)1262 st_get_fp_variant(struct st_context *st,
1263 struct gl_program *fp,
1264 const struct st_fp_variant_key *key,
1265 bool report_compile_error, char **error)
1266 {
1267 struct st_fp_variant *fpv;
1268
1269 /* Search for existing variant */
1270 for (fpv = st_fp_variant(fp->variants); fpv;
1271 fpv = st_fp_variant(fpv->base.next)) {
1272 if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1273 break;
1274 }
1275 }
1276
1277 if (!fpv) {
1278 /* create new */
1279
1280 if (fp->variants != NULL) {
1281 _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
1282 "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%d)",
1283 key->bitmap ? "bitmap," : "",
1284 key->drawpixels ? "drawpixels," : "",
1285 key->scaleAndBias ? "scale_bias," : "",
1286 key->pixelMaps ? "pixel_maps," : "",
1287 key->clamp_color ? "clamp_color," : "",
1288 key->persample_shading ? "persample_shading," : "",
1289 key->fog ? "fog," : "",
1290 key->lower_two_sided_color ? "twoside," : "",
1291 key->lower_flatshade ? "flatshade," : "",
1292 key->lower_alpha_func != COMPARE_FUNC_ALWAYS ? "alpha_compare," : "",
1293 /* skipped ATI_fs targets */
1294 fp->ExternalSamplersUsed ? "external?," : "",
1295 key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : "",
1296 "depth_textures=", key->depth_textures);
1297 }
1298
1299 fpv = st_create_fp_variant(st, fp, key, report_compile_error, error);
1300 if (fpv) {
1301 fpv->base.st = key->st;
1302
1303 st_add_variant(&fp->variants, &fpv->base);
1304 }
1305 }
1306
1307 return fpv;
1308 }
1309
1310 /**
1311 * Vert/Geom/Frag programs have per-context variants. Free all the
1312 * variants attached to the given program which match the given context.
1313 */
1314 static void
destroy_program_variants(struct st_context * st,struct gl_program * p)1315 destroy_program_variants(struct st_context *st, struct gl_program *p)
1316 {
1317 if (!p || p == &_mesa_DummyProgram)
1318 return;
1319
1320 struct st_variant *v, **prevPtr = &p->variants;
1321 bool unbound = false;
1322
1323 for (v = p->variants; v; ) {
1324 struct st_variant *next = v->next;
1325 if (v->st == st) {
1326 if (!unbound) {
1327 st_unbind_program(st, p);
1328 unbound = true;
1329 }
1330
1331 /* unlink from list */
1332 *prevPtr = next;
1333 /* destroy this variant */
1334 delete_variant(st, v, p->Target);
1335 }
1336 else {
1337 prevPtr = &v->next;
1338 }
1339 v = next;
1340 }
1341 }
1342
1343
1344 /**
1345 * Callback for _mesa_HashWalk. Free all the shader's program variants
1346 * which match the given context.
1347 */
1348 static void
destroy_shader_program_variants_cb(void * data,void * userData)1349 destroy_shader_program_variants_cb(void *data, void *userData)
1350 {
1351 struct st_context *st = (struct st_context *) userData;
1352 struct gl_shader *shader = (struct gl_shader *) data;
1353
1354 switch (shader->Type) {
1355 case GL_SHADER_PROGRAM_MESA:
1356 {
1357 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1358 GLuint i;
1359
1360 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1361 if (shProg->_LinkedShaders[i])
1362 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1363 }
1364 }
1365 break;
1366 case GL_VERTEX_SHADER:
1367 case GL_FRAGMENT_SHADER:
1368 case GL_GEOMETRY_SHADER:
1369 case GL_TESS_CONTROL_SHADER:
1370 case GL_TESS_EVALUATION_SHADER:
1371 case GL_COMPUTE_SHADER:
1372 break;
1373 default:
1374 assert(0);
1375 }
1376 }
1377
1378
1379 /**
1380 * Callback for _mesa_HashWalk. Free all the program variants which match
1381 * the given context.
1382 */
1383 static void
destroy_program_variants_cb(void * data,void * userData)1384 destroy_program_variants_cb(void *data, void *userData)
1385 {
1386 struct st_context *st = (struct st_context *) userData;
1387 struct gl_program *program = (struct gl_program *) data;
1388 destroy_program_variants(st, program);
1389 }
1390
1391
1392 /**
1393 * Walk over all shaders and programs to delete any variants which
1394 * belong to the given context.
1395 * This is called during context tear-down.
1396 */
1397 void
st_destroy_program_variants(struct st_context * st)1398 st_destroy_program_variants(struct st_context *st)
1399 {
1400 /* If shaders can be shared with other contexts, the last context will
1401 * call DeleteProgram on all shaders, releasing everything.
1402 */
1403 if (st->has_shareable_shaders)
1404 return;
1405
1406 /* ARB vert/frag program */
1407 _mesa_HashWalk(&st->ctx->Shared->Programs,
1408 destroy_program_variants_cb, st);
1409
1410 /* GLSL vert/frag/geom shaders */
1411 _mesa_HashWalk(&st->ctx->Shared->ShaderObjects,
1412 destroy_shader_program_variants_cb, st);
1413 }
1414
1415 /**
1416 * Compile one shader variant.
1417 */
1418 static char *
st_precompile_shader_variant(struct st_context * st,struct gl_program * prog,bool report_compile_error)1419 st_precompile_shader_variant(struct st_context *st,
1420 struct gl_program *prog,
1421 bool report_compile_error)
1422 {
1423 char *error = NULL;
1424
1425 switch (prog->Target) {
1426 case GL_VERTEX_PROGRAM_ARB:
1427 case GL_TESS_CONTROL_PROGRAM_NV:
1428 case GL_TESS_EVALUATION_PROGRAM_NV:
1429 case GL_GEOMETRY_PROGRAM_NV:
1430 case GL_COMPUTE_PROGRAM_NV: {
1431 struct st_common_variant_key key;
1432
1433 memset(&key, 0, sizeof(key));
1434
1435 if (_mesa_is_desktop_gl_compat(st->ctx) &&
1436 st->clamp_vert_color_in_shader &&
1437 (prog->info.outputs_written & (VARYING_SLOT_COL0 |
1438 VARYING_SLOT_COL1 |
1439 VARYING_SLOT_BFC0 |
1440 VARYING_SLOT_BFC1))) {
1441 key.clamp_color = true;
1442 }
1443
1444 key.st = st->has_shareable_shaders ? NULL : st;
1445 st_get_common_variant(st, prog, &key, report_compile_error, &error);
1446 return error;
1447 }
1448
1449 case GL_FRAGMENT_PROGRAM_ARB: {
1450 struct st_fp_variant_key key;
1451
1452 memset(&key, 0, sizeof(key));
1453
1454 key.st = st->has_shareable_shaders ? NULL : st;
1455 key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
1456 if (prog->ati_fs) {
1457 for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
1458 key.texture_index[i] = TEXTURE_2D_INDEX;
1459 }
1460
1461 /* Shadow samplers require texture in depth format, which we lower to
1462 * non-shadow if necessary for ARB programs
1463 */
1464 if (!prog->shader_program)
1465 key.depth_textures = prog->ShadowSamplers;
1466
1467 st_get_fp_variant(st, prog, &key, report_compile_error, &error);
1468 return error;
1469 }
1470
1471 default:
1472 unreachable("invalid shader stage");
1473 }
1474 }
1475
1476 void
st_serialize_nir(struct gl_program * prog)1477 st_serialize_nir(struct gl_program *prog)
1478 {
1479 if (!prog->serialized_nir) {
1480 struct blob blob;
1481 size_t size;
1482
1483 blob_init(&blob);
1484 nir_serialize(&blob, prog->nir, false);
1485 blob_finish_get_buffer(&blob, &prog->serialized_nir, &size);
1486 prog->serialized_nir_size = size;
1487 }
1488 }
1489
1490 void
st_serialize_base_nir(struct gl_program * prog,nir_shader * nir)1491 st_serialize_base_nir(struct gl_program *prog, nir_shader *nir)
1492 {
1493 if (!prog->base_serialized_nir && nir->info.stage == MESA_SHADER_VERTEX) {
1494 struct blob blob;
1495 size_t size;
1496
1497 blob_init(&blob);
1498 nir_serialize(&blob, nir, false);
1499 blob_finish_get_buffer(&blob, &prog->base_serialized_nir, &size);
1500 prog->base_serialized_nir_size = size;
1501 }
1502 }
1503
1504 char *
st_finalize_program(struct st_context * st,struct gl_program * prog,bool report_compile_error)1505 st_finalize_program(struct st_context *st, struct gl_program *prog,
1506 bool report_compile_error)
1507 {
1508 struct gl_context *ctx = st->ctx;
1509 bool is_bound = false;
1510
1511 MESA_TRACE_FUNC();
1512
1513 if (prog->info.stage == MESA_SHADER_VERTEX)
1514 is_bound = prog == ctx->VertexProgram._Current;
1515 else if (prog->info.stage == MESA_SHADER_TESS_CTRL)
1516 is_bound = prog == ctx->TessCtrlProgram._Current;
1517 else if (prog->info.stage == MESA_SHADER_TESS_EVAL)
1518 is_bound = prog == ctx->TessEvalProgram._Current;
1519 else if (prog->info.stage == MESA_SHADER_GEOMETRY)
1520 is_bound = prog == ctx->GeometryProgram._Current;
1521 else if (prog->info.stage == MESA_SHADER_FRAGMENT)
1522 is_bound = prog == ctx->FragmentProgram._Current;
1523 else if (prog->info.stage == MESA_SHADER_COMPUTE)
1524 is_bound = prog == ctx->ComputeProgram._Current;
1525
1526 if (is_bound) {
1527 if (prog->info.stage == MESA_SHADER_VERTEX) {
1528 ctx->Array.NewVertexElements = true;
1529 ctx->NewDriverState |= ST_NEW_VERTEX_PROGRAM(ctx, prog);
1530 } else {
1531 ctx->NewDriverState |= prog->affected_states;
1532 }
1533 }
1534
1535 if (prog->nir) {
1536 nir_sweep(prog->nir);
1537
1538 /* This is only needed for ARB_vp/fp programs and when the disk cache
1539 * is disabled. If the disk cache is enabled, GLSL programs are
1540 * serialized in write_nir_to_cache.
1541 */
1542 st_serialize_base_nir(prog, prog->nir);
1543 st_serialize_nir(prog);
1544 }
1545
1546 /* Always create the default variant of the program. */
1547 return st_precompile_shader_variant(st, prog, report_compile_error);
1548 }
1549
1550 /**
1551 * Called when the program's text/code is changed. We have to free
1552 * all shader variants and corresponding gallium shaders when this happens.
1553 */
1554 GLboolean
st_program_string_notify(struct gl_context * ctx,GLenum target,struct gl_program * prog)1555 st_program_string_notify( struct gl_context *ctx,
1556 GLenum target,
1557 struct gl_program *prog )
1558 {
1559 struct st_context *st = st_context(ctx);
1560
1561 /* GLSL-to-NIR should not end up here. */
1562 assert(!prog->shader_program);
1563
1564 st_release_variants(st, prog);
1565
1566 if (target == GL_FRAGMENT_PROGRAM_ARB ||
1567 target == GL_FRAGMENT_SHADER_ATI) {
1568 if (!st_translate_fragment_program(st, prog))
1569 return false;
1570 } else if (target == GL_VERTEX_PROGRAM_ARB) {
1571 if (!st_translate_vertex_program(st, prog))
1572 return false;
1573 if (st->add_point_size &&
1574 gl_nir_can_add_pointsize_to_program(&st->ctx->Const, prog)) {
1575 prog->skip_pointsize_xfb = true;
1576 NIR_PASS(_, prog->nir, gl_nir_add_point_size);
1577 }
1578 }
1579
1580 st_finalize_program(st, prog, false);
1581 return GL_TRUE;
1582 }
1583