/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "nir_builder.h"
#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "nir/nir_xfb_info.h"
#include "nir/pipe_nir.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"

#include "util/u_dump.h"
#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

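/**
 * Helper for st_set_prog_affected_state_flags: OR each per-stage dirty
 * flag into *states, but only for the resource types the program actually
 * uses.
 */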
static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &prog->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &prog->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &prog->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &prog->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &prog->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &prog->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   FREE(v);
}

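/**
 * Unbind a program from the cso context so that its variants can be
 * destroyed safely; setting NewDriverState makes st/mesa re-bind the
 * current program on the next draw.
 */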
static void
st_unbind_program(struct st_context *st, struct gl_program *p)
{
   struct gl_context *ctx = st->ctx;

   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      ctx->NewDriverState |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct gl_program *p)
{
   struct st_variant *v;

   /* If we are releasing shaders, re-bind them, because we don't
    * know which shaders are bound in the driver.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Target);
      v = next;
   }

   p->variants = NULL;

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct gl_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, *p);
   _mesa_reference_program(st->ctx, p, NULL);
}

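/**
 * IO lowering that must happen once before any variants are created:
 * split/lower variable copies, lower IO arrays to elements where the
 * options or stage require it, and assign VS input locations.
 */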
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS(_, nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* st_nir_assign_vs_in_locations requires correct shader info. */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}

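/**
 * Common post-processing for NIR generated from ARB programs or ATI_fs:
 * convert registers to SSA, lower outputs to temporaries, lower
 * window-position transforms and system values, optimize, and optionally
 * run the first finalization pass so the result can be serialized.
 */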
static void
st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
                           struct gl_program *prog)
{
   struct pipe_screen *screen = st->screen;

   NIR_PASS(_, nir, nir_lower_reg_intrinsics_to_ssa);
   nir_validate_shader(nir, "after st/ptn lower_reg_intrinsics_to_ssa");

   /* Lower outputs to temporaries to avoid reading from output variables
    * (which is permitted by the language but generally not implemented in
    * HW).
    */
   NIR_PASS(_, nir, nir_lower_io_to_temporaries,
            nir_shader_get_entrypoint(nir),
            true, false);
   NIR_PASS(_, nir, nir_lower_global_vars_to_local);

   NIR_PASS(_, nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS(_, nir, nir_lower_system_values);

   struct nir_lower_compute_system_values_options cs_options = {
      .has_base_global_invocation_id = false,
      .has_base_workgroup_id = false,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs_options);

   /* Optimise NIR */
   NIR_PASS(_, nir, nir_opt_constant_folding);
   gl_nir_opts(nir);

   /* This must be done after optimizations to assign IO bases. */
   nir_recompute_io_bases(nir, nir_var_shader_in | nir_var_shader_out);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice) {
      st_serialize_base_nir(prog, nir);
      st_finalize_nir(st, prog, NULL, nir, true, false);

      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, nir);
         free(msg);
      }
   }

   nir_validate_shader(nir, "after st/glsl finalize_nir");
}

/**
 * Prepare st_vertex_program info.
 *
 * Computes the input attribute mask and the mapping from vertex program
 * outputs to output slots, including a slot for the potentially unused
 * edgeflag output.
 */
void
st_prepare_vertex_program(struct gl_program *prog)
{
   struct gl_vertex_program *stvp = (struct gl_vertex_program *)prog;

   stvp->num_inputs = util_bitcount64(prog->info.inputs_read);
   stvp->vert_attrib_mask = prog->info.inputs_read;

   /* Compute mapping of vertex program outputs to slots. */
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}

void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      /* This output was added by mesa/st and should not be tracked for xfb:
       * drivers must check var->data.explicit_location to find the original
       * output and only emit that one for xfb.
       */
      if (prog->skip_pointsize_xfb && attr == VARYING_SLOT_PSIZ)
         continue;
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &prog->state.stream_output;

   if (!num_outputs) {
      so_info->num_outputs = 0;
      return;
   }

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Creates a driver shader from a NIR shader.  Takes ownership of the
 * passed nir_shader.
 */
void *
st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state)
{
   struct pipe_context *pipe = st->pipe;

   assert(state->type == PIPE_SHADER_IR_NIR);
   nir_shader *nir = state->ir.nir;
   gl_shader_stage stage = nir->info.stage;

   /* Renumber SSA defs to make it easier to run diff on printed NIR. */
   nir_foreach_function_impl(impl, nir) {
      nir_index_ssa_defs(impl);
   }

   if (ST_DEBUG & DEBUG_PRINT_IR) {
      fprintf(stderr, "NIR before handing off to driver:\n");
      nir_print_shader(nir, stderr);
   }

   if (ST_DEBUG & DEBUG_PRINT_XFB) {
      if (nir->info.io_lowered) {
         if (nir->xfb_info && nir->xfb_info->output_count) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    nir->info.xfb_stride[0], nir->info.xfb_stride[1],
                    nir->info.xfb_stride[2], nir->info.xfb_stride[3]);
            nir_print_xfb_info(nir->xfb_info, stderr);
         }
      } else {
         struct pipe_stream_output_info *so = &state->stream_output;

         if (so->num_outputs) {
            fprintf(stderr, "XFB info before handing off to driver:\n");
            fprintf(stderr, "stride = {%u, %u, %u, %u}\n",
                    so->stride[0], so->stride[1], so->stride[2],
                    so->stride[3]);

            for (unsigned i = 0; i < so->num_outputs; i++) {
               fprintf(stderr, "output%u: buffer=%u offset=%u, location=%u, "
                       "component_offset=%u, component_mask=0x%x, "
                       "stream=%u\n",
                       i, so->output[i].output_buffer,
                       so->output[i].dst_offset * 4,
                       so->output[i].register_index,
                       so->output[i].start_component,
                       BITFIELD_RANGE(so->output[i].start_component,
                                      so->output[i].num_components),
                       so->output[i].stream);
            }
         }
      }
   }

   void *shader;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      shader = pipe->create_vs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = pipe->create_tcs_state(pipe, state);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = pipe->create_tes_state(pipe, state);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = pipe->create_gs_state(pipe, state);
      break;
   case MESA_SHADER_FRAGMENT:
      shader = pipe->create_fs_state(pipe, state);
      break;
   case MESA_SHADER_COMPUTE: {
      /* We'd like to use this for all stages but we need to rework streamout
       * in gallium first.
       */
      shader = pipe_shader_from_nir(pipe, nir);
      break;
   }
   default:
      unreachable("unsupported shader stage");
      return NULL;
   }

   return shader;
}

/**
 * Translate a vertex program.
 */
static bool
st_translate_vertex_program(struct st_context *st,
                            struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    */
   prog->affected_states = ST_NEW_VS_STATE |
                           ST_NEW_RASTERIZER |
                           ST_NEW_VERTEX_ARRAYS;

   if (prog->Parameters->NumParameters)
      prog->affected_states |= ST_NEW_VS_CONSTANTS;

   if (prog->arb.Instructions && prog->nir)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }
   /* Clear the base pointer too, so stale serialized NIR is neither reused
    * nor double-freed when the program is re-translated.
    */
   free(prog->base_serialized_nir);
   prog->base_serialized_nir = NULL;

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions)
      prog->nir = prog_to_nir(st->ctx, prog);
   st_prog_to_nir_postprocess(st, prog->nir, prog);
   prog->info = prog->nir->info;

   st_prepare_vertex_program(prog);
   return true;
}

static const struct nir_shader_compiler_options draw_nir_options = {
   .lower_scmp = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_fsat = true,
   .lower_bitfield_insert = true,
   .lower_bitfield_extract = true,
   .lower_fdph = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp16 = true,
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_split = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_half_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_2x32_64 = true,
   .lower_ifind_msb = true,
   .lower_int64_options = nir_lower_imul_2x32_64,
   .lower_doubles_options = nir_lower_dround_even,
   .max_unroll_iterations = 32,
   .lower_to_scalar = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .lower_fisnormal = true,
   .lower_fquantize2f16 = true,
   .driver_functions = true,
};

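/**
 * Return a NIR shader to compile a variant from.  The first variant takes
 * ownership of prog->nir; additional variants (and draw shaders, which may
 * need the pre-finalized "base" NIR) are deserialized from the blobs saved
 * by st_serialize_nir()/st_serialize_base_nir().
 */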
static struct nir_shader *
get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
{
   if ((!is_draw || !st->ctx->Const.PackedDriverUniformStorage) && prog->nir) {
      nir_shader *nir = prog->nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning.  Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      prog->nir = NULL;
      assert(prog->serialized_nir && prog->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      is_draw ? &draw_nir_options
              : st_get_nir_compiler_options(st, prog->info.stage);

   if (is_draw && st->ctx->Const.PackedDriverUniformStorage &&
       (!prog->shader_program ||
        prog->shader_program->data->LinkStatus != LINKING_SKIPPED)) {
      assert(prog->base_serialized_nir);
      blob_reader_init(&blob_reader, prog->base_serialized_nir,
                       prog->base_serialized_nir_size);
   } else {
      assert(prog->serialized_nir);
      blob_reader_init(&blob_reader, prog->serialized_nir,
                       prog->serialized_nir_size);
   }
   return nir_deserialize(NULL, options, &blob_reader);
}

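/**
 * Lower user clip planes.  If the shader already writes gl_ClipDistance,
 * just disable the planes that aren't enabled; otherwise inject clip
 * distance computation against the clip plane uniforms (eye-space
 * STATE_CLIPPLANE when a user vertex shader is bound, STATE_CLIP_INTERNAL
 * otherwise).
 */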
static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) {
      NIR_PASS(_, nir, nir_lower_clip_disable, ucp_enables);
   } else {
      bool can_compact = nir->options->compact_arrays;
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_CLIP_INTERNAL;
            clipplane_state[i][1] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_TESS_EVAL) {
         NIR_PASS(_, nir, nir_lower_clip_vs, ucp_enables,
                  true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS(_, nir, nir_lower_clip_gs, ucp_enables,
                  can_compact, clipplane_state);
      }

      NIR_PASS(_, nir, nir_lower_io_to_temporaries,
               nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS(_, nir, nir_lower_global_vars_to_local);
   }
}

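/* nir_shader_intrinsics_pass callback: rewrite pixel/centroid barycentric
 * loads as per-sample loads, forcing per-sample interpolation.
 */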
static bool
force_persample_shading(struct nir_builder *b, nir_intrinsic_instr *intr,
                        void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
       intr->intrinsic == nir_intrinsic_load_barycentric_centroid) {
      intr->intrinsic = nir_intrinsic_load_barycentric_sample;
      return true;
   }

   return false;
}

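/* qsort comparator: order stream outputs by buffer index first, then by
 * dst_offset within each buffer.
 */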
static int
xfb_compare_dst_offset(const void *a, const void *b)
{
   const struct pipe_stream_output *var0 = (const struct pipe_stream_output*)a;
   const struct pipe_stream_output *var1 = (const struct pipe_stream_output*)b;

   if (var0->output_buffer != var1->output_buffer)
      return var0->output_buffer > var1->output_buffer ? 1 : -1;

   return var0->dst_offset - var1->dst_offset;
}

static void
get_stream_output_info_from_nir(nir_shader *nir,
                                struct pipe_stream_output_info *info)
{
   /* Get pipe_stream_output_info from NIR.  Only used by IO variables. */
   nir_xfb_info *xfb = nir->xfb_info;
   memset(info, 0, sizeof(*info));

   if (!xfb)
      return;

   info->num_outputs = xfb->output_count;

   for (unsigned i = 0; i < 4; i++)
      info->stride[i] = nir->info.xfb_stride[i];

   for (unsigned i = 0; i < xfb->output_count; i++) {
      struct pipe_stream_output *out = &info->output[i];

      assert(!xfb->outputs[i].high_16bits);

      out->register_index =
         util_bitcount64(nir->info.outputs_written &
                         BITFIELD64_MASK(xfb->outputs[i].location));
      out->start_component = xfb->outputs[i].component_offset;
      out->num_components = util_bitcount(xfb->outputs[i].component_mask);
      out->output_buffer = xfb->outputs[i].buffer;
      out->dst_offset = xfb->outputs[i].offset / 4;
      out->stream = xfb->buffer_to_stream[out->output_buffer];
   }

   /* Intel requires that xfb outputs are sorted by dst_offset. */
   qsort(info->output, info->num_outputs, sizeof(info->output[0]),
         xfb_compare_dst_offset);
}

static struct st_common_variant *
st_create_common_variant(struct st_context *st,
                         struct gl_program *prog,
                         const struct st_common_variant_key *key)
{
   MESA_TRACE_FUNC();

   struct st_common_variant *v = CALLOC_STRUCT(st_common_variant);
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = prog->Parameters;

   v->key = *key;

   state.stream_output = prog->state.stream_output;

   bool finalize = false;

   state.type = PIPE_SHADER_IR_NIR;
   state.ir.nir = get_nir_shader(st, prog, key->is_draw_shader);
   const nir_shader_compiler_options *options =
      ((nir_shader *)state.ir.nir)->options;

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }
   if (key->passthrough_edgeflags) {
      NIR_PASS(_, state.ir.nir, nir_lower_passthrough_edgeflags);
      finalize = true;
   }

   if (key->export_point_size) {
      /* if flag is set, shader must export psiz */
      _mesa_add_state_reference(params, point_size_state);
      NIR_PASS(_, state.ir.nir, nir_lower_point_size_mov,
               point_size_state);

      finalize = true;
   }

   if (key->lower_ucp) {
      assert(!options->unify_interfaces);
      lower_ucp(st, state.ir.nir, key->lower_ucp, params);
      finalize = true;
   }

   if (st->emulate_gl_clamp &&
       (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      st_finalize_nir(st, prog, prog->shader_program, state.ir.nir, false,
                      key->is_draw_shader);
   }

   /* This should be after all passes that touch IO. */
   if (state.ir.nir->info.io_lowered &&
       (!(state.ir.nir->options->io_options & nir_io_has_intrinsics) ||
        key->is_draw_shader)) {
      assert(!state.stream_output.num_outputs || state.ir.nir->xfb_info);
      get_stream_output_info_from_nir(state.ir.nir, &state.stream_output);
      /* Some lowering passes can leave dead code behind, but dead IO
       * intrinsics are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);
      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);

      if (state.ir.nir->info.stage == MESA_SHADER_TESS_CTRL &&
          state.ir.nir->options->compact_arrays &&
          state.ir.nir->options->vectorize_tess_levels)
         NIR_PASS(_, state.ir.nir, nir_vectorize_tess_levels);

      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice || key->is_draw_shader) {
      struct pipe_screen *screen = st->screen;
      if (!key->is_draw_shader && screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }

      /* Clip lowering and edgeflags may have introduced new varyings, so
       * update the inputs_read/outputs_written.  However, with
       * unify_interfaces set (aka iris) the non-SSO varyings layout is
       * decided at link time with outputs_written updated so the two line
       * up.  A driver with this flag set may not use any of the lowering
       * passes that would change the varyings, so skip to make sure we don't
       * break its linkage.
       */
      if (!options->unify_interfaces) {
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }
   }

   if (key->is_draw_shader) {
      NIR_PASS(_, state.ir.nir, gl_nir_lower_images, false);
      v->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   } else {
      v->base.driver_shader = st_create_nir_shader(st, &state);
   }

   return v;
}

static void
st_add_variant(struct st_variant **list, struct st_variant *v)
{
   struct st_variant *first = *list;

   /* Make sure that the default variant stays first in the list, and
    * insert any new variants as the second entry.
    */
   if (first) {
      v->next = first->next;
      first->next = v;
   } else {
      *list = v;
   }
}

/**
 * Find/create a variant of a common (VS/TCS/TES/GS/CS) program.
 */
struct st_common_variant *
st_get_common_variant(struct st_context *st,
                      struct gl_program *prog,
                      const struct st_common_variant_key *key)
{
   struct st_common_variant *v;

   /* Search for existing variant */
   for (v = st_common_variant(prog->variants); v;
        v = st_common_variant(v->base.next)) {
      if (memcmp(&v->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!v) {
      if (prog->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling %s shader variant (%s%s%s%s%s%s)",
                          _mesa_shader_stage_to_string(prog->info.stage),
                          key->passthrough_edgeflags ? "edgeflags," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->export_point_size ? "point_size," : "",
                          key->lower_ucp ? "ucp," : "",
                          key->is_draw_shader ? "draw," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] ||
                          key->gl_clamp[2] ? "GL_CLAMP," : "");
      }

      /* create now */
      v = st_create_common_variant(st, prog, key);
      if (v) {
         v->base.st = key->st;

         if (prog->info.stage == MESA_SHADER_VERTEX) {
            struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;

            v->vert_attrib_mask =
               vp->vert_attrib_mask |
               (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0);
         }

         st_add_variant(&prog->variants, &v->base);
      }
   }

   return v;
}


/**
 * Translate a non-GLSL Mesa fragment shader into a NIR shader.
 */
static bool
st_translate_fragment_program(struct st_context *st,
                              struct gl_program *prog)
{
   /* This determines which states will be updated when the assembly
    * shader is bound.
    *
    * fragment.position and glDrawPixels always use constants.
    */
   prog->affected_states = ST_NEW_FS_STATE |
                           ST_NEW_SAMPLE_SHADING |
                           ST_NEW_FS_CONSTANTS;

   if (prog->ati_fs) {
      /* Just set them for ATI_fs unconditionally. */
      prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                               ST_NEW_FS_SAMPLERS;
   } else {
      /* ARB_fp */
      if (prog->SamplersUsed)
         prog->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
   }

   /* Translate to NIR. */
   if (prog->nir && prog->arb.Instructions)
      ralloc_free(prog->nir);

   if (prog->serialized_nir) {
      free(prog->serialized_nir);
      prog->serialized_nir = NULL;
   }

   prog->state.type = PIPE_SHADER_IR_NIR;
   if (prog->arb.Instructions) {
      prog->nir = prog_to_nir(st->ctx, prog);
   } else if (prog->ati_fs) {
      const struct nir_shader_compiler_options *options =
         st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);

      assert(!prog->nir);
      prog->nir = st_translate_atifs_program(prog->ati_fs, prog, options);
   }
   st_prog_to_nir_postprocess(st, prog->nir, prog);

   prog->info = prog->nir->info;
   if (prog->ati_fs) {
      /* ATI_fs will lower fixed function fog at variant time, after the FF
       * vertex prog has been generated.  So we have to always declare a read
       * of FOGC so that FF vp feeds it to us just in case.
       */
      prog->info.inputs_read |= VARYING_BIT_FOGC;
   }

   return true;
}

static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct gl_program *fp,
                     const struct st_fp_variant_key *key)
{
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = fp->Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   MESA_TRACE_FUNC();

   /* Translate ATI_fs to NIR at variant time because that's when we have the
    * texture types.
    */
   state.ir.nir = get_nir_shader(st, fp, false);
   state.type = PIPE_SHADER_IR_NIR;

   bool finalize = false;

   if (fp->ati_fs) {
      if (key->fog) {
         NIR_PASS(_, state.ir.nir, st_nir_lower_fog, key->fog, fp->Parameters);
         NIR_PASS(_, state.ir.nir, nir_lower_io_to_temporaries,
                  nir_shader_get_entrypoint(state.ir.nir),
                  true, false);
         NIR_PASS(_, state.ir.nir, nir_lower_global_vars_to_local);
      }

      NIR_PASS(_, state.ir.nir, st_nir_lower_atifs_samplers,
               key->texture_index);

      finalize = true;
   }

   if (key->clamp_color) {
      NIR_PASS(_, state.ir.nir, nir_lower_clamp_color_outputs);
      finalize = true;
   }

   if (key->lower_flatshade) {
      NIR_PASS(_, state.ir.nir, nir_lower_flatshade);
      finalize = true;
   }

   if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) {
      _mesa_add_state_reference(params, alpha_ref_state);
      NIR_PASS(_, state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
               false, alpha_ref_state);
      finalize = true;
   }

   if (key->lower_two_sided_color) {
      bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
      NIR_PASS(_, state.ir.nir, nir_lower_two_sided_color, face_sysval);
      finalize = true;
   }

   if (key->persample_shading) {
      nir_shader *shader = state.ir.nir;
      if (shader->info.io_lowered) {
         nir_shader_intrinsics_pass(shader, force_persample_shading,
                                    nir_metadata_all, NULL);
      } else {
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
      }

      /* In addition to requiring per-sample interpolation, sample shading
       * changes the behaviour of gl_SampleMaskIn, so we need per-sample
       * shading even if there are no shader-in variables at all.  In that
       * case, uses_sample_shading won't be set by glsl_to_nir.  We need to
       * do so here.
       */
      shader->info.fs.uses_sample_shading = true;

      finalize = true;
   }

   if (st->emulate_gl_clamp &&
       (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.saturate_s = key->gl_clamp[0];
      tex_opts.saturate_t = key->gl_clamp[1];
      tex_opts.saturate_r = key->gl_clamp[2];
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &tex_opts);
      finalize = true;
   }

   assert(!(key->bitmap && key->drawpixels));

   /* glBitmap */
   if (key->bitmap) {
      nir_lower_bitmap_options options = {0};

      variant->bitmap_sampler = ffs(~fp->SamplersUsed) - 1;
      options.sampler = variant->bitmap_sampler;
      options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

      NIR_PASS(_, state.ir.nir, nir_lower_bitmap, &options);
      finalize = true;
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      nir_lower_drawpixels_options options = {{0}};
      unsigned samplers_used = fp->SamplersUsed;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~samplers_used) - 1;
      options.drawpix_sampler = variant->drawpix_sampler;
      samplers_used |= (1 << variant->drawpix_sampler);

      options.pixel_maps = key->pixelMaps;
      if (key->pixelMaps) {
         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
         options.pixelmap_sampler = variant->pixelmap_sampler;
      }

      options.scale_and_bias = key->scaleAndBias;
      if (key->scaleAndBias) {
         _mesa_add_state_reference(params, scale_state);
         memcpy(options.scale_state_tokens, scale_state,
                sizeof(options.scale_state_tokens));
         _mesa_add_state_reference(params, bias_state);
         memcpy(options.bias_state_tokens, bias_state,
                sizeof(options.bias_state_tokens));
      }

      _mesa_add_state_reference(params, texcoord_state);
      memcpy(options.texcoord_state_tokens, texcoord_state,
             sizeof(options.texcoord_state_tokens));

      NIR_PASS(_, state.ir.nir, nir_lower_drawpixels, &options);
      finalize = true;
   }

   bool need_lower_tex_src_plane = false;

   if (unlikely(key->external.lower_nv12 || key->external.lower_nv21 ||
                key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                key->external.lower_yx_xvxu || key->external.lower_xy_vxux ||
                key->external.lower_ayuv || key->external.lower_xyuv ||
                key->external.lower_yuv || key->external.lower_yu_yv ||
                key->external.lower_yv_yu || key->external.lower_y41x)) {

      st_nir_lower_samplers(st->screen, state.ir.nir,
                            fp->shader_program, fp);

      nir_lower_tex_options options = {0};
      options.lower_y_uv_external = key->external.lower_nv12;
      options.lower_y_vu_external = key->external.lower_nv21;
      options.lower_y_u_v_external = key->external.lower_iyuv;
      options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
      options.lower_xy_vxux_external = key->external.lower_xy_vxux;
      options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
      options.lower_yx_xvxu_external = key->external.lower_yx_xvxu;
      options.lower_ayuv_external = key->external.lower_ayuv;
      options.lower_xyuv_external = key->external.lower_xyuv;
      options.lower_yuv_external = key->external.lower_yuv;
      options.lower_yu_yv_external = key->external.lower_yu_yv;
      options.lower_yv_yu_external = key->external.lower_yv_yu;
      options.lower_y41x_external = key->external.lower_y41x;
      options.bt709_external = key->external.bt709;
      options.bt2020_external = key->external.bt2020;
      options.yuv_full_range_external = key->external.yuv_full_range;
      NIR_PASS(_, state.ir.nir, nir_lower_tex, &options);
      finalize = true;
      need_lower_tex_src_plane = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, fp, fp->shader_program, state.ir.nir, false, false);

   /* This pass needs to happen *after* nir_lower_sampler */
   if (unlikely(need_lower_tex_src_plane)) {
      NIR_PASS(_, state.ir.nir, st_nir_lower_tex_src_plane,
               ~fp->SamplersUsed,
               key->external.lower_nv12 | key->external.lower_nv21 |
               key->external.lower_xy_uxvx | key->external.lower_xy_vxux |
               key->external.lower_yx_xuxv | key->external.lower_yx_xvxu,
               key->external.lower_iyuv);
      finalize = true;
   }

   /* It is undefined behavior when an ARB assembly program uses a SHADOW2D
    * target with a texture that is not in a depth format.  In that case
    * NVIDIA automatically replaces the shadow sampler with a normal sampler,
    * so games like Penumbra Overture that abuse this UB (issues/8425) work
    * fine on NVIDIA but break with Mesa.  Replace the shadow sampler with a
    * normal one here.
    */
   if (!fp->shader_program && ~key->depth_textures & fp->ShadowSamplers) {
      NIR_PASS(_, state.ir.nir, nir_remove_tex_shadow,
               ~key->depth_textures & fp->ShadowSamplers);
      finalize = true;
   }

   /* This should be after all passes that touch IO. */
   if (state.ir.nir->info.io_lowered &&
       !(state.ir.nir->options->io_options & nir_io_has_intrinsics)) {
      /* Some lowering passes can leave dead code behind, but dead IO
       * intrinsics are still counted as enabled IO, which breaks things.
       */
      NIR_PASS(_, state.ir.nir, nir_opt_dce);
      NIR_PASS(_, state.ir.nir, st_nir_unlower_io_to_vars);
      gl_nir_opts(state.ir.nir);
      finalize = true;
   }

   if (finalize || !st->allow_st_finalize_nir_twice) {
      /* Some of the lowering above may have introduced new varyings */
      nir_shader_gather_info(state.ir.nir,
                             nir_shader_get_entrypoint(state.ir.nir));

      struct pipe_screen *screen = st->screen;
      if (screen->finalize_nir) {
         char *msg = screen->finalize_nir(screen, state.ir.nir);
         free(msg);
      }
   }

   variant->base.driver_shader = st_create_nir_shader(st, &state);
   variant->key = *key;

   return variant;
}

/**
 * Translate fragment program if needed.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct gl_program *fp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(fp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */

      if (fp->variants != NULL) {
         _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM,
                          "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s%d)",
                          key->bitmap ? "bitmap," : "",
                          key->drawpixels ? "drawpixels," : "",
                          key->scaleAndBias ? "scale_bias," : "",
                          key->pixelMaps ? "pixel_maps," : "",
                          key->clamp_color ? "clamp_color," : "",
                          key->persample_shading ? "persample_shading," : "",
                          key->fog ? "fog," : "",
                          key->lower_two_sided_color ? "twoside," : "",
                          key->lower_flatshade ? "flatshade," : "",
                          key->lower_alpha_func != COMPARE_FUNC_ALWAYS ?
                             "alpha_compare," : "",
                          /* skipped ATI_fs targets */
                          fp->ExternalSamplersUsed ? "external?," : "",
                          key->gl_clamp[0] || key->gl_clamp[1] ||
                          key->gl_clamp[2] ? "GL_CLAMP," : "",
                          "depth_textures=", key->depth_textures);
      }

      fpv = st_create_fp_variant(st, fp, key);
      if (fpv) {
         fpv->base.st = key->st;

         st_add_variant(&fp->variants, &fpv->base);
      }
   }

   return fpv;
}

/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *p)
{
   if (!p || p == &_mesa_DummyProgram)
      return;

   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, p->Target);
      } else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(&st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(&st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}

/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      if (_mesa_is_desktop_gl_compat(st->ctx) &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, prog, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      if (prog->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }

      /* Shadow samplers require texture in depth format, which we lower to
       * non-shadow if necessary for ARB programs.
       */
      if (!prog->shader_program)
         key.depth_textures = prog->ShadowSamplers;

      st_get_fp_variant(st, prog, &key);
      break;
   }

   default:
      assert(0);
   }
}

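/**
 * Serialize prog->nir into prog->serialized_nir so that additional
 * variants can be compiled without keeping (or cloning) the in-memory
 * NIR.  No-op if the program was already serialized.
 */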
void
st_serialize_nir(struct gl_program *prog)
{
   if (!prog->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, prog->nir, false);
      blob_finish_get_buffer(&blob, &prog->serialized_nir, &size);
      prog->serialized_nir_size = size;
   }
}

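/**
 * Like st_serialize_nir(), but captures the pre-finalized "base" NIR of a
 * vertex shader, which get_nir_shader() deserializes for the draw-based
 * fallback path.
 */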
void
st_serialize_base_nir(struct gl_program *prog, nir_shader *nir)
{
   if (!prog->base_serialized_nir && nir->info.stage == MESA_SHADER_VERTEX) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, nir, false);
      blob_finish_get_buffer(&blob, &prog->base_serialized_nir, &size);
      prog->base_serialized_nir_size = size;
   }
}

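/**
 * Finalize a program after translation or linking: flag the affected
 * driver state if the program is currently bound, sweep and serialize its
 * NIR, and precompile the default variant.
 */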
void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   struct gl_context *ctx = st->ctx;
   bool is_bound = false;

   MESA_TRACE_FUNC();

   if (prog->info.stage == MESA_SHADER_VERTEX)
      is_bound = prog == ctx->VertexProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_CTRL)
      is_bound = prog == ctx->TessCtrlProgram._Current;
   else if (prog->info.stage == MESA_SHADER_TESS_EVAL)
      is_bound = prog == ctx->TessEvalProgram._Current;
   else if (prog->info.stage == MESA_SHADER_GEOMETRY)
      is_bound = prog == ctx->GeometryProgram._Current;
   else if (prog->info.stage == MESA_SHADER_FRAGMENT)
      is_bound = prog == ctx->FragmentProgram._Current;
   else if (prog->info.stage == MESA_SHADER_COMPUTE)
      is_bound = prog == ctx->ComputeProgram._Current;

   if (is_bound) {
      if (prog->info.stage == MESA_SHADER_VERTEX) {
         ctx->Array.NewVertexElements = true;
         ctx->NewDriverState |= ST_NEW_VERTEX_PROGRAM(ctx, prog);
      } else {
         ctx->NewDriverState |= prog->affected_states;
      }
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled.  If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_base_nir(prog, prog->nir);
      st_serialize_nir(prog);
   }

   /* Always create the default variant of the program. */
   st_precompile_shader_variant(st, prog);
}

/**
 * Called when the program's text/code is changed.  We have to free
 * all shader variants and corresponding gallium shaders when this happens.
 */
GLboolean
st_program_string_notify(struct gl_context *ctx,
                         GLenum target,
                         struct gl_program *prog)
{
   struct st_context *st = st_context(ctx);

   /* GLSL-to-NIR should not end up here. */
   assert(!prog->shader_program);

   st_release_variants(st, prog);

   if (target == GL_FRAGMENT_PROGRAM_ARB ||
       target == GL_FRAGMENT_SHADER_ATI) {
      if (!st_translate_fragment_program(st, prog))
         return false;
   } else if (target == GL_VERTEX_PROGRAM_ARB) {
      if (!st_translate_vertex_program(st, prog))
         return false;
      if (st->add_point_size &&
          gl_nir_can_add_pointsize_to_program(&st->ctx->Const, prog)) {
         prog->skip_pointsize_xfb = true;
         NIR_PASS(_, prog->nir, gl_nir_add_point_size);
      }
   }

   st_finalize_program(st, prog);
   return GL_TRUE;
}