/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Authors:
 *   Keith Whitwell
 *   Brian Paul
 */

#include "main/errors.h"
#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "nir/nir_to_tgsi.h"

#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_program.h"
#include "st_atifs_to_nir.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"

#include "cso_cache/cso_context.h"

static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
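 *
 * The result lands in prog->affected_states as a mask of ST_NEW_* dirty
 * bits; st_finalize_program ORs that mask into st->dirty whenever this
 * program is current, so a bind only revalidates state the shader can
 * actually reference.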
*/ void st_set_prog_affected_state_flags(struct gl_program *prog) { uint64_t *states; switch (prog->info.stage) { case MESA_SHADER_VERTEX: states = &prog->affected_states; *states = ST_NEW_VS_STATE | ST_NEW_RASTERIZER | ST_NEW_VERTEX_ARRAYS; set_affected_state_flags(states, prog, ST_NEW_VS_CONSTANTS, ST_NEW_VS_SAMPLER_VIEWS, ST_NEW_VS_SAMPLERS, ST_NEW_VS_IMAGES, ST_NEW_VS_UBOS, ST_NEW_VS_SSBOS, ST_NEW_VS_ATOMICS); break; case MESA_SHADER_TESS_CTRL: states = &prog->affected_states; *states = ST_NEW_TCS_STATE; set_affected_state_flags(states, prog, ST_NEW_TCS_CONSTANTS, ST_NEW_TCS_SAMPLER_VIEWS, ST_NEW_TCS_SAMPLERS, ST_NEW_TCS_IMAGES, ST_NEW_TCS_UBOS, ST_NEW_TCS_SSBOS, ST_NEW_TCS_ATOMICS); break; case MESA_SHADER_TESS_EVAL: states = &prog->affected_states; *states = ST_NEW_TES_STATE | ST_NEW_RASTERIZER; set_affected_state_flags(states, prog, ST_NEW_TES_CONSTANTS, ST_NEW_TES_SAMPLER_VIEWS, ST_NEW_TES_SAMPLERS, ST_NEW_TES_IMAGES, ST_NEW_TES_UBOS, ST_NEW_TES_SSBOS, ST_NEW_TES_ATOMICS); break; case MESA_SHADER_GEOMETRY: states = &prog->affected_states; *states = ST_NEW_GS_STATE | ST_NEW_RASTERIZER; set_affected_state_flags(states, prog, ST_NEW_GS_CONSTANTS, ST_NEW_GS_SAMPLER_VIEWS, ST_NEW_GS_SAMPLERS, ST_NEW_GS_IMAGES, ST_NEW_GS_UBOS, ST_NEW_GS_SSBOS, ST_NEW_GS_ATOMICS); break; case MESA_SHADER_FRAGMENT: states = &prog->affected_states; /* gl_FragCoord and glDrawPixels always use constants. */ *states = ST_NEW_FS_STATE | ST_NEW_SAMPLE_SHADING | ST_NEW_FS_CONSTANTS; set_affected_state_flags(states, prog, ST_NEW_FS_CONSTANTS, ST_NEW_FS_SAMPLER_VIEWS, ST_NEW_FS_SAMPLERS, ST_NEW_FS_IMAGES, ST_NEW_FS_UBOS, ST_NEW_FS_SSBOS, ST_NEW_FS_ATOMICS); break; case MESA_SHADER_COMPUTE: states = &prog->affected_states; *states = ST_NEW_CS_STATE; set_affected_state_flags(states, prog, ST_NEW_CS_CONSTANTS, ST_NEW_CS_SAMPLER_VIEWS, ST_NEW_CS_SAMPLERS, ST_NEW_CS_IMAGES, ST_NEW_CS_UBOS, ST_NEW_CS_SSBOS, ST_NEW_CS_ATOMICS); break; default: unreachable("unhandled shader stage"); } } /** * Delete a shader variant. Note the caller must unlink the variant from * the linked list. */ static void delete_variant(struct st_context *st, struct st_variant *v, GLenum target) { if (v->driver_shader) { if (target == GL_VERTEX_PROGRAM_ARB && ((struct st_common_variant*)v)->key.is_draw_shader) { /* Draw shader. */ draw_delete_vertex_shader(st->draw, v->driver_shader); } else if (st->has_shareable_shaders || v->st == st) { /* The shader's context matches the calling context, or we * don't care. */ switch (target) { case GL_VERTEX_PROGRAM_ARB: st->pipe->delete_vs_state(st->pipe, v->driver_shader); break; case GL_TESS_CONTROL_PROGRAM_NV: st->pipe->delete_tcs_state(st->pipe, v->driver_shader); break; case GL_TESS_EVALUATION_PROGRAM_NV: st->pipe->delete_tes_state(st->pipe, v->driver_shader); break; case GL_GEOMETRY_PROGRAM_NV: st->pipe->delete_gs_state(st->pipe, v->driver_shader); break; case GL_FRAGMENT_PROGRAM_ARB: st->pipe->delete_fs_state(st->pipe, v->driver_shader); break; case GL_COMPUTE_PROGRAM_NV: st->pipe->delete_compute_state(st->pipe, v->driver_shader); break; default: unreachable("bad shader type in delete_basic_variant"); } } else { /* We can't delete a shader with a context different from the one * that created it. Add it to the creating context's zombie list. 
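          * The zombie list is flushed later from the context that owns
          * the shader, where calling the driver's delete hooks is safe.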
*/ enum pipe_shader_type type = pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target)); st_save_zombie_shader(v->st, type, v->driver_shader); } } FREE(v); } static void st_unbind_program(struct st_context *st, struct gl_program *p) { /* Unbind the shader in cso_context and re-bind in st/mesa. */ switch (p->info.stage) { case MESA_SHADER_VERTEX: cso_set_vertex_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_VS_STATE; break; case MESA_SHADER_TESS_CTRL: cso_set_tessctrl_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_TCS_STATE; break; case MESA_SHADER_TESS_EVAL: cso_set_tesseval_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_TES_STATE; break; case MESA_SHADER_GEOMETRY: cso_set_geometry_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_GS_STATE; break; case MESA_SHADER_FRAGMENT: cso_set_fragment_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_FS_STATE; break; case MESA_SHADER_COMPUTE: cso_set_compute_shader_handle(st->cso_context, NULL); st->dirty |= ST_NEW_CS_STATE; break; default: unreachable("invalid shader type"); } } /** * Free all basic program variants. */ void st_release_variants(struct st_context *st, struct gl_program *p) { struct st_variant *v; /* If we are releasing shaders, re-bind them, because we don't * know which shaders are bound in the driver. */ if (p->variants) st_unbind_program(st, p); for (v = p->variants; v; ) { struct st_variant *next = v->next; delete_variant(st, v, p->Target); v = next; } p->variants = NULL; if (p->state.tokens) { ureg_free_tokens(p->state.tokens); p->state.tokens = NULL; } /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on * it has resulted in the driver taking ownership of the NIR. Those * callers should be NULLing out the nir field in any pipe_shader_state * that might have this called in order to indicate that. * * GLSL IR and ARB programs will have set gl_program->nir to the same * shader as ir->ir.nir, so it will be freed by _mesa_delete_program(). */ } /** * Free all basic program variants and unref program. */ void st_release_program(struct st_context *st, struct gl_program **p) { if (!*p) return; destroy_program_variants(st, *p); _mesa_reference_program(st->ctx, p, NULL); } void st_finalize_nir_before_variants(struct nir_shader *nir) { NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); if (nir->options->lower_all_io_to_temps || nir->options->lower_all_io_to_elements || nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY) { NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true); } /* st_nir_assign_vs_in_locations requires correct shader info. */ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); st_nir_assign_vs_in_locations(nir); } static void st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir, struct gl_program *prog) { struct pipe_screen *screen = st->screen; NIR_PASS_V(nir, nir_lower_regs_to_ssa); nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa"); /* Lower outputs to temporaries to avoid reading from output variables (which * is permitted by the language but generally not implemented in HW). 
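    * The pass shadows each output with a function-local temporary and
    * emits one copy to the real output at the end of the entrypoint.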
*/ NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false); NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen); NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); /* Optimise NIR */ NIR_PASS_V(nir, nir_opt_constant_folding); gl_nir_opts(nir); st_finalize_nir_before_variants(nir); if (st->allow_st_finalize_nir_twice) { char *msg = st_finalize_nir(st, prog, NULL, nir, true, true); free(msg); } nir_validate_shader(nir, "after st/glsl finalize_nir"); } /** * Translate ARB (asm) program to NIR */ static nir_shader * st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog, gl_shader_stage stage) { const struct nir_shader_compiler_options *options = st_get_nir_compiler_options(st, prog->info.stage); /* Translate to NIR */ nir_shader *nir = prog_to_nir(st->ctx, prog, options); st_prog_to_nir_postprocess(st, nir, prog); return nir; } /** * Prepare st_vertex_program info. * * attrib_to_index is an optional mapping from a vertex attrib to a shader * input index. */ void st_prepare_vertex_program(struct gl_program *prog) { struct gl_vertex_program *stvp = (struct gl_vertex_program *)prog; stvp->num_inputs = util_bitcount64(prog->info.inputs_read); stvp->vert_attrib_mask = prog->info.inputs_read; /* Compute mapping of vertex program outputs to slots. */ memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output)); unsigned num_outputs = 0; for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) { if (prog->info.outputs_written & BITFIELD64_BIT(attr)) stvp->result_to_output[attr] = num_outputs++; } /* pre-setup potentially unused edgeflag output */ stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs; } void st_translate_stream_output_info(struct gl_program *prog) { struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback; if (!info) return; /* Determine the (default) output register mapping for each output. */ unsigned num_outputs = 0; ubyte output_mapping[VARYING_SLOT_TESS_MAX]; memset(output_mapping, 0, sizeof(output_mapping)); for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) { /* this output was added by mesa/st and should not be tracked for xfb: * drivers must check var->data.explicit_location to find the original output * and only emit that one for xfb */ if (prog->skip_pointsize_xfb && attr == VARYING_SLOT_PSIZ) continue; if (prog->info.outputs_written & BITFIELD64_BIT(attr)) output_mapping[attr] = num_outputs++; } /* Translate stream output info. */ struct pipe_stream_output_info *so_info = &prog->state.stream_output; for (unsigned i = 0; i < info->NumOutputs; i++) { so_info->output[i].register_index = output_mapping[info->Outputs[i].OutputRegister]; so_info->output[i].start_component = info->Outputs[i].ComponentOffset; so_info->output[i].num_components = info->Outputs[i].NumComponents; so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer; so_info->output[i].dst_offset = info->Outputs[i].DstOffset; so_info->output[i].stream = info->Outputs[i].StreamId; } for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { so_info->stride[i] = info->Buffers[i].Stride; } so_info->num_outputs = info->NumOutputs; } /** * Creates a driver shader from a NIR shader. Takes ownership of the * passed nir_shader. 
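 *
 * If the driver's preferred IR is not NIR, the shader is first translated
 * to TGSI via nir_to_tgsi; either way the stage's pipe create_*_state()
 * hook is called and the resulting driver CSO is returned.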
*/ struct pipe_shader_state * st_create_nir_shader(struct st_context *st, struct pipe_shader_state *state) { struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = st->screen; assert(state->type == PIPE_SHADER_IR_NIR); nir_shader *nir = state->ir.nir; struct shader_info info = nir->info; gl_shader_stage stage = nir->info.stage; enum pipe_shader_type sh = pipe_shader_type_from_mesa(stage); if (ST_DEBUG & DEBUG_PRINT_IR) { fprintf(stderr, "NIR before handing off to driver:\n"); nir_print_shader(nir, stderr); } if (PIPE_SHADER_IR_NIR != screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_PREFERRED_IR)) { /* u_screen.c defaults to images as deref enabled for some reason (which * is what radeonsi wants), but nir-to-tgsi requires lowered images. */ if (screen->get_param(screen, PIPE_CAP_NIR_IMAGES_AS_DEREF)) NIR_PASS_V(nir, gl_nir_lower_images, false); state->type = PIPE_SHADER_IR_TGSI; state->tokens = nir_to_tgsi(nir, screen); if (ST_DEBUG & DEBUG_PRINT_IR) { fprintf(stderr, "TGSI for driver after nir-to-tgsi:\n"); tgsi_dump(state->tokens, 0); fprintf(stderr, "\n"); } } struct pipe_shader_state *shader; switch (stage) { case MESA_SHADER_VERTEX: shader = pipe->create_vs_state(pipe, state); break; case MESA_SHADER_TESS_CTRL: shader = pipe->create_tcs_state(pipe, state); break; case MESA_SHADER_TESS_EVAL: shader = pipe->create_tes_state(pipe, state); break; case MESA_SHADER_GEOMETRY: shader = pipe->create_gs_state(pipe, state); break; case MESA_SHADER_FRAGMENT: shader = pipe->create_fs_state(pipe, state); break; case MESA_SHADER_COMPUTE: { struct pipe_compute_state cs = {0}; cs.ir_type = state->type; cs.req_local_mem = info.shared_size; if (state->type == PIPE_SHADER_IR_NIR) cs.prog = state->ir.nir; else cs.prog = state->tokens; shader = pipe->create_compute_state(pipe, &cs); break; } default: unreachable("unsupported shader stage"); return NULL; } if (state->type == PIPE_SHADER_IR_TGSI) tgsi_free_tokens(state->tokens); return shader; } /** * Translate a vertex program. */ static bool st_translate_vertex_program(struct st_context *st, struct gl_program *prog) { /* ARB_vp: */ if (prog->arb.IsPositionInvariant) _mesa_insert_mvp_code(st->ctx, prog); /* This determines which states will be updated when the assembly * shader is bound. */ prog->affected_states = ST_NEW_VS_STATE | ST_NEW_RASTERIZER | ST_NEW_VERTEX_ARRAYS; if (prog->Parameters->NumParameters) prog->affected_states |= ST_NEW_VS_CONSTANTS; if (prog->nir) ralloc_free(prog->nir); if (prog->serialized_nir) { free(prog->serialized_nir); prog->serialized_nir = NULL; } prog->state.type = PIPE_SHADER_IR_NIR; prog->nir = st_translate_prog_to_nir(st, prog, MESA_SHADER_VERTEX); prog->info = prog->nir->info; st_prepare_vertex_program(prog); return true; } static struct nir_shader * get_nir_shader(struct st_context *st, struct gl_program *prog) { if (prog->nir) { nir_shader *nir = prog->nir; /* The first shader variant takes ownership of NIR, so that there is * no cloning. Additional shader variants are always generated from * serialized NIR to save memory. 
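       * (The serialized copy is produced up front, either by
       * st_serialize_nir or by the disk-cache write path, so it is
       * always present once the NIR has been handed out.)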
*/ prog->nir = NULL; assert(prog->serialized_nir && prog->serialized_nir_size); return nir; } struct blob_reader blob_reader; const struct nir_shader_compiler_options *options = st_get_nir_compiler_options(st, prog->info.stage); blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size); return nir_deserialize(NULL, options, &blob_reader); } static void lower_ucp(struct st_context *st, struct nir_shader *nir, unsigned ucp_enables, struct gl_program_parameter_list *params) { if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables); else { struct pipe_screen *screen = st->screen; bool can_compact = screen->get_param(screen, PIPE_CAP_NIR_COMPACT_ARRAYS); bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL; gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}}; for (int i = 0; i < MAX_CLIP_PLANES; ++i) { if (use_eye) { clipplane_state[i][0] = STATE_CLIPPLANE; clipplane_state[i][1] = i; } else { clipplane_state[i][0] = STATE_CLIP_INTERNAL; clipplane_state[i][1] = i; } _mesa_add_state_reference(params, clipplane_state[i]); } if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables, true, can_compact, clipplane_state); } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables, can_compact, clipplane_state); } NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false); NIR_PASS_V(nir, nir_lower_global_vars_to_local); } } static struct st_common_variant * st_create_common_variant(struct st_context *st, struct gl_program *prog, const struct st_common_variant_key *key) { struct st_common_variant *v = CALLOC_STRUCT(st_common_variant); struct pipe_shader_state state = {0}; static const gl_state_index16 point_size_state[STATE_LENGTH] = { STATE_POINT_SIZE_CLAMPED, 0 }; struct gl_program_parameter_list *params = prog->Parameters; v->key = *key; state.stream_output = prog->state.stream_output; bool finalize = false; state.type = PIPE_SHADER_IR_NIR; state.ir.nir = get_nir_shader(st, prog); const nir_shader_compiler_options *options = ((nir_shader *)state.ir.nir)->options; if (key->clamp_color) { NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs); finalize = true; } if (key->passthrough_edgeflags) { NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags); finalize = true; } if (key->export_point_size) { /* if flag is set, shader must export psiz */ _mesa_add_state_reference(params, point_size_state); NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov, point_size_state); finalize = true; } if (key->lower_ucp) { assert(!options->unify_interfaces); lower_ucp(st, state.ir.nir, key->lower_ucp, params); finalize = true; } if (st->emulate_gl_clamp && (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) { nir_lower_tex_options tex_opts = {0}; tex_opts.saturate_s = key->gl_clamp[0]; tex_opts.saturate_t = key->gl_clamp[1]; tex_opts.saturate_r = key->gl_clamp[2]; NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts); } if (finalize || !st->allow_st_finalize_nir_twice) { char *msg = st_finalize_nir(st, prog, prog->shader_program, state.ir.nir, true, false); free(msg); /* Clip lowering and edgeflags may have introduced new varyings, so * update the inputs_read/outputs_written. However, with * unify_interfaces set (aka iris) the non-SSO varyings layout is * decided at link time with outputs_written updated so the two line * up. 
A driver with this flag set may not use any of the lowering * passes that would change the varyings, so skip to make sure we don't * break its linkage. */ if (!options->unify_interfaces) { nir_shader_gather_info(state.ir.nir, nir_shader_get_entrypoint(state.ir.nir)); } } if (key->is_draw_shader) v->base.driver_shader = draw_create_vertex_shader(st->draw, &state); else v->base.driver_shader = st_create_nir_shader(st, &state); return v; } static void st_add_variant(struct st_variant **list, struct st_variant *v) { struct st_variant *first = *list; /* Make sure that the default variant stays the first in the list, and insert * any later variants in as the second entry. */ if (first) { v->next = first->next; first->next = v; } else { *list = v; } } /** * Find/create a vertex program variant. */ struct st_common_variant * st_get_common_variant(struct st_context *st, struct gl_program *prog, const struct st_common_variant_key *key) { struct st_common_variant *v; /* Search for existing variant */ for (v = st_common_variant(prog->variants); v; v = st_common_variant(v->base.next)) { if (memcmp(&v->key, key, sizeof(*key)) == 0) { break; } } if (!v) { if (prog->variants != NULL) { _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM, "Compiling %s shader variant (%s%s%s%s%s%s)", _mesa_shader_stage_to_string(prog->info.stage), key->passthrough_edgeflags ? "edgeflags," : "", key->clamp_color ? "clamp_color," : "", key->export_point_size ? "point_size," : "", key->lower_ucp ? "ucp," : "", key->is_draw_shader ? "draw," : "", key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : ""); } /* create now */ v = st_create_common_variant(st, prog, key); if (v) { v->base.st = key->st; if (prog->info.stage == MESA_SHADER_VERTEX) { struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; v->vert_attrib_mask = vp->vert_attrib_mask | (key->passthrough_edgeflags ? VERT_BIT_EDGEFLAG : 0); } st_add_variant(&prog->variants, &v->base); } } return v; } /** * Translate a non-GLSL Mesa fragment shader into a NIR shader. */ static bool st_translate_fragment_program(struct st_context *st, struct gl_program *fp) { /* This determines which states will be updated when the assembly * shader is bound. * * fragment.position and glDrawPixels always use constants. */ fp->affected_states = ST_NEW_FS_STATE | ST_NEW_SAMPLE_SHADING | ST_NEW_FS_CONSTANTS; if (fp->ati_fs) { /* Just set them for ATI_fs unconditionally. */ fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS | ST_NEW_FS_SAMPLERS; } else { /* ARB_fp */ if (fp->SamplersUsed) fp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS | ST_NEW_FS_SAMPLERS; } /* Translate to NIR. ATI_fs translates at variant time. 
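    * (ATI_fragment_shader programs depend on the sampled texture targets,
    * which are only known once the variant key is built; see
    * st_create_fp_variant.)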
*/ if (!fp->ati_fs) { nir_shader *nir = st_translate_prog_to_nir(st, fp, MESA_SHADER_FRAGMENT); if (fp->nir) ralloc_free(fp->nir); if (fp->serialized_nir) { free(fp->serialized_nir); fp->serialized_nir = NULL; } fp->state.type = PIPE_SHADER_IR_NIR; fp->nir = nir; } return true; } static struct st_fp_variant * st_create_fp_variant(struct st_context *st, struct gl_program *fp, const struct st_fp_variant_key *key) { struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant); struct pipe_shader_state state = {0}; struct gl_program_parameter_list *params = fp->Parameters; static const gl_state_index16 texcoord_state[STATE_LENGTH] = { STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 }; static const gl_state_index16 scale_state[STATE_LENGTH] = { STATE_PT_SCALE }; static const gl_state_index16 bias_state[STATE_LENGTH] = { STATE_PT_BIAS }; static const gl_state_index16 alpha_ref_state[STATE_LENGTH] = { STATE_ALPHA_REF }; if (!variant) return NULL; /* Translate ATI_fs to NIR at variant time because that's when we have the * texture types. */ if (fp->ati_fs) { const struct nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT); nir_shader *s = st_translate_atifs_program(fp->ati_fs, key, fp, options); st_prog_to_nir_postprocess(st, s, fp); state.ir.nir = s; } else { state.ir.nir = get_nir_shader(st, fp); } state.type = PIPE_SHADER_IR_NIR; bool finalize = false; if (key->clamp_color) { NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs); finalize = true; } if (key->lower_flatshade) { NIR_PASS_V(state.ir.nir, nir_lower_flatshade); finalize = true; } if (key->lower_alpha_func != COMPARE_FUNC_ALWAYS) { _mesa_add_state_reference(params, alpha_ref_state); NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func, false, alpha_ref_state); finalize = true; } if (key->lower_two_sided_color) { bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal; NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval); finalize = true; } if (key->persample_shading) { nir_shader *shader = state.ir.nir; nir_foreach_shader_in_variable(var, shader) var->data.sample = true; finalize = true; } if (key->lower_texcoord_replace) { bool point_coord_is_sysval = st->ctx->Const.GLSLPointCoordIsSysVal; NIR_PASS_V(state.ir.nir, nir_lower_texcoord_replace, key->lower_texcoord_replace, point_coord_is_sysval, false); finalize = true; } if (st->emulate_gl_clamp && (key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2])) { nir_lower_tex_options tex_opts = {0}; tex_opts.saturate_s = key->gl_clamp[0]; tex_opts.saturate_t = key->gl_clamp[1]; tex_opts.saturate_r = key->gl_clamp[2]; NIR_PASS_V(state.ir.nir, nir_lower_tex, &tex_opts); finalize = true; } assert(!(key->bitmap && key->drawpixels)); /* glBitmap */ if (key->bitmap) { nir_lower_bitmap_options options = {0}; variant->bitmap_sampler = ffs(~fp->SamplersUsed) - 1; options.sampler = variant->bitmap_sampler; options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM; NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options); finalize = true; } /* glDrawPixels (color only) */ if (key->drawpixels) { nir_lower_drawpixels_options options = {{0}}; unsigned samplers_used = fp->SamplersUsed; /* Find the first unused slot. 
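       * Inverting the mask sets a bit for every free sampler, and ffs()
       * returns the 1-based position of the lowest set bit, hence the -1.
       * E.g. samplers_used == 0x7 gives ~samplers_used == ...fff8, so
       * ffs() == 4 and sampler index 3 is picked.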
*/ variant->drawpix_sampler = ffs(~samplers_used) - 1; options.drawpix_sampler = variant->drawpix_sampler; samplers_used |= (1 << variant->drawpix_sampler); options.pixel_maps = key->pixelMaps; if (key->pixelMaps) { variant->pixelmap_sampler = ffs(~samplers_used) - 1; options.pixelmap_sampler = variant->pixelmap_sampler; } options.scale_and_bias = key->scaleAndBias; if (key->scaleAndBias) { _mesa_add_state_reference(params, scale_state); memcpy(options.scale_state_tokens, scale_state, sizeof(options.scale_state_tokens)); _mesa_add_state_reference(params, bias_state); memcpy(options.bias_state_tokens, bias_state, sizeof(options.bias_state_tokens)); } _mesa_add_state_reference(params, texcoord_state); memcpy(options.texcoord_state_tokens, texcoord_state, sizeof(options.texcoord_state_tokens)); NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options); finalize = true; } bool need_lower_tex_src_plane = false; if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv || key->external.lower_xy_uxvx || key->external.lower_yx_xuxv || key->external.lower_ayuv || key->external.lower_xyuv || key->external.lower_yuv || key->external.lower_yu_yv || key->external.lower_y41x)) { st_nir_lower_samplers(st->screen, state.ir.nir, fp->shader_program, fp); nir_lower_tex_options options = {0}; options.lower_y_uv_external = key->external.lower_nv12; options.lower_y_u_v_external = key->external.lower_iyuv; options.lower_xy_uxvx_external = key->external.lower_xy_uxvx; options.lower_yx_xuxv_external = key->external.lower_yx_xuxv; options.lower_ayuv_external = key->external.lower_ayuv; options.lower_xyuv_external = key->external.lower_xyuv; options.lower_yuv_external = key->external.lower_yuv; options.lower_yu_yv_external = key->external.lower_yu_yv; options.lower_y41x_external = key->external.lower_y41x; options.bt709_external = key->external.bt709; options.bt2020_external = key->external.bt2020; options.yuv_full_range_external = key->external.yuv_full_range; NIR_PASS_V(state.ir.nir, nir_lower_tex, &options); finalize = true; need_lower_tex_src_plane = true; } if (finalize || !st->allow_st_finalize_nir_twice) { char *msg = st_finalize_nir(st, fp, fp->shader_program, state.ir.nir, false, false); free(msg); } /* This pass needs to happen *after* nir_lower_sampler */ if (unlikely(need_lower_tex_src_plane)) { NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane, ~fp->SamplersUsed, key->external.lower_nv12 | key->external.lower_xy_uxvx | key->external.lower_yx_xuxv, key->external.lower_iyuv); finalize = true; } if (finalize || !st->allow_st_finalize_nir_twice) { /* Some of the lowering above may have introduced new varyings */ nir_shader_gather_info(state.ir.nir, nir_shader_get_entrypoint(state.ir.nir)); struct pipe_screen *screen = st->screen; if (screen->finalize_nir) { char *msg = screen->finalize_nir(screen, state.ir.nir); free(msg); } } variant->base.driver_shader = st_create_nir_shader(st, &state); variant->key = *key; return variant; } /** * Translate fragment program if needed. */ struct st_fp_variant * st_get_fp_variant(struct st_context *st, struct gl_program *fp, const struct st_fp_variant_key *key) { struct st_fp_variant *fpv; /* Search for existing variant */ for (fpv = st_fp_variant(fp->variants); fpv; fpv = st_fp_variant(fpv->base.next)) { if (memcmp(&fpv->key, key, sizeof(*key)) == 0) { break; } } if (!fpv) { /* create new */ if (fp->variants != NULL) { _mesa_perf_debug(st->ctx, MESA_DEBUG_SEVERITY_MEDIUM, "Compiling fragment shader variant (%s%s%s%s%s%s%s%s%s%s%s%s%s)", key->bitmap ? 
"bitmap," : "", key->drawpixels ? "drawpixels," : "", key->scaleAndBias ? "scale_bias," : "", key->pixelMaps ? "pixel_maps," : "", key->clamp_color ? "clamp_color," : "", key->persample_shading ? "persample_shading," : "", key->fog ? "fog," : "", key->lower_two_sided_color ? "twoside," : "", key->lower_flatshade ? "flatshade," : "", key->lower_texcoord_replace ? "texcoord_replace," : "", key->lower_alpha_func ? "alpha_compare," : "", /* skipped ATI_fs targets */ fp->ExternalSamplersUsed ? "external?," : "", key->gl_clamp[0] || key->gl_clamp[1] || key->gl_clamp[2] ? "GL_CLAMP," : ""); } fpv = st_create_fp_variant(st, fp, key); if (fpv) { fpv->base.st = key->st; st_add_variant(&fp->variants, &fpv->base); } } return fpv; } /** * Vert/Geom/Frag programs have per-context variants. Free all the * variants attached to the given program which match the given context. */ static void destroy_program_variants(struct st_context *st, struct gl_program *p) { if (!p || p == &_mesa_DummyProgram) return; struct st_variant *v, **prevPtr = &p->variants; bool unbound = false; for (v = p->variants; v; ) { struct st_variant *next = v->next; if (v->st == st) { if (!unbound) { st_unbind_program(st, p); unbound = true; } /* unlink from list */ *prevPtr = next; /* destroy this variant */ delete_variant(st, v, p->Target); } else { prevPtr = &v->next; } v = next; } } /** * Callback for _mesa_HashWalk. Free all the shader's program variants * which match the given context. */ static void destroy_shader_program_variants_cb(void *data, void *userData) { struct st_context *st = (struct st_context *) userData; struct gl_shader *shader = (struct gl_shader *) data; switch (shader->Type) { case GL_SHADER_PROGRAM_MESA: { struct gl_shader_program *shProg = (struct gl_shader_program *) data; GLuint i; for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) { if (shProg->_LinkedShaders[i]) destroy_program_variants(st, shProg->_LinkedShaders[i]->Program); } } break; case GL_VERTEX_SHADER: case GL_FRAGMENT_SHADER: case GL_GEOMETRY_SHADER: case GL_TESS_CONTROL_SHADER: case GL_TESS_EVALUATION_SHADER: case GL_COMPUTE_SHADER: break; default: assert(0); } } /** * Callback for _mesa_HashWalk. Free all the program variants which match * the given context. */ static void destroy_program_variants_cb(void *data, void *userData) { struct st_context *st = (struct st_context *) userData; struct gl_program *program = (struct gl_program *) data; destroy_program_variants(st, program); } /** * Walk over all shaders and programs to delete any variants which * belong to the given context. * This is called during context tear-down. */ void st_destroy_program_variants(struct st_context *st) { /* If shaders can be shared with other contexts, the last context will * call DeleteProgram on all shaders, releasing everything. */ if (st->has_shareable_shaders) return; /* ARB vert/frag program */ _mesa_HashWalk(st->ctx->Shared->Programs, destroy_program_variants_cb, st); /* GLSL vert/frag/geom shaders */ _mesa_HashWalk(st->ctx->Shared->ShaderObjects, destroy_shader_program_variants_cb, st); } bool st_can_add_pointsize_to_program(struct st_context *st, struct gl_program *prog) { nir_shader *nir = prog->nir; if (!nir) return true; //fixedfunction assert(nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_GEOMETRY); if (nir->info.outputs_written & VARYING_BIT_PSIZ) return false; unsigned max_components = nir->info.stage == MESA_SHADER_GEOMETRY ? 
      st->ctx->Const.MaxGeometryTotalOutputComponents :
      st->ctx->Const.Program[nir->info.stage].MaxOutputComponents;
   unsigned num_components = 0;
   unsigned needed_components = nir->info.stage == MESA_SHADER_GEOMETRY ?
                                nir->info.gs.vertices_out : 1;
   nir_foreach_shader_out_variable(var, nir) {
      num_components += glsl_count_dword_slots(var->type, false);
   }

   /* Ensure that there is enough attribute space to emit at least one
    * primitive.
    */
   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      if (num_components + needed_components >
          st->ctx->Const.Program[nir->info.stage].MaxOutputComponents)
         return false;
      num_components *= nir->info.gs.vertices_out;
   }

   return num_components + needed_components <= max_components;
}

/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB:
   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      /* outputs_written is a bitmask, so it must be tested against
       * VARYING_BIT_* masks, not the raw VARYING_SLOT_* indices.
       */
      if (st->ctx->API == API_OPENGL_COMPAT &&
          st->clamp_vert_color_in_shader &&
          (prog->info.outputs_written & (VARYING_BIT_COL0 |
                                         VARYING_BIT_COL1 |
                                         VARYING_BIT_BFC0 |
                                         VARYING_BIT_BFC1))) {
         key.clamp_color = true;
      }

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, prog, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      key.lower_alpha_func = COMPARE_FUNC_ALWAYS;
      if (prog->ati_fs) {
         for (int i = 0; i < ARRAY_SIZE(key.texture_index); i++)
            key.texture_index[i] = TEXTURE_2D_INDEX;
      }
      st_get_fp_variant(st, prog, &key);
      break;
   }

   default:
      assert(0);
   }
}

void
st_serialize_nir(struct gl_program *prog)
{
   if (!prog->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, prog->nir, false);
      blob_finish_get_buffer(&blob, &prog->serialized_nir, &size);
      prog->serialized_nir_size = size;
   }
}

void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX) {
         st->ctx->Array.NewVertexElements = true;
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, prog);
      } else {
         st->dirty |= prog->affected_states;
      }
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled. If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_nir(prog);
   }

   /* Always create the default variant of the program. */
   st_precompile_shader_variant(st, prog);
}

/**
 * Called when the program's text/code is changed. We have to free
 * all shader variants and corresponding gallium shaders when this happens.
 */
GLboolean
st_program_string_notify(struct gl_context *ctx,
                         GLenum target,
                         struct gl_program *prog)
{
   struct st_context *st = st_context(ctx);

   /* GLSL-to-NIR should not end up here.
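    * (GLSL programs carry a gl_shader_program and are linked through the
    * NIR linker; only ARB assembly programs and ATI_fragment_shader reach
    * this hook.)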
*/ assert(!prog->shader_program); st_release_variants(st, prog); if (target == GL_FRAGMENT_PROGRAM_ARB || target == GL_FRAGMENT_SHADER_ATI) { if (target == GL_FRAGMENT_SHADER_ATI) { assert(prog->ati_fs); assert(prog->ati_fs->Program == prog); st_init_atifs_prog(ctx, prog); } if (!st_translate_fragment_program(st, prog)) return false; } else if (target == GL_VERTEX_PROGRAM_ARB) { if (!st_translate_vertex_program(st, prog)) return false; if (st->lower_point_size && st_can_add_pointsize_to_program(st, prog)) { prog->skip_pointsize_xfb = true; NIR_PASS_V(prog->nir, st_nir_add_point_size); } } st_finalize_program(st, prog); return GL_TRUE; }