/*
 * Copyright © 2011 Intel Corporation
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * This lowering pass generates GLSL code that manually packs varyings into
 * vec4 slots, for the benefit of back-ends that don't support packed varyings
 * natively.
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots. For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance. However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
 *
 * This lowering pass also handles varyings whose type is a struct or an array
 * of struct. Structs are packed in order and with no gaps, so there may be a
 * performance penalty due to structure elements being double-parked.
 *
 * Lowering of geometry shader inputs is slightly more complex, since geometry
 * inputs are always arrays, so we need to lower arrays to arrays.
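 * In that case each packed slot itself becomes an array indexed by the
 * vertex, as the second lowered form below shows.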
 * For example, the following input:
 *
 *   in struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];           // location=4, location_frac=0
 *
 * Would get lowered like this if it occurred in a fragment shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4;    // location=4, location_frac=0
 *   in vec4 packed5;    // location=5, location_frac=0
 *   in vec4 packed6;    // location=6, location_frac=0
 *   in vec4 packed7;    // location=7, location_frac=0
 *   in vec4 packed8;    // location=8, location_frac=0
 *   in vec4 packed9;    // location=9, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4.x;
 *     arr[0].v = packed4.yzw;
 *     arr[0].a[0] = packed5.xy;
 *     arr[0].a[1] = packed5.zw;
 *     arr[1].f = packed6.x;
 *     arr[1].v = packed6.yzw;
 *     arr[1].a[0] = packed7.xy;
 *     arr[1].a[1] = packed7.zw;
 *     arr[2].f = packed8.x;
 *     arr[2].v = packed8.yzw;
 *     arr[2].a[0] = packed9.xy;
 *     arr[2].a[1] = packed9.zw;
 *     ...
 *   }
 *
 * But it would get lowered like this if it occurred in a geometry shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4[3]; // location=4, location_frac=0
 *   in vec4 packed5[3]; // location=5, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4[0].x;
 *     arr[0].v = packed4[0].yzw;
 *     arr[0].a[0] = packed5[0].xy;
 *     arr[0].a[1] = packed5[0].zw;
 *     arr[1].f = packed4[1].x;
 *     arr[1].v = packed4[1].yzw;
 *     arr[1].a[0] = packed5[1].xy;
 *     arr[1].a[1] = packed5[1].zw;
 *     arr[2].f = packed4[2].x;
 *     arr[2].v = packed4[2].yzw;
 *     arr[2].a[0] = packed5[2].xy;
 *     arr[2].a[1] = packed5[2].zw;
 *     ...
 *   }
 */

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"

/**
 * Visitor that performs varying packing. For each varying declared in the
 * shader, this visitor determines whether it needs to be packed. If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
struct lower_packed_varyings_state {
   const struct gl_constants *consts;
   struct gl_shader_program *prog;

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void *mem_ctx;

   /**
    * Number of generic varying slots which are used by this shader. This is
    * used to allocate temporary intermediate data structures. If any varying
    * used by this shader has a location greater than or equal to
    * VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
    */
   unsigned locations_used;

   const uint8_t *components;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot. NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   nir_variable **packed_varyings;

   nir_shader *shader;

   nir_function_impl *impl;

   nir_builder b;

   /**
    * Type of varying which is being lowered in this pass (either
    * nir_var_shader_in or nir_var_shader_out).
    */
   nir_variable_mode mode;

   /**
    * If we are currently lowering geometry shader inputs, the number of input
    * vertices the geometry shader accepts. Otherwise zero.
    */
   unsigned gs_input_vertices;

   bool disable_varying_packing;
   bool disable_xfb_packing;
   bool xfb_enabled;
   bool ifc_exposed_to_query_api;
};

static bool
needs_lowering(struct lower_packed_varyings_state *state, nir_variable *var)
{
   /* Things composed of vec4's, varyings with explicitly assigned
    * locations or varyings marked as must_be_shader_input (which might be used
    * by interpolateAt* functions) shouldn't be lowered. Everything else can be.
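    * (Non-64-bit vec4s already fill a whole slot, so the final vector-size
    * check below leaves them alone.)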
    */
   if (var->data.explicit_location || var->data.must_be_shader_input)
      return false;

   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->shader->info.stage) ||
       var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Some drivers (e.g. panfrost) don't support packing of transform
    * feedback varyings.
    */
   if (state->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
         glsl_type_is_matrix(type)) &&
       state->xfb_enabled)
      return false;

   /* Override disable_varying_packing if the var is only used by transform
    * feedback. Also override it if transform feedback is enabled and the
    * variable is an array, struct or matrix as the elements of these types
    * will always have the same interpolation and therefore are safe to pack.
    */
   if (state->disable_varying_packing && !var->data.is_xfb_only &&
       !((glsl_type_is_array(type) || glsl_type_is_struct(type) ||
          glsl_type_is_matrix(type)) && state->xfb_enabled))
      return false;

   type = glsl_without_array(type);
   if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
      return false;

   return true;
}

/**
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
 */
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
                                nir_variable *unpacked_var,
                                const char *name, unsigned location,
                                unsigned slot, unsigned vertex_index)
{
   assert(slot < state->locations_used);
   if (state->packed_varyings[slot] == NULL) {
      assert(state->components[slot] != 0);
      assert(name);

      nir_variable *packed_var = rzalloc(state->shader, nir_variable);
      packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
      packed_var->data.mode = state->mode;

      bool is_interpolation_flat =
         unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
         glsl_contains_integer(unpacked_var->type) ||
         glsl_contains_double(unpacked_var->type);

      const struct glsl_type *packed_type;
      if (is_interpolation_flat)
         packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
      else
         packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);

      if (state->gs_input_vertices != 0) {
         packed_type =
            glsl_array_type(packed_type, state->gs_input_vertices, 0);
      }

      packed_var->type = packed_type;
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
      packed_var->data.interpolation = is_interpolation_flat ?
         (unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
      packed_var->data.stream = NIR_STREAM_PACKED;

      nir_shader_add_variable(state->shader, packed_var);
      state->packed_varyings[slot] = packed_var;
   } else {
      nir_variable *var = state->packed_varyings[slot];

      /* The slot needs to be marked as always active if any variable that got
       * packed there was.
       */
      var->data.always_active_io |= unpacked_var->data.always_active_io;

      /* For geometry shader inputs, only update the packed variable name the
       * first time we visit each component.
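       * (Every vertex of a GS input shares the same packed slot, so the
       * unpacked name only needs to be appended once.)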
       */
      if (state->gs_input_vertices == 0 || vertex_index == 0) {
         assert(name);
         ralloc_asprintf_append((char **) &var->name, ",%s", name);
      }
   }
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 *
 * \param vertex_index: if we are lowering geometry shader inputs, then this
 * indicates which vertex we are currently lowering. Otherwise it is ignored.
 */
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
                         unsigned location, nir_variable *unpacked_var,
                         const char *name, unsigned vertex_index)
{
   unsigned slot = location - VARYING_SLOT_VAR0;
   assert(slot < state->locations_used);

   create_or_update_packed_varying(state, unpacked_var, name, location, slot,
                                   vertex_index);

   nir_deref_instr *deref =
      nir_build_deref_var(&state->b, state->packed_varyings[slot]);

   if (state->gs_input_vertices != 0) {
      /* When lowering GS inputs, the packed variable is an array, so we need
       * to dereference it using vertex_index.
       */
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = vertex_index;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
   }

   return deref;
}

static nir_ssa_def *
i2u(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_int, nir_type_uint,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);

   return value;
}

static nir_ssa_def *
u2i(struct lower_packed_varyings_state *state, nir_ssa_def *value)
{
   value =
      nir_build_alu(&state->b,
                    nir_type_conversion_op(nir_type_uint, nir_type_int,
                                           nir_rounding_mode_undef),
                    value, NULL, NULL, NULL);

   return value;
}

struct packing_store_values {
   bool is_64bit;
   unsigned writemasks[2];
   nir_ssa_def *values[2];
   nir_deref_instr *deref;
};

/**
 * Make an assignment from \c rhs to \c lhs, performing appropriate bitcasts
 * if necessary to match up types.
 *
 * This function is called when packing varyings.
 */
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
                    nir_deref_instr *packed_deref,
                    nir_deref_instr *unpacked_deref,
                    const struct glsl_type *unpacked_type,
                    nir_ssa_def *value,
                    unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
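       * Below, uints are reinterpreted as ints, floats are simply moved (the
       * move preserves the bit pattern), and 64-bit values are split into two
       * 32-bit halves with nir_unpack_64_2x32 before being written out.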
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = u2i(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_var->type) == 4);

            unsigned swiz_x = 0;
            unsigned writemask = 0x3;
            nir_ssa_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
            nir_ssa_def *x_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               x_value = u2i(state, x_value);

            store_state->is_64bit = true;
            store_state->deref = packed_deref;
            store_state->values[0] = x_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_y = 1;
            writemask = 0xc;
            swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);
            nir_ssa_def *y_value = nir_unpack_64_2x32(&state->b, swizzle);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               y_value = u2i(state, y_value);

            store_state->deref = packed_deref;
            store_state->values[1] = y_value;
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            value = nir_unpack_64_2x32(&state->b, value);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = u2i(state, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = u2i(state, nir_unpack_64_2x32(&state->b, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = packed_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

/**
 * This function is called when unpacking varyings.
 */
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                      nir_deref_instr *unpacked_deref,
                      nir_deref_instr *packed_deref,
                      const struct glsl_type *unpacked_type,
                      nir_ssa_def *value,
                      unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);
   const struct glsl_type *packed_type = glsl_without_array(packed_var->type);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from int to
       * (uint or float).
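       * Below, ints are reinterpreted as uints where needed, and 64-bit
       * values are reassembled from their two 32-bit halves with
       * nir_pack_64_2x32 (the inverse of the pack path above).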
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
         value = i2u(state, value);
         break;
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_type) == 4);

            unsigned swiz_xy[2] = {0, 1};
            writemask = 1 << (ffs(writemask) - 1);
            nir_ssa_def *xy_value = nir_swizzle(&state->b, value, swiz_xy, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               xy_value = i2u(state, xy_value);
            xy_value = nir_pack_64_2x32(&state->b, xy_value);

            store_state->is_64bit = true;
            store_state->deref = unpacked_deref;
            store_state->values[0] = xy_value;
            store_state->writemasks[0] = writemask;

            unsigned swiz_zw[2] = {2, 3};
            writemask = writemask << 1;
            nir_ssa_def *zw_value = nir_swizzle(&state->b, value, swiz_zw, 2);
            if (unpacked_base_type != GLSL_TYPE_INT64)
               zw_value = i2u(state, zw_value);
            zw_value = nir_pack_64_2x32(&state->b, zw_value);

            store_state->deref = unpacked_deref;
            store_state->values[1] = zw_value;
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            if (unpacked_base_type != GLSL_TYPE_INT64)
               value = i2u(state, value);
            value = nir_pack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_pack_64_2x32(&state->b, i2u(state, value));
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = unpacked_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

static void
create_store_deref(struct lower_packed_varyings_state *state,
                   nir_deref_instr *deref, nir_ssa_def *value,
                   unsigned writemask, bool is_64bit)
{
   /* If dest and value have different number of components pack the srcs
    * into a vector.
    */
   const struct glsl_type *type = glsl_without_array(deref->type);
   unsigned comps = glsl_get_vector_elements(type);
   if (value->num_components != comps) {
      nir_ssa_def *srcs[4];

      unsigned comp = 0;
      for (unsigned i = 0; i < comps; i++) {
         if (writemask & (1 << i)) {
            if (is_64bit && state->mode == nir_var_shader_in)
               srcs[i] = value;
            else
               srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
            srcs[i] = nir_ssa_undef(&state->b, 1,
                                    glsl_type_is_64bit(type) ? 64 : 32);
         }
      }
      value = nir_vec(&state->b, srcs, comps);
   }

   nir_store_deref(&state->b, deref, value, writemask);
}

static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index);

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements.
 * This takes care of both arrays and matrices.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
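 *
 * \return the location where the next constituent vector (after this array
 * or matrix) should be packed, expressed as a "fine" location.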
 */
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
                nir_ssa_def *rhs_swizzle, unsigned writemask,
                const struct glsl_type *type, unsigned fine_location,
                nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned array_size = glsl_get_length(type);
   unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
   if (array_size * dmul + fine_location % 4 > 4) {
      fine_location = ALIGN_POT(fine_location, dmul);
   }

   type = glsl_get_array_element(type);
   for (unsigned i = 0; i < array_size; i++) {
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = i;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      nir_deref_instr *unpacked_array_deref =
         nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);

      if (gs_input_toplevel) {
         /* Geometry shader inputs are a special case. Instead of storing
          * each element of the array at a different location, all elements
          * are at the same location, but with a different vertex index.
          */
         (void) lower_varying(state, rhs_swizzle, writemask, type,
                              fine_location, unpacked_var,
                              unpacked_array_deref, name, false, i);
      } else {
         char *subscripted_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
         fine_location =
            lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                          unpacked_var, unpacked_array_deref,
                          subscripted_name, false, vertex_index);
      }
   }

   return fine_location;
}

/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_ssa_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;

   /* When gs_input_toplevel is set, we should be looking at a geometry shader
    * input array.
    */
   assert(!gs_input_toplevel || glsl_type_is_array(type));

   if (glsl_type_is_struct(type)) {
      unsigned struct_len = glsl_get_length(type);
      for (unsigned i = 0; i < struct_len; i++) {
         const char *field_name = glsl_get_struct_elem_name(type, i);
         char *deref_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) : NULL;

         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         nir_deref_instr *unpacked_struct_deref =
            nir_build_deref_struct(&state->b, unpacked_var_deref, i);
         fine_location = lower_varying(state, rhs_swizzle, writemask,
                                       field_type, fine_location,
                                       unpacked_var, unpacked_struct_deref,
                                       deref_name, false, vertex_index);
      }

      return fine_location;
   } else if (glsl_type_is_array(type)) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type,
                             fine_location, unpacked_var, unpacked_var_deref,
                             name, gs_input_toplevel, vertex_index);
   } else if (glsl_type_is_matrix(type)) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type,
                             fine_location, unpacked_var, unpacked_var_deref,
                             name, false, vertex_index);
   } else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
      /* We don't have code to split up a 64-bit variable between two
       * varying slots; instead we add padding if necessary.
       */
      unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
      if (aligned_fine_location != fine_location) {
         return lower_varying(state, rhs_swizzle, writemask, type,
                              aligned_fine_location, unpacked_var,
                              unpacked_var_deref, name, false, vertex_index);
      }

      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments. For doubles, a dvec3/dvec4
       * can end up being spread over 3 slots. However the second splitting
       * will happen later, here we just always want to split into 2.
       */
      unsigned left_components, right_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };

      left_components = 4 - fine_location % 4;
      if (glsl_type_is_64bit(type)) {
         left_components /= 2;
         assert(left_components > 0);
      }

      right_components = glsl_get_vector_elements(type) - left_components;

      /* If set, use the previously set writemask to offset the following
       * swizzles/writemasks. This can happen when splitting a dvec, etc.
       * across slots.
       */
      unsigned offset = 0;
      if (writemask) {
         for (unsigned i = 0; i < left_components; i++) {
            /* Keep going until we find the first component of the write */
            if (!(writemask & (1 << i))) {
               offset++;
            } else
               break;
         }
      }

      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i + offset;
         left_swizzle_name[i] = "xyzw"[i + offset];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components + offset;
         right_swizzle_name[i] = "xyzw"[i + left_components + offset];
      }

      if (left_components) {
         char *left_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name,
                            left_swizzle_name) : NULL;

         nir_ssa_def *left_swizzle = NULL;
         unsigned left_writemask = ~0u;
         if (state->mode == nir_var_shader_out) {
            nir_ssa_def *ssa_def = rhs_swizzle ?
               rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
            left_swizzle =
               nir_swizzle(&state->b, ssa_def,
                           left_swizzle_values, left_components);
         } else {
            left_writemask = ((1 << left_components) - 1) << offset;
         }

         const struct glsl_type *swiz_type =
            glsl_vector_type(glsl_get_base_type(type), left_components);
         fine_location = lower_varying(state, left_swizzle, left_writemask,
                                       swiz_type, fine_location, unpacked_var,
                                       unpacked_var_deref, left_name, false,
                                       vertex_index);
      } else {
         /* Top up the fine location to the next slot */
         fine_location++;
      }

      char *right_name = name ?
         ralloc_asprintf(state->mem_ctx, "%s.%s", name,
                         right_swizzle_name) : NULL;

      nir_ssa_def *right_swizzle = NULL;
      unsigned right_writemask = ~0u;
      if (state->mode == nir_var_shader_out) {
         nir_ssa_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,
                        right_swizzle_values, right_components);
      } else {
         right_writemask = ((1 << right_components) - 1) <<
            (left_components + offset);
      }

      const struct glsl_type *swiz_type =
         glsl_vector_type(glsl_get_base_type(type), right_components);
      return lower_varying(state, right_swizzle, right_writemask, swiz_type,
                           fine_location, unpacked_var, unpacked_var_deref,
                           right_name, false, vertex_index);
   } else {
      /* No special handling is necessary; (un)pack the old varying (now temp)
       * from/into the new packed varying.
       */
      unsigned components = glsl_get_vector_elements(type) * dmul;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;

      assert(state->components[location - VARYING_SLOT_VAR0] >= components);
      nir_deref_instr *packed_deref =
         get_packed_varying_deref(state, location, unpacked_var, name,
                                  vertex_index);

      nir_variable *packed_var =
         state->packed_varyings[location - VARYING_SLOT_VAR0];
      if (unpacked_var->data.stream != 0) {
         assert(unpacked_var->data.stream < 4);
         for (unsigned i = 0; i < components; ++i) {
            packed_var->data.stream |=
               unpacked_var->data.stream << (2 * (location_frac + i));
         }
      }

      struct packing_store_values *store_value;
      if (state->mode == nir_var_shader_out) {
         unsigned writemask = ((1 << components) - 1) << location_frac;
         nir_ssa_def *value = rhs_swizzle ? rhs_swizzle :
            nir_load_deref(&state->b, unpacked_var_deref);

         store_value =
            bitwise_assign_pack(state, packed_deref, unpacked_var_deref,
                                type, value, writemask);
      } else {
         unsigned swizzle_values[4] = { 0, 0, 0, 0 };
         for (unsigned i = 0; i < components; ++i) {
            swizzle_values[i] = i + location_frac;
         }

         nir_ssa_def *ssa_def = nir_load_deref(&state->b, packed_deref);
         nir_ssa_def *swizzle =
            nir_swizzle(&state->b, ssa_def, swizzle_values, components);

         store_value =
            bitwise_assign_unpack(state, unpacked_var_deref, packed_deref,
                                  type, swizzle, writemask);
      }

      create_store_deref(state, store_value->deref, store_value->values[0],
                         store_value->writemasks[0], store_value->is_64bit);
      if (store_value->is_64bit) {
         create_store_deref(state, store_value->deref,
                            store_value->values[1],
                            store_value->writemasks[1],
                            store_value->is_64bit);
      }

      free(store_value);
      return fine_location + components;
   }
}

/* Recursively pack varying.
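 * The starting fine location is expressed in scalar components, hence the
 * location * 4 + location_frac computation below.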
 */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
   lower_varying(state, NULL, ~0u, var->type,
                 var->data.location * 4 + var->data.location_frac,
                 var, unpacked_var_deref, var->name,
                 state->gs_input_vertices != 0, 0);
}

static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   if (var->data.mode != state->mode ||
       var->data.location < VARYING_SLOT_VAR0 ||
       !needs_lowering(state, var))
      return;

   /* Skip any new packed varyings we just added */
   if (strncmp("packed:", var->name, 7) == 0)
      return;

   /* This lowering pass is only capable of packing floats and ints
    * together when their interpolation mode is "flat". Treat integers as
    * being flat when the interpolation mode is none.
    */
   assert(var->data.interpolation == INTERP_MODE_FLAT ||
          var->data.interpolation == INTERP_MODE_NONE ||
          !glsl_contains_integer(var->type));

   if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
      struct set *resource_set = _mesa_pointer_set_create(NULL);

      nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                          resource_set, var,
                                          state->shader->info.stage,
                                          GL_PROGRAM_OUTPUT);

      _mesa_set_destroy(resource_set, NULL);
   }

   /* Change the old varying into an ordinary global. */
   var->data.mode = nir_var_shader_temp;

   nir_foreach_block(block, state->impl) {
      if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
         /* For shaders other than geometry, outputs need to be lowered before
          * each return statement and at the end of main()
          */
         if (nir_block_ends_in_return_or_halt(block)) {
            state->b.cursor = nir_before_instr(nir_block_last_instr(block));
            pack_output_var(state, var);
         } else if (block == nir_impl_last_block(state->impl)) {
            state->b.cursor = nir_after_block(block);
            pack_output_var(state, var);
         }
      } else {
         /* For geometry shaders, outputs need to be lowered before each call
          * to EmitVertex()
          */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            state->b.cursor = nir_before_instr(instr);
            pack_output_var(state, var);
         }
      }
   }
}

static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
   nir_foreach_shader_out_variable_safe(var, state->shader) {
      lower_output_var(state, var);
   }
}

static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
   /* Shader inputs need to be lowered at the beginning of main(), so set the
    * builder cursor to insert packing code at the start of the main function.
    */
   state->b.cursor = nir_before_block(nir_start_block(state->impl));

   /* Insert new varyings, lower old ones to locals and add unpacking code at
    * the start of the shader.
    */
   nir_foreach_shader_in_variable_safe(var, state->shader) {
      if (var->data.mode != state->mode ||
          var->data.location < VARYING_SLOT_VAR0 ||
          !needs_lowering(state, var))
         continue;

      /* Skip any new packed varyings we just added */
      if (strncmp("packed:", var->name, 7) == 0)
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat". Treat integers as
       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_MODE_FLAT ||
             var->data.interpolation == INTERP_MODE_NONE ||
             !glsl_contains_integer(var->type));

      /* Program interface needs to expose varyings in case of SSO.
       * Add the variable to the program resource list before it gets
       * modified and lost.
       */
      if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
         struct set *resource_set = _mesa_pointer_set_create(NULL);

         nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                             resource_set, var,
                                             state->shader->info.stage,
                                             GL_PROGRAM_INPUT);

         _mesa_set_destroy(resource_set, NULL);
      }

      /* Change the old varying into an ordinary global. */
      var->data.mode = nir_var_shader_temp;

      /* Recursively unpack varying. */
      nir_deref_instr *unpacked_var_deref =
         nir_build_deref_var(&state->b, var);
      lower_varying(state, NULL, ~0u, var->type,
                    var->data.location * 4 + var->data.location_frac,
                    var, unpacked_var_deref, var->name,
                    state->gs_input_vertices != 0, 0);
   }
}

void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
                             struct gl_shader_program *prog,
                             void *mem_ctx, unsigned locations_used,
                             const uint8_t *components,
                             nir_variable_mode mode,
                             unsigned gs_input_vertices,
                             struct gl_linked_shader *linked_shader,
                             bool disable_varying_packing,
                             bool disable_xfb_packing, bool xfb_enabled)
{
   struct lower_packed_varyings_state state;
   nir_shader *shader = linked_shader->Program->nir;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   assert(shader->info.stage != MESA_SHADER_COMPUTE);

   /* Assert that functions have been inlined before packing is called */
   nir_foreach_function(f, shader) {
      assert(f->impl == impl);
   }

   nir_builder_init(&state.b, impl);
   state.consts = consts;
   state.prog = prog;
   state.mem_ctx = mem_ctx;
   state.shader = shader;
   state.impl = impl;
   state.locations_used = locations_used;
   state.components = components;
   state.mode = mode;
   state.gs_input_vertices = gs_input_vertices;
   state.disable_varying_packing = disable_varying_packing;
   state.disable_xfb_packing = disable_xfb_packing;
   state.xfb_enabled = xfb_enabled;
   state.packed_varyings =
      (nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
                                           locations_used);

   /* Determine if the shader interface is exposed to the API query. */
   struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
   }

   if (mode == nir_var_shader_in) {
      state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
      lower_packed_inputs(&state);
   } else {
      state.ifc_exposed_to_query_api =
         linked_shaders[num_shaders - 1] == linked_shader;
      lower_packed_outputs(&state);
   }

   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}