/*
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "nir.h"
#include "nir_builder.h"

/*
 * Lower calls to functions prefixed "nir_*" to the NIR ALU instruction or
 * intrinsic represented. This matches functions of the form:
 *
 *    nir_[op name](__optional mangling suffix)
 *
 * These functions return a value if the instruction has a destination. They
 * take all instruction sources as parameters, followed by parameters for each
 * ordered intrinsic index if any.
 *
 * Mangling allows for multiple definitions of the same instruction with
 * different vector lengths and bit sizes. This could be combined with
 * __attribute__((overloadable)) for seamless overloads.
 *
 * In effect, this pass re-implements nir_builder dynamically. This exposes
 * low-level hardware intrinsics to internal driver programs. It is intended for
 * use with internal OpenCL but should theoretically work for GLSL too.
 */

/*
 * Replace a builtin call with the ALU instruction `op`.
 *
 * Parameter 0 of the call is the return deref; parameters 1..num_inputs are
 * the ALU sources in order. The ALU result is stored through the return deref
 * with a write mask covering all of its components.
 */
static void
lower_builtin_alu(nir_builder *b, nir_call_instr *call, nir_op op)
{
   const nir_op_info *info = &nir_op_infos[op];
   nir_def *srcs[NIR_ALU_MAX_INPUTS];

   /* Return deref followed by exactly one parameter per ALU input. Checked
    * here, like the intrinsic path, so a mis-declared builtin is caught
    * instead of reading past the end of call->params.
    */
   assert(call->num_params == (1 + info->num_inputs));

   for (unsigned s = 0; s < info->num_inputs; ++s) {
      srcs[s] = call->params[1 + s].ssa;
   }

   nir_def *res = nir_build_alu_src_arr(b, op, srcs);
   nir_store_deref(b, nir_src_as_deref(call->params[0]), res,
                   nir_component_mask(res->num_components));
}

/*
 * Replace a builtin call with the intrinsic `op`.
 *
 * Call parameters are consumed in order: the return deref (only if the
 * intrinsic has a destination), then each intrinsic source, then one constant
 * parameter per intrinsic index.
 */
static void
lower_builtin_intr(nir_builder *b, nir_call_instr *call, nir_intrinsic_op op)
{
   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, op);
   const nir_intrinsic_info *info = &nir_intrinsic_infos[op];

   /* If there is a destination, the first parameter is the return deref */
   unsigned src = info->has_dest ? 1 : 0;
   assert(call->num_params == (src + info->num_srcs + info->num_indices));

   /* The next parameters are the intrinsic sources */
   for (unsigned s = 0; s < info->num_srcs; ++s) {
      intr->src[s] = nir_src_for_ssa(call->params[src++].ssa);
   }

   /* The remaining parameters are the intrinsic indices */
   for (unsigned s = 0; s < info->num_indices; ++s) {
      uint64_t val = nir_src_as_uint(call->params[src++]);
      intr->const_index[info->index_map[info->indices[s]] - 1] = val;
   }

   /* Some intrinsics must infer num_components from a particular source. */
   for (unsigned s = 0; s < info->num_srcs; ++s) {
      if (info->src_components[s] == 0) {
         intr->num_components = intr->src[s].ssa->num_components;
         break;
      }
   }

   /* Insert the instruction before any store_deref */
   nir_builder_instr_insert(b, &intr->instr);

   /* If there is a destination, plumb it through the return deref */
   if (info->has_dest) {
      nir_deref_instr *deref = nir_src_as_deref(call->params[0]);

      /* The destination's shape is recovered from the return variable's type;
       * a length of 0 is treated as a single component.
       */
      unsigned bit_size = glsl_get_bit_size(deref->type);
      unsigned num_components = MAX2(glsl_get_length(deref->type), 1);

      nir_def_init(&intr->instr, &intr->def, num_components, bit_size);
      nir_store_deref(b, deref, &intr->def,
                      nir_component_mask(num_components));

      /* If no source fixed num_components above, take it from the dest. */
      if (info->dest_components == 0 && intr->num_components == 0) {
         intr->num_components = num_components;
      }
   }
}

/*
 * Return true if `candidate` is exactly the first `len` characters of `name`.
 */
static bool
builtin_name_matches(const char *name, size_t len, const char *candidate)
{
   return strlen(candidate) == len && strncmp(name, candidate, len) == 0;
}

/*
 * Per-instruction callback: lower a single call to a "nir_"-prefixed function.
 *
 * Returns false (leaving the instruction untouched) for anything that is not
 * a call to a function whose name starts with "nir_". Otherwise the call is
 * removed and replaced by the matching ALU op or intrinsic, and true is
 * returned. A "nir_"-prefixed name that matches neither is a fatal error.
 */
static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
   /* All builtins are exposed as function calls */
   if (instr->type != nir_instr_type_call)
      return false;

   nir_call_instr *call = nir_instr_as_call(instr);
   nir_function *func = call->callee;

   /* We reserve all functions prefixed nir_* as builtins needing lowering. */
   const size_t prefix_len = strlen("nir_");
   if (strncmp("nir_", func->name, prefix_len) != 0)
      return false;

   /* Strip the nir_ prefix to give the name of an ALU opcode or intrinsic. Also
    * strip the __* suffix if present: we don't need mangling information, we
    * can recover vector lengths / bit sizes from the NIR. This implements a
    * crude form of function overloading.
    */
   const char *intr_name = func->name + prefix_len;
   const char *suffix = strstr(intr_name, "__");
   size_t len = (suffix != NULL) ? (size_t)(suffix - intr_name)
                                 : strlen(intr_name);

   /* From this point on, we must not fail. Remove the call. */
   b->cursor = nir_instr_remove(&call->instr);

   /* Look for an ALU opcode */
   for (unsigned i = 0; i < ARRAY_SIZE(nir_op_infos); ++i) {
      if (builtin_name_matches(intr_name, len, nir_op_infos[i].name)) {
         lower_builtin_alu(b, call, (nir_op)i);
         return true;
      }
   }

   /* Look for an intrinsic */
   for (unsigned i = 0; i < ARRAY_SIZE(nir_intrinsic_infos); ++i) {
      if (builtin_name_matches(intr_name, len, nir_intrinsic_infos[i].name)) {
         lower_builtin_intr(b, call, (nir_intrinsic_op)i);
         return true;
      }
   }

   /* We must have matched something! */
   fprintf(stderr, "unknown opcode %s\n", func->name);
   unreachable("invalid nir opcode/intrinsic");
}

/*
 * Lower every call to a "nir_"-prefixed builtin in the shader to the ALU
 * instruction or intrinsic it names. Returns the result of the instruction
 * pass; no metadata is reported as preserved.
 */
bool
nir_lower_calls_to_builtins(nir_shader *s)
{
   return nir_shader_instructions_pass(s, lower, nir_metadata_none, NULL);
}