/* * Copyright (C) 2014 Rob Clark * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Authors: * Rob Clark */ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" #include "util/u_debug.h" #include "util/u_math.h" #include "tgsi_lowering.h" struct tgsi_lowering_context { struct tgsi_transform_context base; const struct tgsi_lowering_config *config; struct tgsi_shader_info *info; unsigned two_side_colors; unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ int face_idx; unsigned numtmp; struct { struct tgsi_full_src_register src; struct tgsi_full_dst_register dst; } tmp[2]; #define A 0 #define B 1 struct tgsi_full_src_register imm; int emitted_decls; unsigned saturate; }; static inline struct tgsi_lowering_context * tgsi_lowering_context(struct tgsi_transform_context *tctx) { return (struct tgsi_lowering_context *)tctx; } /* * Utility helpers: */ static void reg_dst(struct tgsi_full_dst_register *dst, const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) { *dst = *orig_dst; dst->Register.WriteMask &= wrmask; assert(dst->Register.WriteMask); } static inline void get_swiz(unsigned *swiz, const struct tgsi_src_register *src) { swiz[0] = src->SwizzleX; swiz[1] = src->SwizzleY; swiz[2] = src->SwizzleZ; swiz[3] = src->SwizzleW; } static void reg_src(struct tgsi_full_src_register *src, const struct tgsi_full_src_register *orig_src, unsigned sx, unsigned sy, unsigned sz, unsigned sw) { unsigned swiz[4]; get_swiz(swiz, &orig_src->Register); *src = *orig_src; src->Register.SwizzleX = swiz[sx]; src->Register.SwizzleY = swiz[sy]; src->Register.SwizzleZ = swiz[sz]; src->Register.SwizzleW = swiz[sw]; } #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! 
*/ #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w /* * if (dst.x aliases src.x) { * MOV tmpA.x, src.x * src = tmpA * } * COS dst.x, src.x * SIN dst.y, src.x * MOV dst.zw, imm{0.0, 1.0} */ static bool aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, const struct tgsi_full_src_register *src, unsigned src_mask) { if ((dst->Register.File == src->Register.File) && (dst->Register.Index == src->Register.Index)) { unsigned i, actual_mask = 0; unsigned swiz[4]; get_swiz(swiz, &src->Register); for (i = 0; i < 4; i++) if (src_mask & (1 << i)) actual_mask |= (1 << swiz[i]); if (actual_mask & dst_mask) return true; } return false; } static void create_mov(struct tgsi_transform_context *tctx, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src, unsigned mask, unsigned saturate) { struct tgsi_full_instruction new_inst; new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.Saturate = saturate; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, mask); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); tctx->emit_instruction(tctx, &new_inst); } /* to help calculate # of tgsi tokens for a lowering.. we assume * the worst case, ie. removed instructions don't have ADDR[] or * anything which increases the # of tokens per src/dst and the * inserted instructions do. * * OINST() - old instruction * 1 : instruction itself * 1 : dst * 1 * nargs : srcN * * NINST() - new instruction * 1 : instruction itself * 2 : dst * 2 * nargs : srcN */ #define OINST(nargs) (1 + 1 + 1 * (nargs)) #define NINST(nargs) (1 + 2 + 2 * (nargs)) /* * Lowering Translators: */ /* DST - Distance Vector * dst.x = 1.0 * dst.y = src0.y \times src1.y * dst.z = src0.z * dst.w = src1.w * * ; note: could be more clever and use just a single temp * ; if I was clever enough to re-write the swizzles. 
* ; needs: 2 tmp, imm{1.0}
 *   if (dst.y aliases src0.z) {
 *     MOV tmpA.yz, src0.yz
 *     src0 = tmpA
 *   }
 *   if (dst.yz aliases src1.w) {
 *     MOV tmpB.yw, src1.yw
 *     src1 = tmpB
 *   }
 *   MUL dst.y, src0.y, src1.y
 *   MOV dst.z, src0.z
 *   MOV dst.w, src1.w
 *   MOV dst.x, imm{1.0}
 */
#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
                  NINST(1) + NINST(1) - OINST(2))
#define DST_TMP  2
/*
 * Emits the DST expansion listed above through tctx->emit_instruction().
 * Each of the four result components is produced only if it is enabled in
 * the destination write mask of 'inst'.
 */
static void
transform_dst(struct tgsi_transform_context *tctx,
              struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_src_register *src1 = &inst->Src[1];
   struct tgsi_full_instruction new_inst;

   /* dst.y is written (by the MUL) before src0.z is read (by the MOV to
    * dst.z), so stash src0 in tmpA if those overlap:
    */
   if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
      create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
      src0 = &ctx->tmp[A].src;
   }

   /* likewise dst.y and dst.z are both written before src1.w is read: */
   if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
      create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
      src1 = &ctx->tmp[B].src;
   }

   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
      /* MUL dst.y, src0.y, src1.y */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
      new_inst.Instruction.NumSrcRegs = 2;
      reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
      reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
      tctx->emit_instruction(tctx, &new_inst);
   }

   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
      /* MOV dst.z, src0.z */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
      tctx->emit_instruction(tctx, &new_inst);
   }

   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
      /* MOV dst.w, src1.w */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
      tctx->emit_instruction(tctx, &new_inst);
   }

   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
      /* MOV dst.x, imm{1.0} */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
      new_inst.Instruction.NumSrcRegs = 1;
      /* imm.y holds the 1.0 constant */
      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* LRP - Linear Interpolate
 *   dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
 *   dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
 *   dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
 *   dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
 *
 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
 * can then become: src0 \times src1 - (src0 \times src2 - src2)
 *
 * ; needs: 1 tmp
 *   MAD tmpA, src0, src2, -src2
 *   MAD dst, src0, src1, -tmpA
 */
#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
#define LRP_TMP  1
/*
 * Emits the two-MAD LRP expansion listed above.  Nothing is emitted when
 * the destination write mask of 'inst' is empty.
 */
static void
transform_lrp(struct tgsi_transform_context *tctx,
              struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_src_register *src1 = &inst->Src[1];
   struct tgsi_full_src_register *src2 = &inst->Src[2];
   struct tgsi_full_instruction new_inst;

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      /* MAD tmpA, src0, src2, -src2 */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 3;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
      /* toggle rather than set, so an already-negated src2 is honoured */
      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
      tctx->emit_instruction(tctx, &new_inst);

      /* MAD dst, src0, src1, -tmpA */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 3;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
      new_inst.Src[2].Register.Negate = true;
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* FRC - Fraction
 *   dst.x = src.x - \lfloor src.x\rfloor
 *   dst.y = src.y - \lfloor src.y\rfloor
 *   dst.z = src.z - \lfloor src.z\rfloor
 *   dst.w = src.w - \lfloor src.w\rfloor
 *
 * ; needs: 1 tmp
 *   FLR tmpA, src
 *   SUB dst, src, tmpA
 */
#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
#define FRC_TMP  1
/*
 * Emits the FLR + subtract expansion of FRC listed above.  The SUB is
 * expressed as an ADD whose second source is negated.
 */
static void
transform_frc(struct tgsi_transform_context *tctx,
              struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src = &inst->Src[0];
   struct tgsi_full_instruction new_inst;

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      /* FLR tmpA, src */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
      tctx->emit_instruction(tctx, &new_inst);

      /* SUB dst, src, tmpA */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 2;
      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
      new_inst.Src[1].Register.Negate = 1;
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* POW - Power
 *   dst.x = src0.x^{src1.x}
 *   dst.y = src0.x^{src1.x}
 *   dst.z = src0.x^{src1.x}
 *   dst.w = src0.x^{src1.x}
 *
 * ; needs: 1 tmp
 *   LG2 tmpA.x, src0.x
 *   MUL tmpA.x, src1.x, tmpA.x
 *   EX2 dst, tmpA.x
 */
#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
#define POW_TMP  1
/*
 * Emits the LG2 / MUL / EX2 expansion of POW listed above; the final EX2
 * writes every component enabled in the destination write mask.
 */
static void
transform_pow(struct tgsi_transform_context *tctx,
              struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_src_register *src1 = &inst->Src[1];
   struct tgsi_full_instruction new_inst;

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      /* LG2 tmpA.x, src0.x */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
      tctx->emit_instruction(tctx, &new_inst);

      /* MUL tmpA.x, src1.x, tmpA.x */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
      new_inst.Instruction.NumSrcRegs = 2;
      reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
      tctx->emit_instruction(tctx, &new_inst);

      /* EX2 dst, tmpA.x */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* LIT - Light Coefficients
 *
dst.x = 1.0 * dst.y = max(src.x, 0.0) * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 * dst.w = 1.0 * * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} * MAX tmpA.xy, src.xy, imm{0.0} * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} * LG2 tmpA.y, tmpA.y * MUL tmpA.y, tmpA.z, tmpA.y * EX2 tmpA.y, tmpA.y * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} * MOV dst.yz, tmpA.xy * MOV dst.xw, imm{1.0} */ #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) #define LIT_TMP 1 static void transform_lit(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src = &inst->Src[0]; struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { /* MAX tmpA.xy, src.xy, imm{0.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); tctx->emit_instruction(tctx, &new_inst); /* MIN tmpA.z, src.w, imm{128.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); tctx->emit_instruction(tctx, &new_inst); /* MAX tmpA.z, tmpA.z, -imm{128.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); new_inst.Instruction.NumSrcRegs = 
2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); new_inst.Src[1].Register.Negate = true; tctx->emit_instruction(tctx, &new_inst); /* LG2 tmpA.y, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* MUL tmpA.y, tmpA.z, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); tctx->emit_instruction(tctx, &new_inst); /* EX2 tmpA.y, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 3; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); new_inst.Src[0].Register.Negate = true; reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); tctx->emit_instruction(tctx, &new_inst); /* MOV dst.yz, tmpA.xy */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 
new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { /* MOV dst.xw, imm{1.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); tctx->emit_instruction(tctx, &new_inst); } } /* EXP - Approximate Exponential Base 2 * dst.x = 2^{\lfloor src.x\rfloor} * dst.y = src.x - \lfloor src.x\rfloor * dst.z = 2^{src.x} * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} * if (lowering FLR) { * FRC tmpA.x, src.x * SUB tmpA.x, src.x, tmpA.x * } else { * FLR tmpA.x, src.x * } * EX2 tmpA.y, src.x * SUB dst.y, src.x, tmpA.x * EX2 dst.x, tmpA.x * MOV dst.z, tmpA.y * MOV dst.w, imm{1.0} */ #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ NINST(1)+ NINST(1) - OINST(1)) #define EXP_TMP 1 static void transform_exp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src = &inst->Src[0]; struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { if (ctx->config->lower_FLR) { /* FRC tmpA.x, src.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* SUB tmpA.x, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 
new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.x, src.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { /* EX2 tmpA.y, src.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { /* SUB dst.y, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { /* EX2 dst.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & 
TGSI_WRITEMASK_Z) { /* MOV dst.z, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { /* MOV dst.w, imm{1.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); tctx->emit_instruction(tctx, &new_inst); } } /* LOG - Approximate Logarithm Base 2 * dst.x = \lfloor\log_2{|src.x|}\rfloor * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} * dst.z = \log_2{|src.x|} * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} * LG2 tmpA.x, |src.x| * if (lowering FLR) { * FRC tmpA.y, tmpA.x * SUB tmpA.y, tmpA.x, tmpA.y * } else { * FLR tmpA.y, tmpA.x * } * EX2 tmpA.z, tmpA.y * RCP tmpA.z, tmpA.z * MUL dst.y, |src.x|, tmpA.z * MOV dst.xz, tmpA.yx * MOV dst.w, imm{1.0} */ #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ NINST(2) + NINST(1) + NINST(1) - OINST(1)) #define LOG_TMP 1 static void transform_log(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src = &inst->Src[0]; struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { /* LG2 tmpA.x, |src.x| */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], src, 
SWIZ(X, _, _, _)); new_inst.Src[0].Register.Absolute = true; tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { if (ctx->config->lower_FLR) { /* FRC tmpA.y, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); tctx->emit_instruction(tctx, &new_inst); /* SUB tmpA.y, tmpA.x, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.y, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); tctx->emit_instruction(tctx, &new_inst); } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { /* EX2 tmpA.z, tmpA.y */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* RCP tmpA.z, tmpA.z */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 
new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* MUL dst.y, |src.x|, tmpA.z */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); new_inst.Src[0].Register.Absolute = true; reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { /* MOV dst.xz, tmpA.yx */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); tctx->emit_instruction(tctx, &new_inst); } if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { /* MOV dst.w, imm{1.0} */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); tctx->emit_instruction(tctx, &new_inst); } } /* DP4 - 4-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w * * DP3 - 3-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z * * DP2 - 2-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y * * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar * operations, which is what you'd prefer for a ISA that is natively * scalar. 
Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DP2 using vector instructions.
 *
 * ; needs: 1 tmp
 *   MUL tmpA.x, src0.x, src1.x
 *   MAD tmpA.x, src0.y, src1.y, tmpA.x
 *   if (DP3 || DP4) {
 *     MAD tmpA.x, src0.z, src1.z, tmpA.x
 *     if (DP4) {
 *       MAD tmpA.x, src0.w, src1.w, tmpA.x
 *     }
 *   }
 *   ; fixup last instruction to replicate into dst
 */
#define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
#define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
#define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
#define DOTP_TMP  1
/*
 * Emits the MUL/MAD chain listed above for DP2, DP3 or DP4, selected by
 * the opcode of 'inst'.  Each MAD is built but its emission is deferred
 * until it is known whether another term follows; whichever MAD turns out
 * to be the last has its destination redirected from tmpA.x to the real
 * dst before being emitted.
 */
static void
transform_dotp(struct tgsi_transform_context *tctx,
               struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_src_register *src1 = &inst->Src[1];
   struct tgsi_full_instruction new_inst;
   unsigned opcode = inst->Instruction.Opcode;

   /* NOTE: any potential last instruction must replicate src on all
    * components (since it could be re-written to write to final dst)
    */

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      /* MUL tmpA.x, src0.x, src1.x */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
      new_inst.Instruction.NumSrcRegs = 2;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
      reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
      tctx->emit_instruction(tctx, &new_inst);

      /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
      new_inst.Instruction.NumSrcRegs = 3;
      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
      reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));

      if ((opcode == TGSI_OPCODE_DP3) ||
          (opcode == TGSI_OPCODE_DP4)) {
         /* not the last term: emit the pending .y MAD as built */
         tctx->emit_instruction(tctx, &new_inst);

         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
         new_inst = tgsi_default_full_instruction();
         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
         new_inst.Instruction.NumDstRegs = 1;
         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
         new_inst.Instruction.NumSrcRegs = 3;
         reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
         reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));

         if (opcode == TGSI_OPCODE_DP4) {
            /* not the last term: emit the pending .z MAD as built */
            tctx->emit_instruction(tctx, &new_inst);

            /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
            new_inst = tgsi_default_full_instruction();
            new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
            new_inst.Instruction.NumDstRegs = 1;
            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
            new_inst.Instruction.NumSrcRegs = 3;
            reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
            reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
            reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
         }
      }

      /* fixup last instruction to write to dst: */
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);

      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* FLR - floor, CEIL - ceil
 * ; needs: 1 tmp
 *   if (CEIL) {
 *     FRC tmpA, -src
 *     ADD dst, src, tmpA
 *   } else {
 *     FRC tmpA, src
 *     SUB dst, src, tmpA
 *   }
 */
#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
#define FLR_TMP 1
#define CEIL_TMP 1
/*
 * Emits the FRC-based expansion listed above for FLR or CEIL, selected by
 * the opcode of 'inst'.  The two variants differ only in which operand
 * gets its negate modifier changed.
 */
static void
transform_flr_ceil(struct tgsi_transform_context *tctx,
                   struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_instruction new_inst;
   unsigned opcode = inst->Instruction.Opcode;

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      /* FLR: FRC tmpA, src
       * CEIL: FRC tmpA, -src
       */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 1;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));

      /* toggled, so a source that is already negated ends up un-negated */
      if (opcode == TGSI_OPCODE_CEIL)
         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
      tctx->emit_instruction(tctx, &new_inst);

      /* FLR: SUB dst, src, tmpA
       * CEIL: ADD dst, src, tmpA
       */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 2;
      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
      /* the SUB is an ADD with the tmpA operand negated */
      if (opcode == TGSI_OPCODE_FLR)
         new_inst.Src[1].Register.Negate = 1;
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* TRUNC - truncate off fractional part
 *  dst.x = trunc(src.x)
 *  dst.y = trunc(src.y)
 *  dst.z = trunc(src.z)
 *  dst.w = trunc(src.w)
 *
 * ; needs: 1 tmp
 *   if (lower FLR) {
 *     FRC tmpA, |src|
 *     SUB tmpA, |src|, tmpA
 *   } else {
 *     FLR tmpA, |src|
 *   }
 *   CMP dst, src, -tmpA, tmpA
 */
#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
#define TRUNC_TMP 1
/*
 * Emits the TRUNC expansion listed above: floor of the absolute value is
 * computed into tmpA (via FRC + subtract when config->lower_FLR is set,
 * otherwise with a single FLR), then a CMP on the original source picks
 * -tmpA or tmpA for the result.
 */
static void
transform_trunc(struct tgsi_transform_context *tctx,
                struct tgsi_full_instruction *inst)
{
   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   struct tgsi_full_dst_register *dst = &inst->Dst[0];
   struct tgsi_full_src_register *src0 = &inst->Src[0];
   struct tgsi_full_instruction new_inst;

   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
      if (ctx->config->lower_FLR) {
         /* FRC tmpA, |src| */
         new_inst = tgsi_default_full_instruction();
         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
         new_inst.Instruction.NumDstRegs = 1;
         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
         new_inst.Instruction.NumSrcRegs = 1;
         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
         /* |src|: set absolute and clear any negate copied from src0 */
         new_inst.Src[0].Register.Absolute = true;
         new_inst.Src[0].Register.Negate = false;
         tctx->emit_instruction(tctx, &new_inst);

         /* SUB tmpA, |src|, tmpA */
         new_inst = tgsi_default_full_instruction();
         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
         new_inst.Instruction.NumDstRegs = 1;
         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
         new_inst.Instruction.NumSrcRegs = 2;
         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
         new_inst.Src[0].Register.Absolute = true;
         new_inst.Src[0].Register.Negate = false;
         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
         new_inst.Src[1].Register.Negate = 1;
         tctx->emit_instruction(tctx, &new_inst);
      } else {
         /* FLR tmpA, |src| */
         new_inst = tgsi_default_full_instruction();
         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
         new_inst.Instruction.NumDstRegs = 1;
         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
         new_inst.Instruction.NumSrcRegs = 1;
         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
         new_inst.Src[0].Register.Absolute = true;
         new_inst.Src[0].Register.Negate = false;
         tctx->emit_instruction(tctx, &new_inst);
      }

      /* CMP dst, src, -tmpA, tmpA */
      new_inst = tgsi_default_full_instruction();
      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
      new_inst.Instruction.NumDstRegs = 1;
      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
      new_inst.Instruction.NumSrcRegs = 3;
      /* the unmodified source (with its own modifiers) is the selector */
      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
      new_inst.Src[1].Register.Negate = true;
      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
      tctx->emit_instruction(tctx, &new_inst);
   }
}

/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
 * in the case of TXP, the clamping must happen *after* projection, so
 * we need to lower TXP to TEX.
 *
 *   MOV tmpA, src0
 *   if (opc == TXP) {
 *     ; do perspective division manually before clamping:
 *     RCP tmpB, tmpA.w
 *     MUL tmpB., tmpA, tmpB.xxxx
 *     opc = TEX;
 *   }
 *   MOV_SAT tmpA., tmpA  ; is the clamped s/t/r coords
 *   dst, tmpA, ...
*/ #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) #define SAMP_TMP 2 static int transform_samp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_full_src_register *coord = &inst->Src[0]; struct tgsi_full_src_register *samp; struct tgsi_full_instruction new_inst; /* mask is clamped coords, pmask is all coords (for projection): */ unsigned mask = 0, pmask = 0, smask; unsigned tex = inst->Texture.Texture; unsigned opcode = inst->Instruction.Opcode; bool lower_txp = (opcode == TGSI_OPCODE_TXP) && (ctx->config->lower_TXP & (1 << tex)); if (opcode == TGSI_OPCODE_TXB2) { samp = &inst->Src[2]; } else { samp = &inst->Src[1]; } /* convert sampler # to bitmask to test: */ smask = 1 << samp->Register.Index; /* check if we actually need to lower this one: */ if (!(ctx->saturate & smask) && !lower_txp) return -1; /* figure out which coordinates need saturating: * - RECT textures should not get saturated * - array index coords should not get saturated */ switch (tex) { case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: if (ctx->config->saturate_r & smask) mask |= TGSI_WRITEMASK_Z; pmask |= TGSI_WRITEMASK_Z; /* fallthrough */ case TGSI_TEXTURE_2D: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOW2D_ARRAY: case TGSI_TEXTURE_2D_MSAA: case TGSI_TEXTURE_2D_ARRAY_MSAA: if (ctx->config->saturate_t & smask) mask |= TGSI_WRITEMASK_Y; pmask |= TGSI_WRITEMASK_Y; /* fallthrough */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_SHADOW1D_ARRAY: if (ctx->config->saturate_s & smask) mask |= TGSI_WRITEMASK_X; pmask |= TGSI_WRITEMASK_X; break; case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOWRECT: /* we don't saturate, but in case of lower_txp we * still need to do the perspective divide: */ pmask = 
TGSI_WRITEMASK_XY; break; } /* sanity check.. driver could be asking to saturate a non- * existent coordinate component: */ if (!mask && !lower_txp) return -1; /* MOV tmpA, src0 */ create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); /* This is a bit sad.. we need to clamp *after* the coords * are projected, which means lowering TXP to TEX and doing * the projection ourself. But since I haven't figured out * how to make the lowering code deliver an electric shock * to anyone using GL_CLAMP, we must do this instead: */ if (opcode == TGSI_OPCODE_TXP) { /* RCP tmpB.x tmpA.w */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 1; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); tctx->emit_instruction(tctx, &new_inst); /* MUL tmpA.mask, tmpA, tmpB.xxxx */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); tctx->emit_instruction(tctx, &new_inst); opcode = TGSI_OPCODE_TEX; } /* MOV_SAT tmpA., tmpA */ if (mask) { create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); } /* modify the texture samp instruction to take fixed up coord: */ new_inst = *inst; new_inst.Instruction.Opcode = opcode; new_inst.Src[0] = ctx->tmp[A].src; tctx->emit_instruction(tctx, &new_inst); return 0; } /* Two-sided color emulation: * For each COLOR input, create a corresponding BCOLOR input, plus * CMP instruction to select front or back color based on FACE */ #define TWOSIDE_GROW(n) ( \ 2 + /* FACE */ \ ((n) * 3) + /* IN[], BCOLOR[n], */\ ((n) * 1) + /* TEMP[] */ \ ((n) * NINST(3)) /* CMP instr */ \ ) 
static void emit_twoside(struct tgsi_transform_context *tctx) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_shader_info *info = ctx->info; struct tgsi_full_declaration decl; struct tgsi_full_instruction new_inst; unsigned inbase, tmpbase; int i; inbase = info->file_max[TGSI_FILE_INPUT] + 1; tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; /* additional inputs for BCOLOR's */ for (i = 0; i < ctx->two_side_colors; i++) { unsigned in_idx = ctx->two_side_idx[i]; decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = true; decl.Range.First = decl.Range.Last = inbase + i; decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; decl.Semantic.Index = info->input_semantic_index[in_idx]; decl.Declaration.Interpolate = true; decl.Interp.Interpolate = info->input_interpolate[in_idx]; decl.Interp.Location = info->input_interpolate_loc[in_idx]; decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx]; tctx->emit_declaration(tctx, &decl); } /* additional input for FACE */ if (ctx->two_side_colors && (ctx->face_idx == -1)) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = true; decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; decl.Semantic.Name = TGSI_SEMANTIC_FACE; decl.Semantic.Index = 0; tctx->emit_declaration(tctx, &decl); ctx->face_idx = decl.Range.First; } /* additional temps for COLOR/BCOLOR selection: */ for (i = 0; i < ctx->two_side_colors; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; tctx->emit_declaration(tctx, &decl); } /* and finally additional instructions to select COLOR/BCOLOR: */ for (i = 0; i < ctx->two_side_colors; i++) { new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; new_inst.Instruction.NumDstRegs = 1; new_inst.Dst[0].Register.File = 
TGSI_FILE_TEMPORARY; new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; new_inst.Instruction.NumSrcRegs = 3; new_inst.Src[0].Register.File = TGSI_FILE_INPUT; new_inst.Src[0].Register.Index = ctx->face_idx; new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; new_inst.Src[1].Register.File = TGSI_FILE_INPUT; new_inst.Src[1].Register.Index = inbase + i; new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; new_inst.Src[2].Register.File = TGSI_FILE_INPUT; new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; tctx->emit_instruction(tctx, &new_inst); } } static void emit_decls(struct tgsi_transform_context *tctx) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_shader_info *info = ctx->info; struct tgsi_full_declaration decl; struct tgsi_full_immediate immed; unsigned tmpbase; int i; tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; ctx->color_base = tmpbase + ctx->numtmp; /* declare immediate: */ immed = tgsi_default_full_immediate(); immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ immed.u[0].Float = 0.0; immed.u[1].Float = 1.0; immed.u[2].Float = 128.0; immed.u[3].Float = 0.0; tctx->emit_immediate(tctx, &immed); ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; ctx->imm.Register.Index = info->immediate_count; ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 
ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; /* declare temp regs: */ for (i = 0; i < ctx->numtmp; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; decl.Range.First = decl.Range.Last = tmpbase + i; tctx->emit_declaration(tctx, &decl); ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; ctx->tmp[i].src.Register.Index = tmpbase + i; ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; ctx->tmp[i].dst.Register.Index = tmpbase + i; ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; } if (ctx->two_side_colors) emit_twoside(tctx); } static void rename_color_inputs(struct tgsi_lowering_context *ctx, struct tgsi_full_instruction *inst) { unsigned i, j; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_src_register *src = &inst->Src[i].Register; if (src->File == TGSI_FILE_INPUT) { for (j = 0; j < ctx->two_side_colors; j++) { if (src->Index == ctx->two_side_idx[j]) { src->File = TGSI_FILE_TEMPORARY; src->Index = ctx->color_base + j; break; } } } } } static void transform_instr(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); if (!ctx->emitted_decls) { emit_decls(tctx); ctx->emitted_decls = 1; } /* if emulating two-sided-color, we need to re-write some * src registers: */ if (ctx->two_side_colors) rename_color_inputs(ctx, inst); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_DST: if (!ctx->config->lower_DST) goto skip; transform_dst(tctx, inst); break; case TGSI_OPCODE_LRP: if (!ctx->config->lower_LRP) goto skip; transform_lrp(tctx, inst); break; case TGSI_OPCODE_FRC: if (!ctx->config->lower_FRC) goto skip; transform_frc(tctx, inst); break; case TGSI_OPCODE_POW: if (!ctx->config->lower_POW) goto skip; 
transform_pow(tctx, inst); break; case TGSI_OPCODE_LIT: if (!ctx->config->lower_LIT) goto skip; transform_lit(tctx, inst); break; case TGSI_OPCODE_EXP: if (!ctx->config->lower_EXP) goto skip; transform_exp(tctx, inst); break; case TGSI_OPCODE_LOG: if (!ctx->config->lower_LOG) goto skip; transform_log(tctx, inst); break; case TGSI_OPCODE_DP4: if (!ctx->config->lower_DP4) goto skip; transform_dotp(tctx, inst); break; case TGSI_OPCODE_DP3: if (!ctx->config->lower_DP3) goto skip; transform_dotp(tctx, inst); break; case TGSI_OPCODE_DP2: if (!ctx->config->lower_DP2) goto skip; transform_dotp(tctx, inst); break; case TGSI_OPCODE_FLR: if (!ctx->config->lower_FLR) goto skip; transform_flr_ceil(tctx, inst); break; case TGSI_OPCODE_CEIL: if (!ctx->config->lower_CEIL) goto skip; transform_flr_ceil(tctx, inst); break; case TGSI_OPCODE_TRUNC: if (!ctx->config->lower_TRUNC) goto skip; transform_trunc(tctx, inst); break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXL: if (transform_samp(tctx, inst)) goto skip; break; default: skip: tctx->emit_instruction(tctx, inst); break; } } /* returns NULL if no lowering required, else returns the new * tokens (which caller is required to free()). In either case * returns the current info. 
*/ const struct tgsi_token * tgsi_transform_lowering(const struct tgsi_lowering_config *config, const struct tgsi_token *tokens, struct tgsi_shader_info *info) { struct tgsi_lowering_context ctx; struct tgsi_token *newtoks; int newlen, numtmp; /* sanity check in case limit is ever increased: */ STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); /* sanity check the lowering */ assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); assert(!(config->lower_FRC && config->lower_TRUNC)); memset(&ctx, 0, sizeof(ctx)); ctx.base.transform_instruction = transform_instr; ctx.info = info; ctx.config = config; tgsi_scan_shader(tokens, info); /* if we are adding fragment shader support to emulate two-sided * color, then figure out the number of additional inputs we need * to create for BCOLOR's.. */ if ((info->processor == PIPE_SHADER_FRAGMENT) && config->color_two_side) { int i; ctx.face_idx = -1; for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) ctx.two_side_idx[ctx.two_side_colors++] = i; if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) ctx.face_idx = i; } } ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; #define OPCS(x) ((config->lower_ ## x) ? 
info->opcode_count[TGSI_OPCODE_ ## x] : 0) /* if there are no instructions to lower, then we are done: */ if (!(OPCS(DST) || OPCS(LRP) || OPCS(FRC) || OPCS(POW) || OPCS(LIT) || OPCS(EXP) || OPCS(LOG) || OPCS(DP4) || OPCS(DP3) || OPCS(DP2) || OPCS(FLR) || OPCS(CEIL) || OPCS(TRUNC) || OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) return NULL; #if 0 /* debug */ _debug_printf("BEFORE:"); tgsi_dump(tokens, 0); #endif numtmp = 0; newlen = tgsi_num_tokens(tokens); if (OPCS(DST)) { newlen += DST_GROW * OPCS(DST); numtmp = MAX2(numtmp, DST_TMP); } if (OPCS(LRP)) { newlen += LRP_GROW * OPCS(LRP); numtmp = MAX2(numtmp, LRP_TMP); } if (OPCS(FRC)) { newlen += FRC_GROW * OPCS(FRC); numtmp = MAX2(numtmp, FRC_TMP); } if (OPCS(POW)) { newlen += POW_GROW * OPCS(POW); numtmp = MAX2(numtmp, POW_TMP); } if (OPCS(LIT)) { newlen += LIT_GROW * OPCS(LIT); numtmp = MAX2(numtmp, LIT_TMP); } if (OPCS(EXP)) { newlen += EXP_GROW * OPCS(EXP); numtmp = MAX2(numtmp, EXP_TMP); } if (OPCS(LOG)) { newlen += LOG_GROW * OPCS(LOG); numtmp = MAX2(numtmp, LOG_TMP); } if (OPCS(DP4)) { newlen += DP4_GROW * OPCS(DP4); numtmp = MAX2(numtmp, DOTP_TMP); } if (OPCS(DP3)) { newlen += DP3_GROW * OPCS(DP3); numtmp = MAX2(numtmp, DOTP_TMP); } if (OPCS(DP2)) { newlen += DP2_GROW * OPCS(DP2); numtmp = MAX2(numtmp, DOTP_TMP); } if (OPCS(FLR)) { newlen += FLR_GROW * OPCS(FLR); numtmp = MAX2(numtmp, FLR_TMP); } if (OPCS(CEIL)) { newlen += CEIL_GROW * OPCS(CEIL); numtmp = MAX2(numtmp, CEIL_TMP); } if (OPCS(TRUNC)) { newlen += TRUNC_GROW * OPCS(TRUNC); numtmp = MAX2(numtmp, TRUNC_TMP); } if (ctx.saturate || config->lower_TXP) { int n = 0; if (ctx.saturate) { n = info->opcode_count[TGSI_OPCODE_TEX] + info->opcode_count[TGSI_OPCODE_TXP] + info->opcode_count[TGSI_OPCODE_TXB] + info->opcode_count[TGSI_OPCODE_TXB2] + info->opcode_count[TGSI_OPCODE_TXL]; } else if (config->lower_TXP) { n = info->opcode_count[TGSI_OPCODE_TXP]; } newlen += SAMP_GROW * n; numtmp = MAX2(numtmp, SAMP_TMP); } /* specifically don't include 
two_side_colors temps in the count: */ ctx.numtmp = numtmp; if (ctx.two_side_colors) { newlen += TWOSIDE_GROW(ctx.two_side_colors); /* note: we permanently consume temp regs, re-writing references * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP * instruction that selects which varying to use): */ numtmp += ctx.two_side_colors; } newlen += 2 * numtmp; newlen += 5; /* immediate */ newtoks = tgsi_alloc_tokens(newlen); if (!newtoks) return NULL; tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); tgsi_scan_shader(newtoks, info); #if 0 /* debug */ _debug_printf("AFTER:"); tgsi_dump(newtoks, 0); #endif return newtoks; }