• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30 
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 
34 #include "tgsi_lowering.h"
35 
36 struct tgsi_lowering_context {
37    struct tgsi_transform_context base;
38    const struct tgsi_lowering_config *config;
39    struct tgsi_shader_info *info;
40    unsigned two_side_colors;
41    unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42    unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
43    int face_idx;
44    unsigned numtmp;
45    struct {
46       struct tgsi_full_src_register src;
47       struct tgsi_full_dst_register dst;
48    } tmp[2];
49 #define A 0
50 #define B 1
51    struct tgsi_full_src_register imm;
52    int emitted_decls;
53    unsigned saturate;
54 };
55 
/* Downcast a transform context to the lowering context it is cast from. */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
61 
62 /*
63  * Utility helpers:
64  */
65 
66 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)67 reg_dst(struct tgsi_full_dst_register *dst,
68 	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70    *dst = *orig_dst;
71    dst->Register.WriteMask &= wrmask;
72    assert(dst->Register.WriteMask);
73 }
74 
75 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78    swiz[0] = src->SwizzleX;
79    swiz[1] = src->SwizzleY;
80    swiz[2] = src->SwizzleZ;
81    swiz[3] = src->SwizzleW;
82 }
83 
84 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)85 reg_src(struct tgsi_full_src_register *src,
86 	const struct tgsi_full_src_register *orig_src,
87 	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89    unsigned swiz[4];
90    get_swiz(swiz, &orig_src->Register);
91    *src = *orig_src;
92    src->Register.SwizzleX = swiz[sx];
93    src->Register.SwizzleY = swiz[sy];
94    src->Register.SwizzleZ = swiz[sz];
95    src->Register.SwizzleW = swiz[sw];
96 }
97 
/*
 * SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* values,
 * suitable as the sx/sy/sz/sw arguments of reg_src().  Write '_' for a
 * channel whose selector does not matter; it expands to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w) \
   TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101 
102 /*
103  * if (dst.x aliases src.x) {
104  *   MOV tmpA.x, src.x
105  *   src = tmpA
106  * }
107  * COS dst.x, src.x
108  * SIN dst.y, src.x
109  * MOV dst.zw, imm{0.0, 1.0}
110  */
111 static bool
aliases(const struct tgsi_full_dst_register * dst,unsigned dst_mask,const struct tgsi_full_src_register * src,unsigned src_mask)112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 	const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115    if ((dst->Register.File == src->Register.File) &&
116        (dst->Register.Index == src->Register.Index)) {
117       unsigned i, actual_mask = 0;
118       unsigned swiz[4];
119       get_swiz(swiz, &src->Register);
120       for (i = 0; i < 4; i++)
121          if (src_mask & (1 << i))
122             actual_mask |= (1 << swiz[i]);
123       if (actual_mask & dst_mask)
124          return true;
125    }
126    return false;
127 }
128 
129 static void
create_mov(struct tgsi_transform_context * tctx,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src,unsigned mask,unsigned saturate)130 create_mov(struct tgsi_transform_context *tctx,
131            const struct tgsi_full_dst_register *dst,
132            const struct tgsi_full_src_register *src,
133            unsigned mask, unsigned saturate)
134 {
135    struct tgsi_full_instruction new_inst;
136 
137    new_inst = tgsi_default_full_instruction();
138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139    new_inst.Instruction.Saturate = saturate;
140    new_inst.Instruction.NumDstRegs = 1;
141    reg_dst(&new_inst.Dst[0], dst, mask);
142    new_inst.Instruction.NumSrcRegs = 1;
143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144    tctx->emit_instruction(tctx, &new_inst);
145 }
146 
/*
 * Token accounting used to size the output of a lowering.  The estimate
 * is deliberately pessimistic: an instruction that is removed is assumed
 * to have had the smallest encoding (no ADDR[] or anything else that
 * adds tokens to a src/dst), while an instruction that is inserted is
 * assumed to have the larger one.
 *
 * OINST(nargs) - tokens freed by an old (removed) instruction:
 *    1         : the instruction itself
 *    1         : dst
 *    1 * nargs : each src
 *
 * NINST(nargs) - tokens consumed by a new (inserted) instruction:
 *    1         : the instruction itself
 *    2         : dst
 *    2 * nargs : each src
 */

#define OINST(nargs)  (1 + 1 + 1 * (nargs))
#define NINST(nargs)  (1 + 2 + 2 * (nargs))
165 
166 /*
167  * Lowering Translators:
168  */
169 
170 /* DST - Distance Vector
171  *   dst.x = 1.0
172  *   dst.y = src0.y \times src1.y
173  *   dst.z = src0.z
174  *   dst.w = src1.w
175  *
176  * ; note: could be more clever and use just a single temp
177  * ;       if I was clever enough to re-write the swizzles.
178  * ; needs: 2 tmp, imm{1.0}
179  * if (dst.y aliases src0.z) {
180  *   MOV tmpA.yz, src0.yz
181  *   src0 = tmpA
182  * }
183  * if (dst.yz aliases src1.w) {
184  *   MOV tmpB.yw, src1.yw
185  *   src1 = tmpB
186  * }
187  * MUL dst.y, src0.y, src1.y
188  * MOV dst.z, src0.z
189  * MOV dst.w, src1.w
190  * MOV dst.x, imm{1.0}
191  */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 		NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP  2
195 static void
transform_dst(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)196 transform_dst(struct tgsi_transform_context *tctx,
197               struct tgsi_full_instruction *inst)
198 {
199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
201    struct tgsi_full_src_register *src0 = &inst->Src[0];
202    struct tgsi_full_src_register *src1 = &inst->Src[1];
203    struct tgsi_full_instruction new_inst;
204 
205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207       src0 = &ctx->tmp[A].src;
208    }
209 
210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212       src1 = &ctx->tmp[B].src;
213    }
214 
215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216       /* MUL dst.y, src0.y, src1.y */
217       new_inst = tgsi_default_full_instruction();
218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219       new_inst.Instruction.NumDstRegs = 1;
220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221       new_inst.Instruction.NumSrcRegs = 2;
222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224       tctx->emit_instruction(tctx, &new_inst);
225    }
226 
227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228       /* MOV dst.z, src0.z */
229       new_inst = tgsi_default_full_instruction();
230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231       new_inst.Instruction.NumDstRegs = 1;
232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233       new_inst.Instruction.NumSrcRegs = 1;
234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235       tctx->emit_instruction(tctx, &new_inst);
236    }
237 
238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239       /* MOV dst.w, src1.w */
240       new_inst = tgsi_default_full_instruction();
241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242       new_inst.Instruction.NumDstRegs = 1;
243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244       new_inst.Instruction.NumSrcRegs = 1;
245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246       tctx->emit_instruction(tctx, &new_inst);
247    }
248 
249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250       /* MOV dst.x, imm{1.0} */
251       new_inst = tgsi_default_full_instruction();
252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253       new_inst.Instruction.NumDstRegs = 1;
254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255       new_inst.Instruction.NumSrcRegs = 1;
256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257       tctx->emit_instruction(tctx, &new_inst);
258    }
259 }
260 
261 /* XPD - Cross Product
262  *   dst.x = src0.y \times src1.z - src1.y \times src0.z
263  *   dst.y = src0.z \times src1.x - src1.z \times src0.x
264  *   dst.z = src0.x \times src1.y - src1.x \times src0.y
265  *   dst.w = 1.0
266  *
267  * ; needs: 1 tmp, imm{1.0}
268  * MUL tmpA.xyz, src1.yzx, src0.zxy
269  * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
270  * MOV dst.w, imm{1.0}
271  */
272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
273 #define XPD_TMP  1
274 static void
transform_xpd(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)275 transform_xpd(struct tgsi_transform_context *tctx,
276               struct tgsi_full_instruction *inst)
277 {
278    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
279    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
280    struct tgsi_full_src_register *src0 = &inst->Src[0];
281    struct tgsi_full_src_register *src1 = &inst->Src[1];
282    struct tgsi_full_instruction new_inst;
283 
284    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
285       /* MUL tmpA.xyz, src1.yzx, src0.zxy */
286       new_inst = tgsi_default_full_instruction();
287       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
288       new_inst.Instruction.NumDstRegs = 1;
289       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
290       new_inst.Instruction.NumSrcRegs = 2;
291       reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
292       reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
293       tctx->emit_instruction(tctx, &new_inst);
294 
295       /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
296       new_inst = tgsi_default_full_instruction();
297       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
298       new_inst.Instruction.NumDstRegs = 1;
299       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
300       new_inst.Instruction.NumSrcRegs = 3;
301       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
302       reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
303       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
304       new_inst.Src[2].Register.Negate = true;
305       tctx->emit_instruction(tctx, &new_inst);
306    }
307 
308    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
309       /* MOV dst.w, imm{1.0} */
310       new_inst = tgsi_default_full_instruction();
311       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
312       new_inst.Instruction.NumDstRegs = 1;
313       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
314       new_inst.Instruction.NumSrcRegs = 1;
315       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
316       tctx->emit_instruction(tctx, &new_inst);
317    }
318 }
319 
320 /* SCS - Sine Cosine
321  *   dst.x = \cos{src.x}
322  *   dst.y = \sin{src.x}
323  *   dst.z = 0.0
324  *   dst.w = 1.0
325  *
326  * ; needs: 1 tmp, imm{0.0, 1.0}
327  * if (dst.x aliases src.x) {
328  *   MOV tmpA.x, src.x
329  *   src = tmpA
330  * }
331  * COS dst.x, src.x
332  * SIN dst.y, src.x
333  * MOV dst.zw, imm{0.0, 1.0}
334  */
335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
336 #define SCS_TMP  1
337 static void
transform_scs(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)338 transform_scs(struct tgsi_transform_context *tctx,
339               struct tgsi_full_instruction *inst)
340 {
341    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
342    struct tgsi_full_dst_register *dst = &inst->Dst[0];
343    struct tgsi_full_src_register *src = &inst->Src[0];
344    struct tgsi_full_instruction new_inst;
345 
346    if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
347       create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
348       src = &ctx->tmp[A].src;
349    }
350 
351    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
352       /* COS dst.x, src.x */
353       new_inst = tgsi_default_full_instruction();
354       new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
355       new_inst.Instruction.NumDstRegs = 1;
356       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
357       new_inst.Instruction.NumSrcRegs = 1;
358       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
359       tctx->emit_instruction(tctx, &new_inst);
360    }
361 
362    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
363       /* SIN dst.y, src.x */
364       new_inst = tgsi_default_full_instruction();
365       new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
366       new_inst.Instruction.NumDstRegs = 1;
367       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
368       new_inst.Instruction.NumSrcRegs = 1;
369       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
370       tctx->emit_instruction(tctx, &new_inst);
371    }
372 
373    if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
374       /* MOV dst.zw, imm{0.0, 1.0} */
375       new_inst = tgsi_default_full_instruction();
376       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
377       new_inst.Instruction.NumDstRegs = 1;
378       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
379       new_inst.Instruction.NumSrcRegs = 1;
380       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
381       tctx->emit_instruction(tctx, &new_inst);
382    }
383 }
384 
385 /* LRP - Linear Interpolate
386  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
387  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
388  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
389  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
390  *
391  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
392  * can then become: src0 \times src1 - (src0 \times src2 - src2)
393  *
394  * ; needs: 1 tmp
395  * MAD tmpA, src0, src2, -src2
396  * MAD dst, src0, src1, -tmpA
397  */
398 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
399 #define LRP_TMP  1
400 static void
transform_lrp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)401 transform_lrp(struct tgsi_transform_context *tctx,
402               struct tgsi_full_instruction *inst)
403 {
404    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
405    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
406    struct tgsi_full_src_register *src0 = &inst->Src[0];
407    struct tgsi_full_src_register *src1 = &inst->Src[1];
408    struct tgsi_full_src_register *src2 = &inst->Src[2];
409    struct tgsi_full_instruction new_inst;
410 
411    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
412       /* MAD tmpA, src0, src2, -src2 */
413       new_inst = tgsi_default_full_instruction();
414       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
415       new_inst.Instruction.NumDstRegs = 1;
416       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
417       new_inst.Instruction.NumSrcRegs = 3;
418       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
419       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
420       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
421       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
422       tctx->emit_instruction(tctx, &new_inst);
423 
424       /* MAD dst, src0, src1, -tmpA */
425       new_inst = tgsi_default_full_instruction();
426       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
427       new_inst.Instruction.NumDstRegs = 1;
428       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
429       new_inst.Instruction.NumSrcRegs = 3;
430       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
431       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
432       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
433       new_inst.Src[2].Register.Negate = true;
434       tctx->emit_instruction(tctx, &new_inst);
435    }
436 }
437 
438 /* FRC - Fraction
439  *  dst.x = src.x - \lfloor src.x\rfloor
440  *  dst.y = src.y - \lfloor src.y\rfloor
441  *  dst.z = src.z - \lfloor src.z\rfloor
442  *  dst.w = src.w - \lfloor src.w\rfloor
443  *
444  * ; needs: 1 tmp
445  * FLR tmpA, src
446  * SUB dst, src, tmpA
447  */
448 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
449 #define FRC_TMP  1
450 static void
transform_frc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)451 transform_frc(struct tgsi_transform_context *tctx,
452               struct tgsi_full_instruction *inst)
453 {
454    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
455    struct tgsi_full_dst_register *dst = &inst->Dst[0];
456    struct tgsi_full_src_register *src = &inst->Src[0];
457    struct tgsi_full_instruction new_inst;
458 
459    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
460       /* FLR tmpA, src */
461       new_inst = tgsi_default_full_instruction();
462       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
463       new_inst.Instruction.NumDstRegs = 1;
464       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
465       new_inst.Instruction.NumSrcRegs = 1;
466       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
467       tctx->emit_instruction(tctx, &new_inst);
468 
469       /* SUB dst, src, tmpA */
470       new_inst = tgsi_default_full_instruction();
471       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
472       new_inst.Instruction.NumDstRegs = 1;
473       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
474       new_inst.Instruction.NumSrcRegs = 2;
475       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
476       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
477       new_inst.Src[1].Register.Negate = 1;
478       tctx->emit_instruction(tctx, &new_inst);
479    }
480 }
481 
482 /* POW - Power
483  *  dst.x = src0.x^{src1.x}
484  *  dst.y = src0.x^{src1.x}
485  *  dst.z = src0.x^{src1.x}
486  *  dst.w = src0.x^{src1.x}
487  *
488  * ; needs: 1 tmp
489  * LG2 tmpA.x, src0.x
490  * MUL tmpA.x, src1.x, tmpA.x
491  * EX2 dst, tmpA.x
492  */
493 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
494 #define POW_TMP  1
495 static void
transform_pow(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)496 transform_pow(struct tgsi_transform_context *tctx,
497               struct tgsi_full_instruction *inst)
498 {
499    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
500    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
501    struct tgsi_full_src_register *src0 = &inst->Src[0];
502    struct tgsi_full_src_register *src1 = &inst->Src[1];
503    struct tgsi_full_instruction new_inst;
504 
505    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
506       /* LG2 tmpA.x, src0.x */
507       new_inst = tgsi_default_full_instruction();
508       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
509       new_inst.Instruction.NumDstRegs = 1;
510       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
511       new_inst.Instruction.NumSrcRegs = 1;
512       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
513       tctx->emit_instruction(tctx, &new_inst);
514 
515       /* MUL tmpA.x, src1.x, tmpA.x */
516       new_inst = tgsi_default_full_instruction();
517       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
518       new_inst.Instruction.NumDstRegs = 1;
519       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
520       new_inst.Instruction.NumSrcRegs = 2;
521       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
522       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
523       tctx->emit_instruction(tctx, &new_inst);
524 
525       /* EX2 dst, tmpA.x */
526       new_inst = tgsi_default_full_instruction();
527       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
528       new_inst.Instruction.NumDstRegs = 1;
529       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
530       new_inst.Instruction.NumSrcRegs = 1;
531       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
532       tctx->emit_instruction(tctx, &new_inst);
533    }
534 }
535 
536 /* LIT - Light Coefficients
537  *  dst.x = 1.0
538  *  dst.y = max(src.x, 0.0)
539  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
540  *  dst.w = 1.0
541  *
542  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
543  * MAX tmpA.xy, src.xy, imm{0.0}
544  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
545  * LG2 tmpA.y, tmpA.y
546  * MUL tmpA.y, tmpA.z, tmpA.y
547  * EX2 tmpA.y, tmpA.y
548  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
549  * MOV dst.yz, tmpA.xy
550  * MOV dst.xw, imm{1.0}
551  */
552 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
553 		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
554 #define LIT_TMP  1
555 static void
transform_lit(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)556 transform_lit(struct tgsi_transform_context *tctx,
557               struct tgsi_full_instruction *inst)
558 {
559    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
560    struct tgsi_full_dst_register *dst = &inst->Dst[0];
561    struct tgsi_full_src_register *src = &inst->Src[0];
562    struct tgsi_full_instruction new_inst;
563 
564    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
565       /* MAX tmpA.xy, src.xy, imm{0.0} */
566       new_inst = tgsi_default_full_instruction();
567       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
568       new_inst.Instruction.NumDstRegs = 1;
569       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
570       new_inst.Instruction.NumSrcRegs = 2;
571       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
572       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
573       tctx->emit_instruction(tctx, &new_inst);
574 
575       /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
576       new_inst = tgsi_default_full_instruction();
577       new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
578       new_inst.Instruction.NumDstRegs = 1;
579       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
580       new_inst.Instruction.NumSrcRegs = 3;
581       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
582       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
583       new_inst.Src[1].Register.Negate = true;
584       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
585       tctx->emit_instruction(tctx, &new_inst);
586 
587       /* LG2 tmpA.y, tmpA.y */
588       new_inst = tgsi_default_full_instruction();
589       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
590       new_inst.Instruction.NumDstRegs = 1;
591       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
592       new_inst.Instruction.NumSrcRegs = 1;
593       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
594       tctx->emit_instruction(tctx, &new_inst);
595 
596       /* MUL tmpA.y, tmpA.z, tmpA.y */
597       new_inst = tgsi_default_full_instruction();
598       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
599       new_inst.Instruction.NumDstRegs = 1;
600       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
601       new_inst.Instruction.NumSrcRegs = 2;
602       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
603       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
604       tctx->emit_instruction(tctx, &new_inst);
605 
606       /* EX2 tmpA.y, tmpA.y */
607       new_inst = tgsi_default_full_instruction();
608       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
609       new_inst.Instruction.NumDstRegs = 1;
610       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
611       new_inst.Instruction.NumSrcRegs = 1;
612       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
613       tctx->emit_instruction(tctx, &new_inst);
614 
615       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
616       new_inst = tgsi_default_full_instruction();
617       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
618       new_inst.Instruction.NumDstRegs = 1;
619       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
620       new_inst.Instruction.NumSrcRegs = 3;
621       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
622       new_inst.Src[0].Register.Negate = true;
623       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
624       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
625       tctx->emit_instruction(tctx, &new_inst);
626 
627       /* MOV dst.yz, tmpA.xy */
628       new_inst = tgsi_default_full_instruction();
629       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
630       new_inst.Instruction.NumDstRegs = 1;
631       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
632       new_inst.Instruction.NumSrcRegs = 1;
633       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
634       tctx->emit_instruction(tctx, &new_inst);
635    }
636 
637    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
638       /* MOV dst.xw, imm{1.0} */
639       new_inst = tgsi_default_full_instruction();
640       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
641       new_inst.Instruction.NumDstRegs = 1;
642       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
643       new_inst.Instruction.NumSrcRegs = 1;
644       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
645       tctx->emit_instruction(tctx, &new_inst);
646    }
647 }
648 
649 /* EXP - Approximate Exponential Base 2
650  *  dst.x = 2^{\lfloor src.x\rfloor}
651  *  dst.y = src.x - \lfloor src.x\rfloor
652  *  dst.z = 2^{src.x}
653  *  dst.w = 1.0
654  *
655  * ; needs: 1 tmp, imm{1.0}
656  * if (lowering FLR) {
657  *   FRC tmpA.x, src.x
658  *   SUB tmpA.x, src.x, tmpA.x
659  * } else {
660  *   FLR tmpA.x, src.x
661  * }
662  * EX2 tmpA.y, src.x
663  * SUB dst.y, src.x, tmpA.x
664  * EX2 dst.x, tmpA.x
665  * MOV dst.z, tmpA.y
666  * MOV dst.w, imm{1.0}
667  */
668 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
669 		NINST(1)+ NINST(1) - OINST(1))
670 #define EXP_TMP  1
671 static void
transform_exp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)672 transform_exp(struct tgsi_transform_context *tctx,
673               struct tgsi_full_instruction *inst)
674 {
675    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
676    struct tgsi_full_dst_register *dst = &inst->Dst[0];
677    struct tgsi_full_src_register *src = &inst->Src[0];
678    struct tgsi_full_instruction new_inst;
679 
680    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
681       if (ctx->config->lower_FLR) {
682          /* FRC tmpA.x, src.x */
683          new_inst = tgsi_default_full_instruction();
684          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
685          new_inst.Instruction.NumDstRegs = 1;
686          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
687          new_inst.Instruction.NumSrcRegs = 1;
688          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
689          tctx->emit_instruction(tctx, &new_inst);
690 
691          /* SUB tmpA.x, src.x, tmpA.x */
692          new_inst = tgsi_default_full_instruction();
693          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
694          new_inst.Instruction.NumDstRegs = 1;
695          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
696          new_inst.Instruction.NumSrcRegs = 2;
697          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
698          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
699          new_inst.Src[1].Register.Negate = 1;
700          tctx->emit_instruction(tctx, &new_inst);
701      } else {
702          /* FLR tmpA.x, src.x */
703          new_inst = tgsi_default_full_instruction();
704          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
705          new_inst.Instruction.NumDstRegs = 1;
706          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
707          new_inst.Instruction.NumSrcRegs = 1;
708          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
709          tctx->emit_instruction(tctx, &new_inst);
710       }
711    }
712 
713    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
714       /* EX2 tmpA.y, src.x */
715       new_inst = tgsi_default_full_instruction();
716       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
717       new_inst.Instruction.NumDstRegs = 1;
718       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
719       new_inst.Instruction.NumSrcRegs = 1;
720       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
721       tctx->emit_instruction(tctx, &new_inst);
722    }
723 
724    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
725       /* SUB dst.y, src.x, tmpA.x */
726       new_inst = tgsi_default_full_instruction();
727       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
728       new_inst.Instruction.NumDstRegs = 1;
729       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
730       new_inst.Instruction.NumSrcRegs = 2;
731       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
732       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
733       new_inst.Src[1].Register.Negate = 1;
734       tctx->emit_instruction(tctx, &new_inst);
735    }
736 
737    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
738       /* EX2 dst.x, tmpA.x */
739       new_inst = tgsi_default_full_instruction();
740       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
741       new_inst.Instruction.NumDstRegs = 1;
742       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
743       new_inst.Instruction.NumSrcRegs = 1;
744       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
745       tctx->emit_instruction(tctx, &new_inst);
746    }
747 
748    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
749       /* MOV dst.z, tmpA.y */
750       new_inst = tgsi_default_full_instruction();
751       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
752       new_inst.Instruction.NumDstRegs = 1;
753       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
754       new_inst.Instruction.NumSrcRegs = 1;
755       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
756       tctx->emit_instruction(tctx, &new_inst);
757    }
758 
759    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
760       /* MOV dst.w, imm{1.0} */
761       new_inst = tgsi_default_full_instruction();
762       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
763       new_inst.Instruction.NumDstRegs = 1;
764       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
765       new_inst.Instruction.NumSrcRegs = 1;
766       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
767       tctx->emit_instruction(tctx, &new_inst);
768    }
769 }
770 
771 /* LOG - Approximate Logarithm Base 2
772  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
773  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
774  *  dst.z = \log_2{|src.x|}
775  *  dst.w = 1.0
776  *
777  * ; needs: 1 tmp, imm{1.0}
778  * LG2 tmpA.x, |src.x|
779  * if (lowering FLR) {
780  *   FRC tmpA.y, tmpA.x
781  *   SUB tmpA.y, tmpA.x, tmpA.y
782  * } else {
783  *   FLR tmpA.y, tmpA.x
784  * }
785  * EX2 tmpA.z, tmpA.y
786  * RCP tmpA.z, tmpA.z
787  * MUL dst.y, |src.x|, tmpA.z
788  * MOV dst.xz, tmpA.yx
789  * MOV dst.w, imm{1.0}
790  */
791 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
792 		NINST(2) + NINST(1) + NINST(1) - OINST(1))
793 #define LOG_TMP  1
794 static void
transform_log(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)795 transform_log(struct tgsi_transform_context *tctx,
796               struct tgsi_full_instruction *inst)
797 {
798    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
799    struct tgsi_full_dst_register *dst = &inst->Dst[0];
800    struct tgsi_full_src_register *src = &inst->Src[0];
801    struct tgsi_full_instruction new_inst;
802 
803    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
804       /* LG2 tmpA.x, |src.x| */
805       new_inst = tgsi_default_full_instruction();
806       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
807       new_inst.Instruction.NumDstRegs = 1;
808       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
809       new_inst.Instruction.NumSrcRegs = 1;
810       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
811       new_inst.Src[0].Register.Absolute = true;
812       tctx->emit_instruction(tctx, &new_inst);
813    }
814 
815    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
816       if (ctx->config->lower_FLR) {
817          /* FRC tmpA.y, tmpA.x */
818          new_inst = tgsi_default_full_instruction();
819          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
820          new_inst.Instruction.NumDstRegs = 1;
821          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
822          new_inst.Instruction.NumSrcRegs = 1;
823          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
824          tctx->emit_instruction(tctx, &new_inst);
825 
826          /* SUB tmpA.y, tmpA.x, tmpA.y */
827          new_inst = tgsi_default_full_instruction();
828          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
829          new_inst.Instruction.NumDstRegs = 1;
830          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
831          new_inst.Instruction.NumSrcRegs = 2;
832          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
833          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
834          new_inst.Src[1].Register.Negate = 1;
835          tctx->emit_instruction(tctx, &new_inst);
836       } else {
837          /* FLR tmpA.y, tmpA.x */
838          new_inst = tgsi_default_full_instruction();
839          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
840          new_inst.Instruction.NumDstRegs = 1;
841          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
842          new_inst.Instruction.NumSrcRegs = 1;
843          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
844          tctx->emit_instruction(tctx, &new_inst);
845       }
846    }
847 
848    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
849       /* EX2 tmpA.z, tmpA.y */
850       new_inst = tgsi_default_full_instruction();
851       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
852       new_inst.Instruction.NumDstRegs = 1;
853       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
854       new_inst.Instruction.NumSrcRegs = 1;
855       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
856       tctx->emit_instruction(tctx, &new_inst);
857 
858       /* RCP tmpA.z, tmpA.z */
859       new_inst = tgsi_default_full_instruction();
860       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
861       new_inst.Instruction.NumDstRegs = 1;
862       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
863       new_inst.Instruction.NumSrcRegs = 1;
864       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
865       tctx->emit_instruction(tctx, &new_inst);
866 
867       /* MUL dst.y, |src.x|, tmpA.z */
868       new_inst = tgsi_default_full_instruction();
869       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
870       new_inst.Instruction.NumDstRegs = 1;
871       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
872       new_inst.Instruction.NumSrcRegs = 2;
873       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
874       new_inst.Src[0].Register.Absolute = true;
875       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
876       tctx->emit_instruction(tctx, &new_inst);
877    }
878 
879    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
880       /* MOV dst.xz, tmpA.yx */
881       new_inst = tgsi_default_full_instruction();
882       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
883       new_inst.Instruction.NumDstRegs = 1;
884       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
885       new_inst.Instruction.NumSrcRegs = 1;
886       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
887       tctx->emit_instruction(tctx, &new_inst);
888    }
889 
890    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
891       /* MOV dst.w, imm{1.0} */
892       new_inst = tgsi_default_full_instruction();
893       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
894       new_inst.Instruction.NumDstRegs = 1;
895       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
896       new_inst.Instruction.NumSrcRegs = 1;
897       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
898       tctx->emit_instruction(tctx, &new_inst);
899    }
900 }
901 
902 /* DP4 - 4-component Dot Product
903  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
904  *
905  * DP3 - 3-component Dot Product
906  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
907  *
908  * DPH - Homogeneous Dot Product
909  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
910  *
911  * DP2 - 2-component Dot Product
912  *   dst = src0.x \times src1.x + src0.y \times src1.y
913  *
914  * DP2A - 2-component Dot Product And Add
915  *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
916  *
917  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
918  * operations, which is what you'd prefer for a ISA that is natively
919  * scalar.  Probably a native vector ISA would at least already have
920  * DP4/DP3 instructions, but perhaps there is room for an alternative
921  * translation for DPH/DP2/DP2A using vector instructions.
922  *
923  * ; needs: 1 tmp
924  * MUL tmpA.x, src0.x, src1.x
925  * MAD tmpA.x, src0.y, src1.y, tmpA.x
926  * if (DPH || DP3 || DP4) {
927  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
928  *   if (DPH) {
929  *     ADD tmpA.x, src1.w, tmpA.x
930  *   } else if (DP4) {
931  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
932  *   }
933  * } else if (DP2A) {
934  *   ADD tmpA.x, src2.x, tmpA.x
935  * }
936  * ; fixup last instruction to replicate into dst
937  */
938 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
939 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
940 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
941 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
942 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
943 #define DOTP_TMP  1
944 static void
transform_dotp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)945 transform_dotp(struct tgsi_transform_context *tctx,
946                struct tgsi_full_instruction *inst)
947 {
948    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
949    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
950    struct tgsi_full_src_register *src0 = &inst->Src[0];
951    struct tgsi_full_src_register *src1 = &inst->Src[1];
952    struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
953    struct tgsi_full_instruction new_inst;
954    unsigned opcode = inst->Instruction.Opcode;
955 
956    /* NOTE: any potential last instruction must replicate src on all
957     * components (since it could be re-written to write to final dst)
958     */
959 
960    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
961       /* MUL tmpA.x, src0.x, src1.x */
962       new_inst = tgsi_default_full_instruction();
963       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
964       new_inst.Instruction.NumDstRegs = 1;
965       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
966       new_inst.Instruction.NumSrcRegs = 2;
967       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
968       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
969       tctx->emit_instruction(tctx, &new_inst);
970 
971       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
972       new_inst = tgsi_default_full_instruction();
973       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
974       new_inst.Instruction.NumDstRegs = 1;
975       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
976       new_inst.Instruction.NumSrcRegs = 3;
977       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
978       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
979       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
980 
981       if ((opcode == TGSI_OPCODE_DPH) ||
982           (opcode == TGSI_OPCODE_DP3) ||
983           (opcode == TGSI_OPCODE_DP4)) {
984          tctx->emit_instruction(tctx, &new_inst);
985 
986          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
987          new_inst = tgsi_default_full_instruction();
988          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
989          new_inst.Instruction.NumDstRegs = 1;
990          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
991          new_inst.Instruction.NumSrcRegs = 3;
992          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
993          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
994          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
995 
996          if (opcode == TGSI_OPCODE_DPH) {
997             tctx->emit_instruction(tctx, &new_inst);
998 
999             /* ADD tmpA.x, src1.w, tmpA.x */
1000             new_inst = tgsi_default_full_instruction();
1001             new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1002             new_inst.Instruction.NumDstRegs = 1;
1003             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1004             new_inst.Instruction.NumSrcRegs = 2;
1005             reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
1006             reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1007          } else if (opcode == TGSI_OPCODE_DP4) {
1008             tctx->emit_instruction(tctx, &new_inst);
1009 
1010             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
1011             new_inst = tgsi_default_full_instruction();
1012             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1013             new_inst.Instruction.NumDstRegs = 1;
1014             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1015             new_inst.Instruction.NumSrcRegs = 3;
1016             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
1017             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
1018             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1019          }
1020       } else if (opcode == TGSI_OPCODE_DP2A) {
1021          tctx->emit_instruction(tctx, &new_inst);
1022 
1023          /* ADD tmpA.x, src2.x, tmpA.x */
1024          new_inst = tgsi_default_full_instruction();
1025          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1026          new_inst.Instruction.NumDstRegs = 1;
1027          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1028          new_inst.Instruction.NumSrcRegs = 2;
1029          reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
1030          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1031       }
1032 
1033       /* fixup last instruction to write to dst: */
1034       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1035 
1036       tctx->emit_instruction(tctx, &new_inst);
1037    }
1038 }
1039 
1040 /* FLR - floor, CEIL - ceil
1041  * ; needs: 1 tmp
1042  * if (CEIL) {
1043  *   FRC tmpA, -src
1044  *   ADD dst, src, tmpA
1045  * } else {
1046  *   FRC tmpA, src
1047  *   SUB dst, src, tmpA
1048  * }
1049  */
1050 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
1051 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
1052 #define FLR_TMP 1
1053 #define CEIL_TMP 1
1054 static void
transform_flr_ceil(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1055 transform_flr_ceil(struct tgsi_transform_context *tctx,
1056                    struct tgsi_full_instruction *inst)
1057 {
1058    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1059    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
1060    struct tgsi_full_src_register *src0 = &inst->Src[0];
1061    struct tgsi_full_instruction new_inst;
1062    unsigned opcode = inst->Instruction.Opcode;
1063 
1064    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1065       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
1066       new_inst = tgsi_default_full_instruction();
1067       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1068       new_inst.Instruction.NumDstRegs = 1;
1069       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1070       new_inst.Instruction.NumSrcRegs = 1;
1071       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1072 
1073       if (opcode == TGSI_OPCODE_CEIL)
1074          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
1075       tctx->emit_instruction(tctx, &new_inst);
1076 
1077       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
1078       new_inst = tgsi_default_full_instruction();
1079       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1080       new_inst.Instruction.NumDstRegs = 1;
1081       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1082       new_inst.Instruction.NumSrcRegs = 2;
1083       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1084       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1085       if (opcode == TGSI_OPCODE_FLR)
1086          new_inst.Src[1].Register.Negate = 1;
1087       tctx->emit_instruction(tctx, &new_inst);
1088    }
1089 }
1090 
1091 /* TRUNC - truncate off fractional part
1092  *  dst.x = trunc(src.x)
1093  *  dst.y = trunc(src.y)
1094  *  dst.z = trunc(src.z)
1095  *  dst.w = trunc(src.w)
1096  *
1097  * ; needs: 1 tmp
1098  * if (lower FLR) {
1099  *   FRC tmpA, |src|
1100  *   SUB tmpA, |src|, tmpA
1101  * } else {
1102  *   FLR tmpA, |src|
1103  * }
1104  * CMP dst, src, -tmpA, tmpA
1105  */
1106 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
1107 #define TRUNC_TMP 1
1108 static void
transform_trunc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1109 transform_trunc(struct tgsi_transform_context *tctx,
1110                 struct tgsi_full_instruction *inst)
1111 {
1112    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1113    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
1114    struct tgsi_full_src_register *src0 = &inst->Src[0];
1115    struct tgsi_full_instruction new_inst;
1116 
1117    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1118       if (ctx->config->lower_FLR) {
1119          new_inst = tgsi_default_full_instruction();
1120          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1121          new_inst.Instruction.NumDstRegs = 1;
1122          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1123          new_inst.Instruction.NumSrcRegs = 1;
1124          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1125          new_inst.Src[0].Register.Absolute = true;
1126          new_inst.Src[0].Register.Negate = false;
1127          tctx->emit_instruction(tctx, &new_inst);
1128 
1129          new_inst = tgsi_default_full_instruction();
1130          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1131          new_inst.Instruction.NumDstRegs = 1;
1132          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1133          new_inst.Instruction.NumSrcRegs = 2;
1134          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1135          new_inst.Src[0].Register.Absolute = true;
1136          new_inst.Src[0].Register.Negate = false;
1137          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1138          new_inst.Src[1].Register.Negate = 1;
1139          tctx->emit_instruction(tctx, &new_inst);
1140       } else {
1141          new_inst = tgsi_default_full_instruction();
1142          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
1143          new_inst.Instruction.NumDstRegs = 1;
1144          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1145          new_inst.Instruction.NumSrcRegs = 1;
1146          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1147          new_inst.Src[0].Register.Absolute = true;
1148          new_inst.Src[0].Register.Negate = false;
1149          tctx->emit_instruction(tctx, &new_inst);
1150       }
1151 
1152       new_inst = tgsi_default_full_instruction();
1153       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1154       new_inst.Instruction.NumDstRegs = 1;
1155       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1156       new_inst.Instruction.NumSrcRegs = 3;
1157       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1158       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1159       new_inst.Src[1].Register.Negate = true;
1160       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1161       tctx->emit_instruction(tctx, &new_inst);
1162    }
1163 }
1164 
1165 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1166  * in the case of TXP, the clamping must happen *after* projection, so
1167  * we need to lower TXP to TEX.
1168  *
1169  *   MOV tmpA, src0
1170  *   if (opc == TXP) {
1171  *     ; do perspective division manually before clamping:
1172  *     RCP tmpB, tmpA.w
1173  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1174  *     opc = TEX;
1175  *   }
1176  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1177  *   <opc> dst, tmpA, ...
1178  */
1179 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1180 #define SAMP_TMP  2
1181 static int
transform_samp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1182 transform_samp(struct tgsi_transform_context *tctx,
1183                struct tgsi_full_instruction *inst)
1184 {
1185    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1186    struct tgsi_full_src_register *coord = &inst->Src[0];
1187    struct tgsi_full_src_register *samp;
1188    struct tgsi_full_instruction new_inst;
1189    /* mask is clamped coords, pmask is all coords (for projection): */
1190    unsigned mask = 0, pmask = 0, smask;
1191    unsigned tex = inst->Texture.Texture;
1192    unsigned opcode = inst->Instruction.Opcode;
1193    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1194 		   (ctx->config->lower_TXP & (1 << tex));
1195 
1196    if (opcode == TGSI_OPCODE_TXB2) {
1197       samp = &inst->Src[2];
1198    } else {
1199       samp = &inst->Src[1];
1200    }
1201 
1202    /* convert sampler # to bitmask to test: */
1203    smask = 1 << samp->Register.Index;
1204 
1205    /* check if we actually need to lower this one: */
1206    if (!(ctx->saturate & smask) && !lower_txp)
1207       return -1;
1208 
1209    /* figure out which coordinates need saturating:
1210     *   - RECT textures should not get saturated
1211     *   - array index coords should not get saturated
1212     */
1213    switch (tex) {
1214    case TGSI_TEXTURE_3D:
1215    case TGSI_TEXTURE_CUBE:
1216    case TGSI_TEXTURE_CUBE_ARRAY:
1217    case TGSI_TEXTURE_SHADOWCUBE:
1218    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1219       if (ctx->config->saturate_r & smask)
1220          mask |= TGSI_WRITEMASK_Z;
1221       pmask |= TGSI_WRITEMASK_Z;
1222       /* fallthrough */
1223 
1224    case TGSI_TEXTURE_2D:
1225    case TGSI_TEXTURE_2D_ARRAY:
1226    case TGSI_TEXTURE_SHADOW2D:
1227    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1228    case TGSI_TEXTURE_2D_MSAA:
1229    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1230       if (ctx->config->saturate_t & smask)
1231          mask |= TGSI_WRITEMASK_Y;
1232       pmask |= TGSI_WRITEMASK_Y;
1233       /* fallthrough */
1234 
1235    case TGSI_TEXTURE_1D:
1236    case TGSI_TEXTURE_1D_ARRAY:
1237    case TGSI_TEXTURE_SHADOW1D:
1238    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1239       if (ctx->config->saturate_s & smask)
1240          mask |= TGSI_WRITEMASK_X;
1241       pmask |= TGSI_WRITEMASK_X;
1242       break;
1243 
1244    case TGSI_TEXTURE_RECT:
1245    case TGSI_TEXTURE_SHADOWRECT:
1246       /* we don't saturate, but in case of lower_txp we
1247        * still need to do the perspective divide:
1248        */
1249        pmask = TGSI_WRITEMASK_XY;
1250        break;
1251    }
1252 
1253    /* sanity check.. driver could be asking to saturate a non-
1254     * existent coordinate component:
1255     */
1256    if (!mask && !lower_txp)
1257       return -1;
1258 
1259    /* MOV tmpA, src0 */
1260    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1261 
1262    /* This is a bit sad.. we need to clamp *after* the coords
1263     * are projected, which means lowering TXP to TEX and doing
1264     * the projection ourself.  But since I haven't figured out
1265     * how to make the lowering code deliver an electric shock
1266     * to anyone using GL_CLAMP, we must do this instead:
1267     */
1268    if (opcode == TGSI_OPCODE_TXP) {
1269       /* RCP tmpB.x tmpA.w */
1270       new_inst = tgsi_default_full_instruction();
1271       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1272       new_inst.Instruction.NumDstRegs = 1;
1273       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1274       new_inst.Instruction.NumSrcRegs = 1;
1275       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1276       tctx->emit_instruction(tctx, &new_inst);
1277 
1278       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1279       new_inst = tgsi_default_full_instruction();
1280       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1281       new_inst.Instruction.NumDstRegs = 1;
1282       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1283       new_inst.Instruction.NumSrcRegs = 2;
1284       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1285       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1286       tctx->emit_instruction(tctx, &new_inst);
1287 
1288       opcode = TGSI_OPCODE_TEX;
1289    }
1290 
1291    /* MOV_SAT tmpA.<mask>, tmpA */
1292    if (mask) {
1293       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1294    }
1295 
1296    /* modify the texture samp instruction to take fixed up coord: */
1297    new_inst = *inst;
1298    new_inst.Instruction.Opcode = opcode;
1299    new_inst.Src[0] = ctx->tmp[A].src;
1300    tctx->emit_instruction(tctx, &new_inst);
1301 
1302    return 0;
1303 }
1304 
1305 /* Two-sided color emulation:
1306  * For each COLOR input, create a corresponding BCOLOR input, plus
1307  * CMP instruction to select front or back color based on FACE
1308  */
1309 #define TWOSIDE_GROW(n)  (                      \
1310       2 +         /* FACE */                    \
1311       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1312       ((n) * 1) + /* TEMP[] */                  \
1313       ((n) * NINST(3))   /* CMP instr */        \
1314       )
1315 
1316 static void
emit_twoside(struct tgsi_transform_context * tctx)1317 emit_twoside(struct tgsi_transform_context *tctx)
1318 {
1319    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1320    struct tgsi_shader_info *info = ctx->info;
1321    struct tgsi_full_declaration decl;
1322    struct tgsi_full_instruction new_inst;
1323    unsigned inbase, tmpbase;
1324    int i;
1325 
1326    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1327    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1328 
1329    /* additional inputs for BCOLOR's */
1330    for (i = 0; i < ctx->two_side_colors; i++) {
1331       unsigned in_idx = ctx->two_side_idx[i];
1332       decl = tgsi_default_full_declaration();
1333       decl.Declaration.File = TGSI_FILE_INPUT;
1334       decl.Declaration.Semantic = true;
1335       decl.Range.First = decl.Range.Last = inbase + i;
1336       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1337       decl.Semantic.Index = info->input_semantic_index[in_idx];
1338       decl.Declaration.Interpolate = true;
1339       decl.Interp.Interpolate = info->input_interpolate[in_idx];
1340       decl.Interp.Location = info->input_interpolate_loc[in_idx];
1341       decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1342       tctx->emit_declaration(tctx, &decl);
1343    }
1344 
1345    /* additional input for FACE */
1346    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1347       decl = tgsi_default_full_declaration();
1348       decl.Declaration.File = TGSI_FILE_INPUT;
1349       decl.Declaration.Semantic = true;
1350       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1351       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1352       decl.Semantic.Index = 0;
1353       tctx->emit_declaration(tctx, &decl);
1354 
1355       ctx->face_idx = decl.Range.First;
1356    }
1357 
1358    /* additional temps for COLOR/BCOLOR selection: */
1359    for (i = 0; i < ctx->two_side_colors; i++) {
1360       decl = tgsi_default_full_declaration();
1361       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1362       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1363       tctx->emit_declaration(tctx, &decl);
1364    }
1365 
1366    /* and finally additional instructions to select COLOR/BCOLOR: */
1367    for (i = 0; i < ctx->two_side_colors; i++) {
1368       new_inst = tgsi_default_full_instruction();
1369       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1370 
1371       new_inst.Instruction.NumDstRegs = 1;
1372       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1373       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1374       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1375 
1376       new_inst.Instruction.NumSrcRegs = 3;
1377       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1378       new_inst.Src[0].Register.Index = ctx->face_idx;
1379       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1380       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1381       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1382       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1383       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1384       new_inst.Src[1].Register.Index = inbase + i;
1385       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1386       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1387       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1388       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1389       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1390       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1391       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1392       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1393       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1394       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1395 
1396       tctx->emit_instruction(tctx, &new_inst);
1397    }
1398 }
1399 
1400 static void
emit_decls(struct tgsi_transform_context * tctx)1401 emit_decls(struct tgsi_transform_context *tctx)
1402 {
1403    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1404    struct tgsi_shader_info *info = ctx->info;
1405    struct tgsi_full_declaration decl;
1406    struct tgsi_full_immediate immed;
1407    unsigned tmpbase;
1408    int i;
1409 
1410    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1411 
1412    ctx->color_base = tmpbase + ctx->numtmp;
1413 
1414    /* declare immediate: */
1415    immed = tgsi_default_full_immediate();
1416    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1417    immed.u[0].Float = 0.0;
1418    immed.u[1].Float = 1.0;
1419    immed.u[2].Float = 128.0;
1420    immed.u[3].Float = 0.0;
1421    tctx->emit_immediate(tctx, &immed);
1422 
1423    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1424    ctx->imm.Register.Index = info->immediate_count;
1425    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1426    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1427    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1428    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1429 
1430    /* declare temp regs: */
1431    for (i = 0; i < ctx->numtmp; i++) {
1432       decl = tgsi_default_full_declaration();
1433       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1434       decl.Range.First = decl.Range.Last = tmpbase + i;
1435       tctx->emit_declaration(tctx, &decl);
1436 
1437       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1438       ctx->tmp[i].src.Register.Index = tmpbase + i;
1439       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1440       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1441       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1442       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1443 
1444       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1445       ctx->tmp[i].dst.Register.Index = tmpbase + i;
1446       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1447    }
1448 
1449    if (ctx->two_side_colors)
1450       emit_twoside(tctx);
1451 }
1452 
1453 static void
rename_color_inputs(struct tgsi_lowering_context * ctx,struct tgsi_full_instruction * inst)1454 rename_color_inputs(struct tgsi_lowering_context *ctx,
1455                     struct tgsi_full_instruction *inst)
1456 {
1457    unsigned i, j;
1458    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1459       struct tgsi_src_register *src = &inst->Src[i].Register;
1460       if (src->File == TGSI_FILE_INPUT) {
1461          for (j = 0; j < ctx->two_side_colors; j++) {
1462             if (src->Index == ctx->two_side_idx[j]) {
1463                src->File = TGSI_FILE_TEMPORARY;
1464                src->Index = ctx->color_base + j;
1465                break;
1466             }
1467          }
1468       }
1469    }
1470 
1471 }
1472 
1473 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1474 transform_instr(struct tgsi_transform_context *tctx,
1475 		struct tgsi_full_instruction *inst)
1476 {
1477    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1478 
1479    if (!ctx->emitted_decls) {
1480       emit_decls(tctx);
1481       ctx->emitted_decls = 1;
1482    }
1483 
1484    /* if emulating two-sided-color, we need to re-write some
1485     * src registers:
1486     */
1487    if (ctx->two_side_colors)
1488       rename_color_inputs(ctx, inst);
1489 
1490    switch (inst->Instruction.Opcode) {
1491    case TGSI_OPCODE_DST:
1492       if (!ctx->config->lower_DST)
1493          goto skip;
1494       transform_dst(tctx, inst);
1495       break;
1496    case TGSI_OPCODE_XPD:
1497       if (!ctx->config->lower_XPD)
1498          goto skip;
1499       transform_xpd(tctx, inst);
1500       break;
1501    case TGSI_OPCODE_SCS:
1502       if (!ctx->config->lower_SCS)
1503          goto skip;
1504       transform_scs(tctx, inst);
1505       break;
1506    case TGSI_OPCODE_LRP:
1507       if (!ctx->config->lower_LRP)
1508          goto skip;
1509       transform_lrp(tctx, inst);
1510       break;
1511    case TGSI_OPCODE_FRC:
1512       if (!ctx->config->lower_FRC)
1513          goto skip;
1514       transform_frc(tctx, inst);
1515       break;
1516    case TGSI_OPCODE_POW:
1517       if (!ctx->config->lower_POW)
1518          goto skip;
1519       transform_pow(tctx, inst);
1520       break;
1521    case TGSI_OPCODE_LIT:
1522       if (!ctx->config->lower_LIT)
1523          goto skip;
1524       transform_lit(tctx, inst);
1525       break;
1526    case TGSI_OPCODE_EXP:
1527       if (!ctx->config->lower_EXP)
1528          goto skip;
1529       transform_exp(tctx, inst);
1530       break;
1531    case TGSI_OPCODE_LOG:
1532       if (!ctx->config->lower_LOG)
1533          goto skip;
1534       transform_log(tctx, inst);
1535       break;
1536    case TGSI_OPCODE_DP4:
1537       if (!ctx->config->lower_DP4)
1538          goto skip;
1539       transform_dotp(tctx, inst);
1540       break;
1541    case TGSI_OPCODE_DP3:
1542       if (!ctx->config->lower_DP3)
1543          goto skip;
1544       transform_dotp(tctx, inst);
1545       break;
1546    case TGSI_OPCODE_DPH:
1547       if (!ctx->config->lower_DPH)
1548          goto skip;
1549       transform_dotp(tctx, inst);
1550       break;
1551    case TGSI_OPCODE_DP2:
1552       if (!ctx->config->lower_DP2)
1553          goto skip;
1554       transform_dotp(tctx, inst);
1555       break;
1556    case TGSI_OPCODE_DP2A:
1557       if (!ctx->config->lower_DP2A)
1558          goto skip;
1559       transform_dotp(tctx, inst);
1560       break;
1561    case TGSI_OPCODE_FLR:
1562       if (!ctx->config->lower_FLR)
1563          goto skip;
1564       transform_flr_ceil(tctx, inst);
1565       break;
1566    case TGSI_OPCODE_CEIL:
1567       if (!ctx->config->lower_CEIL)
1568          goto skip;
1569       transform_flr_ceil(tctx, inst);
1570       break;
1571    case TGSI_OPCODE_TRUNC:
1572       if (!ctx->config->lower_TRUNC)
1573          goto skip;
1574       transform_trunc(tctx, inst);
1575       break;
1576    case TGSI_OPCODE_TEX:
1577    case TGSI_OPCODE_TXP:
1578    case TGSI_OPCODE_TXB:
1579    case TGSI_OPCODE_TXB2:
1580    case TGSI_OPCODE_TXL:
1581       if (transform_samp(tctx, inst))
1582          goto skip;
1583       break;
1584    default:
1585    skip:
1586       tctx->emit_instruction(tctx, inst);
1587       break;
1588    }
1589 }
1590 
1591 /* returns NULL if no lowering required, else returns the new
1592  * tokens (which caller is required to free()).  In either case
1593  * returns the current info.
1594  */
1595 const struct tgsi_token *
tgsi_transform_lowering(const struct tgsi_lowering_config * config,const struct tgsi_token * tokens,struct tgsi_shader_info * info)1596 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1597                         const struct tgsi_token *tokens,
1598                         struct tgsi_shader_info *info)
1599 {
1600    struct tgsi_lowering_context ctx;
1601    struct tgsi_token *newtoks;
1602    int newlen, numtmp;
1603 
1604    /* sanity check in case limit is ever increased: */
1605    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1606 
1607    /* sanity check the lowering */
1608    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1609    assert(!(config->lower_FRC && config->lower_TRUNC));
1610 
1611    memset(&ctx, 0, sizeof(ctx));
1612    ctx.base.transform_instruction = transform_instr;
1613    ctx.info = info;
1614    ctx.config = config;
1615 
1616    tgsi_scan_shader(tokens, info);
1617 
1618    /* if we are adding fragment shader support to emulate two-sided
1619     * color, then figure out the number of additional inputs we need
1620     * to create for BCOLOR's..
1621     */
1622    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1623        config->color_two_side) {
1624       int i;
1625       ctx.face_idx = -1;
1626       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1627          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1628             ctx.two_side_idx[ctx.two_side_colors++] = i;
1629          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1630             ctx.face_idx = i;
1631       }
1632    }
1633 
1634    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1635 
1636 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1637    /* if there are no instructions to lower, then we are done: */
1638    if (!(OPCS(DST) ||
1639          OPCS(XPD) ||
1640          OPCS(SCS) ||
1641          OPCS(LRP) ||
1642          OPCS(FRC) ||
1643          OPCS(POW) ||
1644          OPCS(LIT) ||
1645          OPCS(EXP) ||
1646          OPCS(LOG) ||
1647          OPCS(DP4) ||
1648          OPCS(DP3) ||
1649          OPCS(DPH) ||
1650          OPCS(DP2) ||
1651          OPCS(DP2A) ||
1652          OPCS(FLR) ||
1653          OPCS(CEIL) ||
1654          OPCS(TRUNC) ||
1655          OPCS(TXP) ||
1656          ctx.two_side_colors ||
1657          ctx.saturate))
1658       return NULL;
1659 
1660 #if 0  /* debug */
1661    _debug_printf("BEFORE:");
1662    tgsi_dump(tokens, 0);
1663 #endif
1664 
1665    numtmp = 0;
1666    newlen = tgsi_num_tokens(tokens);
1667    if (OPCS(DST)) {
1668       newlen += DST_GROW * OPCS(DST);
1669       numtmp = MAX2(numtmp, DST_TMP);
1670    }
1671    if (OPCS(XPD)) {
1672       newlen += XPD_GROW * OPCS(XPD);
1673       numtmp = MAX2(numtmp, XPD_TMP);
1674    }
1675    if (OPCS(SCS)) {
1676       newlen += SCS_GROW * OPCS(SCS);
1677       numtmp = MAX2(numtmp, SCS_TMP);
1678    }
1679    if (OPCS(LRP)) {
1680       newlen += LRP_GROW * OPCS(LRP);
1681       numtmp = MAX2(numtmp, LRP_TMP);
1682    }
1683    if (OPCS(FRC)) {
1684       newlen += FRC_GROW * OPCS(FRC);
1685       numtmp = MAX2(numtmp, FRC_TMP);
1686    }
1687    if (OPCS(POW)) {
1688       newlen += POW_GROW * OPCS(POW);
1689       numtmp = MAX2(numtmp, POW_TMP);
1690    }
1691    if (OPCS(LIT)) {
1692       newlen += LIT_GROW * OPCS(LIT);
1693       numtmp = MAX2(numtmp, LIT_TMP);
1694    }
1695    if (OPCS(EXP)) {
1696       newlen += EXP_GROW * OPCS(EXP);
1697       numtmp = MAX2(numtmp, EXP_TMP);
1698    }
1699    if (OPCS(LOG)) {
1700       newlen += LOG_GROW * OPCS(LOG);
1701       numtmp = MAX2(numtmp, LOG_TMP);
1702    }
1703    if (OPCS(DP4)) {
1704       newlen += DP4_GROW * OPCS(DP4);
1705       numtmp = MAX2(numtmp, DOTP_TMP);
1706    }
1707    if (OPCS(DP3)) {
1708       newlen += DP3_GROW * OPCS(DP3);
1709       numtmp = MAX2(numtmp, DOTP_TMP);
1710    }
1711    if (OPCS(DPH)) {
1712       newlen += DPH_GROW * OPCS(DPH);
1713       numtmp = MAX2(numtmp, DOTP_TMP);
1714    }
1715    if (OPCS(DP2)) {
1716       newlen += DP2_GROW * OPCS(DP2);
1717       numtmp = MAX2(numtmp, DOTP_TMP);
1718    }
1719    if (OPCS(DP2A)) {
1720       newlen += DP2A_GROW * OPCS(DP2A);
1721       numtmp = MAX2(numtmp, DOTP_TMP);
1722    }
1723    if (OPCS(FLR)) {
1724       newlen += FLR_GROW * OPCS(FLR);
1725       numtmp = MAX2(numtmp, FLR_TMP);
1726    }
1727    if (OPCS(CEIL)) {
1728       newlen += CEIL_GROW * OPCS(CEIL);
1729       numtmp = MAX2(numtmp, CEIL_TMP);
1730    }
1731    if (OPCS(TRUNC)) {
1732       newlen += TRUNC_GROW * OPCS(TRUNC);
1733       numtmp = MAX2(numtmp, TRUNC_TMP);
1734    }
1735    if (ctx.saturate || config->lower_TXP) {
1736       int n = 0;
1737 
1738       if (ctx.saturate) {
1739          n = info->opcode_count[TGSI_OPCODE_TEX] +
1740             info->opcode_count[TGSI_OPCODE_TXP] +
1741             info->opcode_count[TGSI_OPCODE_TXB] +
1742             info->opcode_count[TGSI_OPCODE_TXB2] +
1743             info->opcode_count[TGSI_OPCODE_TXL];
1744       } else if (config->lower_TXP) {
1745           n = info->opcode_count[TGSI_OPCODE_TXP];
1746       }
1747 
1748       newlen += SAMP_GROW * n;
1749       numtmp = MAX2(numtmp, SAMP_TMP);
1750    }
1751 
1752    /* specifically don't include two_side_colors temps in the count: */
1753    ctx.numtmp = numtmp;
1754 
1755    if (ctx.two_side_colors) {
1756       newlen += TWOSIDE_GROW(ctx.two_side_colors);
1757       /* note: we permanently consume temp regs, re-writing references
1758        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1759        * instruction that selects which varying to use):
1760        */
1761       numtmp += ctx.two_side_colors;
1762    }
1763 
1764    newlen += 2 * numtmp;
1765    newlen += 5;        /* immediate */
1766 
1767    newtoks = tgsi_alloc_tokens(newlen);
1768    if (!newtoks)
1769       return NULL;
1770 
1771    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1772 
1773    tgsi_scan_shader(newtoks, info);
1774 
1775 #if 0  /* debug */
1776    _debug_printf("AFTER:");
1777    tgsi_dump(newtoks, 0);
1778 #endif
1779 
1780    return newtoks;
1781 }
1782