• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2008 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /**
7  * @file
8  *
9  * Shareable transformations that transform "special" ALU instructions
10  * into ALU instructions that are supported by hardware.
11  *
12  */
13 
14 #include "radeon_program_alu.h"
15 
16 #include "radeon_compiler.h"
17 #include "radeon_compiler_util.h"
18 #include "radeon_dataflow.h"
19 
20 #include "util/log.h"
21 
22 static struct rc_instruction *
emit1(struct radeon_compiler * c,struct rc_instruction * after,rc_opcode Opcode,struct rc_sub_instruction * base,struct rc_dst_register DstReg,struct rc_src_register SrcReg)23 emit1(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
24       struct rc_sub_instruction *base, struct rc_dst_register DstReg, struct rc_src_register SrcReg)
25 {
26    struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
27 
28    if (base) {
29       memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
30    }
31 
32    fpi->U.I.Opcode = Opcode;
33    fpi->U.I.DstReg = DstReg;
34    fpi->U.I.SrcReg[0] = SrcReg;
35    return fpi;
36 }
37 
38 static struct rc_instruction *
emit2(struct radeon_compiler * c,struct rc_instruction * after,rc_opcode Opcode,struct rc_sub_instruction * base,struct rc_dst_register DstReg,struct rc_src_register SrcReg0,struct rc_src_register SrcReg1)39 emit2(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
40       struct rc_sub_instruction *base, struct rc_dst_register DstReg,
41       struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
42 {
43    struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
44 
45    if (base) {
46       memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
47    }
48 
49    fpi->U.I.Opcode = Opcode;
50    fpi->U.I.DstReg = DstReg;
51    fpi->U.I.SrcReg[0] = SrcReg0;
52    fpi->U.I.SrcReg[1] = SrcReg1;
53    return fpi;
54 }
55 
56 static struct rc_instruction *
emit3(struct radeon_compiler * c,struct rc_instruction * after,rc_opcode Opcode,struct rc_sub_instruction * base,struct rc_dst_register DstReg,struct rc_src_register SrcReg0,struct rc_src_register SrcReg1,struct rc_src_register SrcReg2)57 emit3(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
58       struct rc_sub_instruction *base, struct rc_dst_register DstReg,
59       struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
60       struct rc_src_register SrcReg2)
61 {
62    struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
63 
64    if (base) {
65       memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
66    }
67 
68    fpi->U.I.Opcode = Opcode;
69    fpi->U.I.DstReg = DstReg;
70    fpi->U.I.SrcReg[0] = SrcReg0;
71    fpi->U.I.SrcReg[1] = SrcReg1;
72    fpi->U.I.SrcReg[2] = SrcReg2;
73    return fpi;
74 }
75 
76 static struct rc_dst_register
dstregtmpmask(int index,int mask)77 dstregtmpmask(int index, int mask)
78 {
79    struct rc_dst_register dst = {0, 0, 0};
80    dst.File = RC_FILE_TEMPORARY;
81    dst.Index = index;
82    dst.WriteMask = mask;
83    return dst;
84 }
85 
86 static const struct rc_src_register builtin_one = {
87    .File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_1111};
88 
89 static const struct rc_src_register srcreg_undefined = {
90    .File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_XYZW};
91 
92 static struct rc_src_register
srcreg(int file,int index)93 srcreg(int file, int index)
94 {
95    struct rc_src_register src = srcreg_undefined;
96    src.File = file;
97    src.Index = index;
98    return src;
99 }
100 
101 static struct rc_src_register
srcregswz(int file,int index,int swz)102 srcregswz(int file, int index, int swz)
103 {
104    struct rc_src_register src = srcreg_undefined;
105    src.File = file;
106    src.Index = index;
107    src.Swizzle = swz;
108    return src;
109 }
110 
111 static struct rc_src_register
absolute(struct rc_src_register reg)112 absolute(struct rc_src_register reg)
113 {
114    struct rc_src_register newreg = reg;
115    newreg.Abs = 1;
116    newreg.Negate = RC_MASK_NONE;
117    return newreg;
118 }
119 
120 static struct rc_src_register
negate(struct rc_src_register reg)121 negate(struct rc_src_register reg)
122 {
123    struct rc_src_register newreg = reg;
124    newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
125    return newreg;
126 }
127 
128 static struct rc_src_register
swizzle(struct rc_src_register reg,rc_swizzle x,rc_swizzle y,rc_swizzle z,rc_swizzle w)129 swizzle(struct rc_src_register reg, rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
130 {
131    struct rc_src_register swizzled = reg;
132    swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
133    return swizzled;
134 }
135 
136 static struct rc_src_register
swizzle_smear(struct rc_src_register reg,rc_swizzle x)137 swizzle_smear(struct rc_src_register reg, rc_swizzle x)
138 {
139    return swizzle(reg, x, x, x, x);
140 }
141 
142 static struct rc_src_register
swizzle_xxxx(struct rc_src_register reg)143 swizzle_xxxx(struct rc_src_register reg)
144 {
145    return swizzle_smear(reg, RC_SWIZZLE_X);
146 }
147 
148 static struct rc_src_register
swizzle_yyyy(struct rc_src_register reg)149 swizzle_yyyy(struct rc_src_register reg)
150 {
151    return swizzle_smear(reg, RC_SWIZZLE_Y);
152 }
153 
154 static struct rc_src_register
swizzle_zzzz(struct rc_src_register reg)155 swizzle_zzzz(struct rc_src_register reg)
156 {
157    return swizzle_smear(reg, RC_SWIZZLE_Z);
158 }
159 
160 static struct rc_src_register
swizzle_wwww(struct rc_src_register reg)161 swizzle_wwww(struct rc_src_register reg)
162 {
163    return swizzle_smear(reg, RC_SWIZZLE_W);
164 }
165 
166 static struct rc_dst_register
new_dst_reg(struct radeon_compiler * c,struct rc_instruction * inst)167 new_dst_reg(struct radeon_compiler *c, struct rc_instruction *inst)
168 {
169    unsigned tmp = rc_find_free_temporary(c);
170    return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
171 }
172 
173 static void
transform_DP2(struct radeon_compiler * c,struct rc_instruction * inst)174 transform_DP2(struct radeon_compiler *c, struct rc_instruction *inst)
175 {
176    struct rc_src_register src0 = inst->U.I.SrcReg[0];
177    struct rc_src_register src1 = inst->U.I.SrcReg[1];
178    src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
179    src0.Swizzle &= ~(63 << (3 * 2));
180    src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
181    src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
182    src1.Swizzle &= ~(63 << (3 * 2));
183    src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
184    emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
185    rc_remove_instruction(inst);
186 }
187 
188 static void
transform_RSQ(struct radeon_compiler * c,struct rc_instruction * inst)189 transform_RSQ(struct radeon_compiler *c, struct rc_instruction *inst)
190 {
191    inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
192 }
193 
194 static void
transform_KILP(struct radeon_compiler * c,struct rc_instruction * inst)195 transform_KILP(struct radeon_compiler *c, struct rc_instruction *inst)
196 {
197    inst->U.I.SrcReg[0] = negate(builtin_one);
198    inst->U.I.Opcode = RC_OPCODE_KIL;
199 }
200 
201 /**
202  * Can be used as a transformation for @ref radeonClauseLocalTransform,
203  * no userData necessary.
204  *
205  * Transforms RSQ to Radeon's native RSQ by explicitly setting
206  * absolute value.
207  *
208  * @note should be applicable to R300 and R500 fragment programs.
209  */
210 int
radeonTransformALU(struct radeon_compiler * c,struct rc_instruction * inst,void * unused)211 radeonTransformALU(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
212 {
213    switch (inst->U.I.Opcode) {
214    case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
215    case RC_OPCODE_KILP: transform_KILP(c, inst); return 1;
216    case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
217    case RC_OPCODE_SEQ: unreachable();
218    case RC_OPCODE_SGE: unreachable();
219    case RC_OPCODE_SLT: unreachable();
220    case RC_OPCODE_SNE: unreachable();
221    default: return 0;
222    }
223 }
224 
225 static void
transform_r300_vertex_CMP(struct radeon_compiler * c,struct rc_instruction * inst)226 transform_r300_vertex_CMP(struct radeon_compiler *c, struct rc_instruction *inst)
227 {
228    /* R5xx has a CMP, but we can use it only if it reads from less than
229     * three different temps. */
230    if (c->is_r500 && !rc_inst_has_three_diff_temp_srcs(inst))
231       return;
232 
233    unreachable();
234 }
235 
236 static void
transform_r300_vertex_DP2(struct radeon_compiler * c,struct rc_instruction * inst)237 transform_r300_vertex_DP2(struct radeon_compiler *c, struct rc_instruction *inst)
238 {
239    struct rc_instruction *next_inst = inst->Next;
240    transform_DP2(c, inst);
241    next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
242 }
243 
244 static void
transform_r300_vertex_DP3(struct radeon_compiler * c,struct rc_instruction * inst)245 transform_r300_vertex_DP3(struct radeon_compiler *c, struct rc_instruction *inst)
246 {
247    struct rc_src_register src0 = inst->U.I.SrcReg[0];
248    struct rc_src_register src1 = inst->U.I.SrcReg[1];
249    src0.Negate &= ~RC_MASK_W;
250    src0.Swizzle &= ~(7 << (3 * 3));
251    src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
252    src1.Negate &= ~RC_MASK_W;
253    src1.Swizzle &= ~(7 << (3 * 3));
254    src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
255    emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
256    rc_remove_instruction(inst);
257 }
258 
259 static void
transform_r300_vertex_fix_LIT(struct radeon_compiler * c,struct rc_instruction * inst)260 transform_r300_vertex_fix_LIT(struct radeon_compiler *c, struct rc_instruction *inst)
261 {
262    struct rc_dst_register dst = new_dst_reg(c, inst);
263    unsigned constant_swizzle;
264    int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001,
265                                                     &constant_swizzle);
266 
267    /* MOV dst, src */
268    dst.WriteMask = RC_MASK_XYZW;
269    emit1(c, inst->Prev, RC_OPCODE_MOV, NULL, dst, inst->U.I.SrcReg[0]);
270 
271    /* MAX dst.y, src, 0.00...001 */
272    emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(dst.Index, RC_MASK_Y),
273          srcreg(RC_FILE_TEMPORARY, dst.Index),
274          srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
275 
276    inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
277 }
278 
279 static void
transform_r300_vertex_SEQ(struct radeon_compiler * c,struct rc_instruction * inst)280 transform_r300_vertex_SEQ(struct radeon_compiler *c, struct rc_instruction *inst)
281 {
282    /* x = y  <==>  x >= y && y >= x */
283    /* x <= y */
284    struct rc_dst_register dst0 = new_dst_reg(c, inst);
285    emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]);
286 
287    /* y <= x */
288    int tmp = rc_find_free_temporary(c);
289    emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
290          inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]);
291 
292    /* x && y  =  x * y */
293    emit2(c, inst->Prev, RC_OPCODE_MUL, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index),
294          srcreg(RC_FILE_TEMPORARY, tmp));
295 
296    rc_remove_instruction(inst);
297 }
298 
299 static void
transform_r300_vertex_SNE(struct radeon_compiler * c,struct rc_instruction * inst)300 transform_r300_vertex_SNE(struct radeon_compiler *c, struct rc_instruction *inst)
301 {
302    /* x != y  <==>  x < y || y < x */
303    /* x < y */
304    struct rc_dst_register dst0 = new_dst_reg(c, inst);
305    emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]);
306 
307    /* y < x */
308    int tmp = rc_find_free_temporary(c);
309    emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
310          inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]);
311 
312    /* x || y  =  max(x, y) */
313    emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index),
314          srcreg(RC_FILE_TEMPORARY, tmp));
315 
316    rc_remove_instruction(inst);
317 }
318 
319 /**
320  * For use with rc_local_transform, this transforms non-native ALU
321  * instructions of the r300 up to r500 vertex engine.
322  */
323 int
r300_transform_vertex_alu(struct radeon_compiler * c,struct rc_instruction * inst,void * unused)324 r300_transform_vertex_alu(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
325 {
326    switch (inst->U.I.Opcode) {
327    case RC_OPCODE_CMP:
328       transform_r300_vertex_CMP(c, inst);
329       return 1;
330    case RC_OPCODE_DP2:
331       transform_r300_vertex_DP2(c, inst);
332       return 1;
333    case RC_OPCODE_DP3:
334       transform_r300_vertex_DP3(c, inst);
335       return 1;
336    case RC_OPCODE_LIT:
337       transform_r300_vertex_fix_LIT(c, inst);
338       return 1;
339    case RC_OPCODE_SEQ:
340       if (!c->is_r500) {
341          transform_r300_vertex_SEQ(c, inst);
342          return 1;
343       }
344       return 0;
345    case RC_OPCODE_SNE:
346       if (!c->is_r500) {
347          transform_r300_vertex_SNE(c, inst);
348          return 1;
349       }
350       return 0;
351    default:
352       return 0;
353    }
354 }
355 
356 /**
357  * Replaces DDX/DDY instructions with MOV 0 to avoid using dummy shaders on r300/r400.
358  *
359  * @warning This explicitly changes the form of DDX and DDY!
360  */
361 
362 int
radeonStubDeriv(struct radeon_compiler * c,struct rc_instruction * inst,void * unused)363 radeonStubDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
364 {
365    if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
366       return 0;
367 
368    inst->U.I.Opcode = RC_OPCODE_MOV;
369    inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
370 
371    mesa_logw_once("r300: WARNING: Shader is trying to use derivatives, "
372                   "but the hardware doesn't support it. "
373                   "Expect possible misrendering (it's not a bug, do not report it).");
374 
375    return 1;
376 }
377 
378 /**
379  * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
380  * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
381  * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
382  *
383  * @warning This explicitly changes the form of DDX and DDY!
384  */
385 
386 int
radeonTransformDeriv(struct radeon_compiler * c,struct rc_instruction * inst,void * unused)387 radeonTransformDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
388 {
389    if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
390       return 0;
391 
392    inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
393    inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
394 
395    return 1;
396 }
397 
398 int
rc_force_output_alpha_to_one(struct radeon_compiler * c,struct rc_instruction * inst,void * data)399 rc_force_output_alpha_to_one(struct radeon_compiler *c, struct rc_instruction *inst, void *data)
400 {
401    struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler *)c;
402    const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
403    unsigned tmp;
404 
405    if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT ||
406        inst->U.I.DstReg.Index == fragc->OutputDepth)
407       return 1;
408 
409    tmp = rc_find_free_temporary(c);
410 
411    /* Insert MOV after inst, set alpha to 1. */
412    emit1(c, inst, RC_OPCODE_MOV, NULL, inst->U.I.DstReg,
413          srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1));
414 
415    /* Re-route the destination of inst to the source of mov. */
416    inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
417    inst->U.I.DstReg.Index = tmp;
418 
419    /* Move the saturate output modifier to the MOV instruction
420     * (for better copy propagation). */
421    inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode;
422    inst->U.I.SaturateMode = RC_SATURATE_NONE;
423    return 1;
424 }
425