• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_program_pair.h"
7 
8 #include "radeon_compiler.h"
9 #include "radeon_compiler_util.h"
10 
11 #include "util/compiler.h"
12 
13 /**
14  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
15  * and reverse the order of arguments for CMP.
16  */
17 static void
final_rewrite(struct rc_sub_instruction * inst)18 final_rewrite(struct rc_sub_instruction *inst)
19 {
20    struct rc_src_register tmp;
21 
22    switch (inst->Opcode) {
23    case RC_OPCODE_ADD:
24       inst->SrcReg[2] = inst->SrcReg[1];
25       inst->SrcReg[1].File = RC_FILE_NONE;
26       inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
27       inst->SrcReg[1].Negate = RC_MASK_NONE;
28       inst->Opcode = RC_OPCODE_MAD;
29       break;
30    case RC_OPCODE_CMP:
31       tmp = inst->SrcReg[2];
32       inst->SrcReg[2] = inst->SrcReg[0];
33       inst->SrcReg[0] = tmp;
34       break;
35    case RC_OPCODE_MOV:
36       inst->SrcReg[1] = inst->SrcReg[0];
37       inst->Opcode = RC_OPCODE_MAX;
38       break;
39    case RC_OPCODE_MUL:
40       inst->SrcReg[2].File = RC_FILE_NONE;
41       inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
42       inst->Opcode = RC_OPCODE_MAD;
43       break;
44    default:
45       /* nothing to do */
46       break;
47    }
48 }
49 
50 /**
51  * ALU operations usually enable the output modifier, which in turn standardizes
52  * NaN values and flushes denormal results to zero. A MOV instruction which
53  * preserves the source bits is implemented by setting US_OMOD_DISABLED
54  * for the instruction and using the MAX(src, src) instruction.
55  * The output modifier cannot be disabled for a saturated MOV (MOV with clamping enabled).
56  * RC_OMOD_DISABLE is only available on R5xx and is only valid with MIN/MAX/CMP/CND.
57  */
58 static unsigned
translate_omod(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)59 translate_omod(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
60 {
61    if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode &&
62        (inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN ||
63         inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND))
64       return RC_OMOD_DISABLE;
65    return inst->Omod;
66 }
67 
68 /**
69  * Classify an instruction according to which ALUs etc. it needs
70  */
71 static void
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)72 classify_instruction(struct rc_sub_instruction *inst, int *needrgb, int *needalpha,
73                      int *istranscendent)
74 {
75    *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
76    *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
77    *istranscendent = 0;
78 
79    if (inst->WriteALUResult == RC_ALURESULT_X)
80       *needrgb = 1;
81    else if (inst->WriteALUResult == RC_ALURESULT_W)
82       *needalpha = 1;
83 
84    switch (inst->Opcode) {
85    case RC_OPCODE_ADD:
86    case RC_OPCODE_CMP:
87    case RC_OPCODE_CND:
88    case RC_OPCODE_DDX:
89    case RC_OPCODE_DDY:
90    case RC_OPCODE_FRC:
91    case RC_OPCODE_MAD:
92    case RC_OPCODE_MAX:
93    case RC_OPCODE_MIN:
94    case RC_OPCODE_MOV:
95    case RC_OPCODE_MUL: break;
96    case RC_OPCODE_COS:
97    case RC_OPCODE_EX2:
98    case RC_OPCODE_LG2:
99    case RC_OPCODE_RCP:
100    case RC_OPCODE_RSQ:
101    case RC_OPCODE_SIN:
102       *istranscendent = 1;
103       *needalpha = 1;
104       break;
105    case RC_OPCODE_DP4: *needalpha = 1; FALLTHROUGH;
106    case RC_OPCODE_DP3: *needrgb = 1; break;
107    default: break;
108    }
109 }
110 
111 static void
src_uses(struct rc_src_register src,unsigned int * rgb,unsigned int * alpha)112 src_uses(struct rc_src_register src, unsigned int *rgb, unsigned int *alpha)
113 {
114    int j;
115    for (j = 0; j < 4; ++j) {
116       unsigned int swz = GET_SWZ(src.Swizzle, j);
117       if (swz < 3)
118          *rgb = 1;
119       else if (swz < 4)
120          *alpha = 1;
121    }
122 }
123 
124 /**
125  * Fill the given ALU instruction's opcodes and source operands into the given pair,
126  * if possible.
127  */
128 static void
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)129 set_pair_instruction(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *pair,
130                      struct rc_sub_instruction *inst)
131 {
132    int needrgb, needalpha, istranscendent;
133    const struct rc_opcode_info *opcode;
134    int i;
135 
136    memset(pair, 0, sizeof(struct rc_pair_instruction));
137 
138    classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
139 
140    if (needrgb) {
141       if (istranscendent)
142          pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
143       else
144          pair->RGB.Opcode = inst->Opcode;
145       if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
146          pair->RGB.Saturate = 1;
147    }
148    if (needalpha) {
149       pair->Alpha.Opcode = inst->Opcode;
150       if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
151          pair->Alpha.Saturate = 1;
152    }
153 
154    opcode = rc_get_opcode_info(inst->Opcode);
155 
156    /* Presubtract handling:
157     * We need to make sure that the values used by the presubtract
158     * operation end up in src0 or src1. */
159    if (inst->PreSub.Opcode != RC_PRESUB_NONE) {
160       /* rc_pair_alloc_source() will fill in data for
161        * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
162       int j;
163       for (j = 0; j < 3; j++) {
164          int src_regs;
165          if (inst->SrcReg[j].File != RC_FILE_PRESUB)
166             continue;
167 
168          src_regs = rc_presubtract_src_reg_count(inst->PreSub.Opcode);
169          for (i = 0; i < src_regs; i++) {
170             unsigned int rgb = 0;
171             unsigned int alpha = 0;
172             src_uses(inst->SrcReg[j], &rgb, &alpha);
173             if (rgb) {
174                pair->RGB.Src[i].File = inst->PreSub.SrcReg[i].File;
175                pair->RGB.Src[i].Index = inst->PreSub.SrcReg[i].Index;
176                pair->RGB.Src[i].Used = 1;
177             }
178             if (alpha) {
179                pair->Alpha.Src[i].File = inst->PreSub.SrcReg[i].File;
180                pair->Alpha.Src[i].Index = inst->PreSub.SrcReg[i].Index;
181                pair->Alpha.Src[i].Used = 1;
182             }
183          }
184       }
185    }
186 
187    for (i = 0; i < opcode->NumSrcRegs; ++i) {
188       int source;
189       if (needrgb && !istranscendent) {
190          unsigned int srcrgb = 0;
191          unsigned int srcalpha = 0;
192          unsigned int srcmask = 0;
193          int j;
194          /* We don't care about the alpha channel here.  We only
195           * want the part of the swizzle that writes to rgb,
196           * since we are creating an rgb instruction. */
197          for (j = 0; j < 3; ++j) {
198             unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
199 
200             if (swz < RC_SWIZZLE_W)
201                srcrgb = 1;
202             else if (swz == RC_SWIZZLE_W)
203                srcalpha = 1;
204 
205             /* We check for ZERO here as well because otherwise the zero
206              * sign (which doesn't matter and we already ignore it previously
207              * when checking for valid swizzle) could mess up the final negate sign.
208              * Example problematic pattern where this would be produced is:
209              *   CONST[1] FLT32 {   0.0000,     0.0000,    -4.0000,     0.0000}
210              *   ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
211              *
212              * after inline literals would become:
213              *   ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
214              *
215              * and after pair translate:
216              *   src0.xyz = const[0], src0.w = 4.000000 (0x48)
217              *   MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
218              *
219              * Without the zero check there would be -src0.w00.
220              */
221             if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
222                srcmask |= 1 << j;
223          }
224          source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
225                                        inst->SrcReg[i].Index);
226          if (source < 0) {
227             rc_error(&c->Base, "Failed to translate "
228                                "rgb instruction.\n");
229             return;
230          }
231          pair->RGB.Arg[i].Source = source;
232          pair->RGB.Arg[i].Swizzle = rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
233          pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
234          pair->RGB.Arg[i].Negate =
235             !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
236       }
237       if (needalpha) {
238          unsigned int srcrgb = 0;
239          unsigned int srcalpha = 0;
240          unsigned int swz;
241          if (istranscendent) {
242             swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
243          } else {
244             swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
245          }
246 
247          if (swz < 3)
248             srcrgb = 1;
249          else if (swz < 4)
250             srcalpha = 1;
251          source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
252                                        inst->SrcReg[i].Index);
253          if (source < 0) {
254             rc_error(&c->Base, "Failed to translate "
255                                "alpha instruction.\n");
256             return;
257          }
258          pair->Alpha.Arg[i].Source = source;
259          pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
260          pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
261 
262          if (istranscendent) {
263             pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & inst->DstReg.WriteMask);
264          } else {
265             pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
266          }
267       }
268    }
269 
270    /* Destination handling */
271    if (inst->DstReg.File == RC_FILE_OUTPUT) {
272       if (inst->DstReg.Index == c->OutputDepth) {
273          pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
274       } else {
275          for (i = 0; i < 4; i++) {
276             if (inst->DstReg.Index == c->OutputColor[i]) {
277                pair->RGB.Target = i;
278                pair->Alpha.Target = i;
279                pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
280                pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
281                break;
282             }
283          }
284       }
285    } else {
286       if (needrgb) {
287          pair->RGB.DestIndex = inst->DstReg.Index;
288          pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
289       }
290 
291       if (needalpha) {
292          pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
293          if (pair->Alpha.WriteMask) {
294             pair->Alpha.DestIndex = inst->DstReg.Index;
295          }
296       }
297    }
298 
299    if (needrgb) {
300       pair->RGB.Omod = translate_omod(c, inst);
301    }
302    if (needalpha) {
303       pair->Alpha.Omod = translate_omod(c, inst);
304    }
305 
306    if (inst->WriteALUResult) {
307       pair->WriteALUResult = inst->WriteALUResult;
308       pair->ALUResultCompare = inst->ALUResultCompare;
309    }
310 }
311 
312 static void
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)313 check_opcode_support(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
314 {
315    const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
316 
317    if (opcode->HasDstReg) {
318       if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
319          rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
320          return;
321       }
322    }
323 
324    for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
325       if (inst->SrcReg[i].RelAddr) {
326          rc_error(&c->Base, "Fragment program does not support relative addressing "
327                             " of source operands.\n");
328          return;
329       }
330    }
331 }
332 
333 /**
334  * Translate all ALU instructions into corresponding pair instructions,
335  * performing no other changes.
336  */
337 void
rc_pair_translate(struct radeon_compiler * cc,void * user)338 rc_pair_translate(struct radeon_compiler *cc, void *user)
339 {
340    struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
341 
342    for (struct rc_instruction *inst = c->Base.Program.Instructions.Next;
343         inst != &c->Base.Program.Instructions; inst = inst->Next) {
344       const struct rc_opcode_info *opcode;
345       struct rc_sub_instruction copy;
346 
347       if (inst->Type != RC_INSTRUCTION_NORMAL)
348          continue;
349 
350       opcode = rc_get_opcode_info(inst->U.I.Opcode);
351 
352       if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
353          continue;
354 
355       copy = inst->U.I;
356 
357       check_opcode_support(c, &copy);
358 
359       final_rewrite(&copy);
360       inst->Type = RC_INSTRUCTION_PAIR;
361       set_pair_instruction(c, &inst->U.P, &copy);
362    }
363 }
364