1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_program_pair.h"
7
8 #include "radeon_compiler.h"
9 #include "radeon_compiler_util.h"
10
11 #include "util/compiler.h"
12
13 /**
14 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
15 * and reverse the order of arguments for CMP.
16 */
17 static void
final_rewrite(struct rc_sub_instruction * inst)18 final_rewrite(struct rc_sub_instruction *inst)
19 {
20 struct rc_src_register tmp;
21
22 switch (inst->Opcode) {
23 case RC_OPCODE_ADD:
24 inst->SrcReg[2] = inst->SrcReg[1];
25 inst->SrcReg[1].File = RC_FILE_NONE;
26 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
27 inst->SrcReg[1].Negate = RC_MASK_NONE;
28 inst->Opcode = RC_OPCODE_MAD;
29 break;
30 case RC_OPCODE_CMP:
31 tmp = inst->SrcReg[2];
32 inst->SrcReg[2] = inst->SrcReg[0];
33 inst->SrcReg[0] = tmp;
34 break;
35 case RC_OPCODE_MOV:
36 inst->SrcReg[1] = inst->SrcReg[0];
37 inst->Opcode = RC_OPCODE_MAX;
38 break;
39 case RC_OPCODE_MUL:
40 inst->SrcReg[2].File = RC_FILE_NONE;
41 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
42 inst->Opcode = RC_OPCODE_MAD;
43 break;
44 default:
45 /* nothing to do */
46 break;
47 }
48 }
49
50 /**
51 * ALU operations usually enable the output modifier, which in turn standardizes
52 * NaN values and flushes denormal results to zero. A MOV instruction which
53 * preserves the source bits is implemented by setting US_OMOD_DISABLED
54 * for the instruction and using the MAX(src, src) instruction.
55 * The output modifier cannot be disabled for a saturated MOV (MOV with clamping enabled).
56 * RC_OMOD_DISABLE is only available on R5xx and is only valid with MIN/MAX/CMP/CND.
57 */
58 static unsigned
translate_omod(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)59 translate_omod(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
60 {
61 if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode &&
62 (inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN ||
63 inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND))
64 return RC_OMOD_DISABLE;
65 return inst->Omod;
66 }
67
68 /**
69 * Classify an instruction according to which ALUs etc. it needs
70 */
71 static void
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)72 classify_instruction(struct rc_sub_instruction *inst, int *needrgb, int *needalpha,
73 int *istranscendent)
74 {
75 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
76 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
77 *istranscendent = 0;
78
79 if (inst->WriteALUResult == RC_ALURESULT_X)
80 *needrgb = 1;
81 else if (inst->WriteALUResult == RC_ALURESULT_W)
82 *needalpha = 1;
83
84 switch (inst->Opcode) {
85 case RC_OPCODE_ADD:
86 case RC_OPCODE_CMP:
87 case RC_OPCODE_CND:
88 case RC_OPCODE_DDX:
89 case RC_OPCODE_DDY:
90 case RC_OPCODE_FRC:
91 case RC_OPCODE_MAD:
92 case RC_OPCODE_MAX:
93 case RC_OPCODE_MIN:
94 case RC_OPCODE_MOV:
95 case RC_OPCODE_MUL: break;
96 case RC_OPCODE_COS:
97 case RC_OPCODE_EX2:
98 case RC_OPCODE_LG2:
99 case RC_OPCODE_RCP:
100 case RC_OPCODE_RSQ:
101 case RC_OPCODE_SIN:
102 *istranscendent = 1;
103 *needalpha = 1;
104 break;
105 case RC_OPCODE_DP4: *needalpha = 1; FALLTHROUGH;
106 case RC_OPCODE_DP3: *needrgb = 1; break;
107 default: break;
108 }
109 }
110
111 static void
src_uses(struct rc_src_register src,unsigned int * rgb,unsigned int * alpha)112 src_uses(struct rc_src_register src, unsigned int *rgb, unsigned int *alpha)
113 {
114 int j;
115 for (j = 0; j < 4; ++j) {
116 unsigned int swz = GET_SWZ(src.Swizzle, j);
117 if (swz < 3)
118 *rgb = 1;
119 else if (swz < 4)
120 *alpha = 1;
121 }
122 }
123
124 /**
125 * Fill the given ALU instruction's opcodes and source operands into the given pair,
126 * if possible.
127 */
128 static void
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)129 set_pair_instruction(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *pair,
130 struct rc_sub_instruction *inst)
131 {
132 int needrgb, needalpha, istranscendent;
133 const struct rc_opcode_info *opcode;
134 int i;
135
136 memset(pair, 0, sizeof(struct rc_pair_instruction));
137
138 classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
139
140 if (needrgb) {
141 if (istranscendent)
142 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
143 else
144 pair->RGB.Opcode = inst->Opcode;
145 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
146 pair->RGB.Saturate = 1;
147 }
148 if (needalpha) {
149 pair->Alpha.Opcode = inst->Opcode;
150 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
151 pair->Alpha.Saturate = 1;
152 }
153
154 opcode = rc_get_opcode_info(inst->Opcode);
155
156 /* Presubtract handling:
157 * We need to make sure that the values used by the presubtract
158 * operation end up in src0 or src1. */
159 if (inst->PreSub.Opcode != RC_PRESUB_NONE) {
160 /* rc_pair_alloc_source() will fill in data for
161 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
162 int j;
163 for (j = 0; j < 3; j++) {
164 int src_regs;
165 if (inst->SrcReg[j].File != RC_FILE_PRESUB)
166 continue;
167
168 src_regs = rc_presubtract_src_reg_count(inst->PreSub.Opcode);
169 for (i = 0; i < src_regs; i++) {
170 unsigned int rgb = 0;
171 unsigned int alpha = 0;
172 src_uses(inst->SrcReg[j], &rgb, &alpha);
173 if (rgb) {
174 pair->RGB.Src[i].File = inst->PreSub.SrcReg[i].File;
175 pair->RGB.Src[i].Index = inst->PreSub.SrcReg[i].Index;
176 pair->RGB.Src[i].Used = 1;
177 }
178 if (alpha) {
179 pair->Alpha.Src[i].File = inst->PreSub.SrcReg[i].File;
180 pair->Alpha.Src[i].Index = inst->PreSub.SrcReg[i].Index;
181 pair->Alpha.Src[i].Used = 1;
182 }
183 }
184 }
185 }
186
187 for (i = 0; i < opcode->NumSrcRegs; ++i) {
188 int source;
189 if (needrgb && !istranscendent) {
190 unsigned int srcrgb = 0;
191 unsigned int srcalpha = 0;
192 unsigned int srcmask = 0;
193 int j;
194 /* We don't care about the alpha channel here. We only
195 * want the part of the swizzle that writes to rgb,
196 * since we are creating an rgb instruction. */
197 for (j = 0; j < 3; ++j) {
198 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
199
200 if (swz < RC_SWIZZLE_W)
201 srcrgb = 1;
202 else if (swz == RC_SWIZZLE_W)
203 srcalpha = 1;
204
205 /* We check for ZERO here as well because otherwise the zero
206 * sign (which doesn't matter and we already ignore it previously
207 * when checking for valid swizzle) could mess up the final negate sign.
208 * Example problematic pattern where this would be produced is:
209 * CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000}
210 * ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
211 *
212 * after inline literals would become:
213 * ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
214 *
215 * and after pair translate:
216 * src0.xyz = const[0], src0.w = 4.000000 (0x48)
217 * MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
218 *
219 * Without the zero check there would be -src0.w00.
220 */
221 if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
222 srcmask |= 1 << j;
223 }
224 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
225 inst->SrcReg[i].Index);
226 if (source < 0) {
227 rc_error(&c->Base, "Failed to translate "
228 "rgb instruction.\n");
229 return;
230 }
231 pair->RGB.Arg[i].Source = source;
232 pair->RGB.Arg[i].Swizzle = rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
233 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
234 pair->RGB.Arg[i].Negate =
235 !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
236 }
237 if (needalpha) {
238 unsigned int srcrgb = 0;
239 unsigned int srcalpha = 0;
240 unsigned int swz;
241 if (istranscendent) {
242 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
243 } else {
244 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
245 }
246
247 if (swz < 3)
248 srcrgb = 1;
249 else if (swz < 4)
250 srcalpha = 1;
251 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
252 inst->SrcReg[i].Index);
253 if (source < 0) {
254 rc_error(&c->Base, "Failed to translate "
255 "alpha instruction.\n");
256 return;
257 }
258 pair->Alpha.Arg[i].Source = source;
259 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
260 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
261
262 if (istranscendent) {
263 pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & inst->DstReg.WriteMask);
264 } else {
265 pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
266 }
267 }
268 }
269
270 /* Destination handling */
271 if (inst->DstReg.File == RC_FILE_OUTPUT) {
272 if (inst->DstReg.Index == c->OutputDepth) {
273 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
274 } else {
275 for (i = 0; i < 4; i++) {
276 if (inst->DstReg.Index == c->OutputColor[i]) {
277 pair->RGB.Target = i;
278 pair->Alpha.Target = i;
279 pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
280 pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
281 break;
282 }
283 }
284 }
285 } else {
286 if (needrgb) {
287 pair->RGB.DestIndex = inst->DstReg.Index;
288 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
289 }
290
291 if (needalpha) {
292 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
293 if (pair->Alpha.WriteMask) {
294 pair->Alpha.DestIndex = inst->DstReg.Index;
295 }
296 }
297 }
298
299 if (needrgb) {
300 pair->RGB.Omod = translate_omod(c, inst);
301 }
302 if (needalpha) {
303 pair->Alpha.Omod = translate_omod(c, inst);
304 }
305
306 if (inst->WriteALUResult) {
307 pair->WriteALUResult = inst->WriteALUResult;
308 pair->ALUResultCompare = inst->ALUResultCompare;
309 }
310 }
311
312 static void
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)313 check_opcode_support(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
314 {
315 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
316
317 if (opcode->HasDstReg) {
318 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
319 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
320 return;
321 }
322 }
323
324 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
325 if (inst->SrcReg[i].RelAddr) {
326 rc_error(&c->Base, "Fragment program does not support relative addressing "
327 " of source operands.\n");
328 return;
329 }
330 }
331 }
332
333 /**
334 * Translate all ALU instructions into corresponding pair instructions,
335 * performing no other changes.
336 */
337 void
rc_pair_translate(struct radeon_compiler * cc,void * user)338 rc_pair_translate(struct radeon_compiler *cc, void *user)
339 {
340 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
341
342 for (struct rc_instruction *inst = c->Base.Program.Instructions.Next;
343 inst != &c->Base.Program.Instructions; inst = inst->Next) {
344 const struct rc_opcode_info *opcode;
345 struct rc_sub_instruction copy;
346
347 if (inst->Type != RC_INSTRUCTION_NORMAL)
348 continue;
349
350 opcode = rc_get_opcode_info(inst->U.I.Opcode);
351
352 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
353 continue;
354
355 copy = inst->U.I;
356
357 check_opcode_support(c, ©);
358
359 final_rewrite(©);
360 inst->Type = RC_INSTRUCTION_PAIR;
361 set_pair_instruction(c, &inst->U.P, ©);
362 }
363 }
364