1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
32
33
34 /**
35 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
36 * and reverse the order of arguments for CMP.
37 */
final_rewrite(struct rc_sub_instruction * inst)38 static void final_rewrite(struct rc_sub_instruction *inst)
39 {
40 struct rc_src_register tmp;
41
42 switch(inst->Opcode) {
43 case RC_OPCODE_ADD:
44 inst->SrcReg[2] = inst->SrcReg[1];
45 inst->SrcReg[1].File = RC_FILE_NONE;
46 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
47 inst->SrcReg[1].Negate = RC_MASK_NONE;
48 inst->Opcode = RC_OPCODE_MAD;
49 break;
50 case RC_OPCODE_CMP:
51 tmp = inst->SrcReg[2];
52 inst->SrcReg[2] = inst->SrcReg[0];
53 inst->SrcReg[0] = tmp;
54 break;
55 case RC_OPCODE_MOV:
56 /* AMD say we should use CMP.
57 * However, when we transform
58 * KIL -r0;
59 * into
60 * CMP tmp, -r0, -r0, 0;
61 * KIL tmp;
62 * we get incorrect behaviour on R500 when r0 == 0.0.
63 * It appears that the R500 KIL hardware treats -0.0 as less
64 * than zero.
65 */
66 inst->SrcReg[1].File = RC_FILE_NONE;
67 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
68 inst->SrcReg[2].File = RC_FILE_NONE;
69 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
70 inst->Opcode = RC_OPCODE_MAD;
71 break;
72 case RC_OPCODE_MUL:
73 inst->SrcReg[2].File = RC_FILE_NONE;
74 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
75 inst->Opcode = RC_OPCODE_MAD;
76 break;
77 default:
78 /* nothing to do */
79 break;
80 }
81 }
82
83
84 /**
85 * Classify an instruction according to which ALUs etc. it needs
86 */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)87 static void classify_instruction(struct rc_sub_instruction * inst,
88 int * needrgb, int * needalpha, int * istranscendent)
89 {
90 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
91 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
92 *istranscendent = 0;
93
94 if (inst->WriteALUResult == RC_ALURESULT_X)
95 *needrgb = 1;
96 else if (inst->WriteALUResult == RC_ALURESULT_W)
97 *needalpha = 1;
98
99 switch(inst->Opcode) {
100 case RC_OPCODE_ADD:
101 case RC_OPCODE_CMP:
102 case RC_OPCODE_CND:
103 case RC_OPCODE_DDX:
104 case RC_OPCODE_DDY:
105 case RC_OPCODE_FRC:
106 case RC_OPCODE_MAD:
107 case RC_OPCODE_MAX:
108 case RC_OPCODE_MIN:
109 case RC_OPCODE_MOV:
110 case RC_OPCODE_MUL:
111 break;
112 case RC_OPCODE_COS:
113 case RC_OPCODE_EX2:
114 case RC_OPCODE_LG2:
115 case RC_OPCODE_RCP:
116 case RC_OPCODE_RSQ:
117 case RC_OPCODE_SIN:
118 *istranscendent = 1;
119 *needalpha = 1;
120 break;
121 case RC_OPCODE_DP4:
122 *needalpha = 1;
123 /* fall through */
124 case RC_OPCODE_DP3:
125 *needrgb = 1;
126 break;
127 default:
128 break;
129 }
130 }
131
/**
 * Report which channel groups the four swizzle selectors of \p src refer to.
 *
 * Selector values 0..2 set *rgb to 1, selector value 3 sets *alpha to 1.
 * Larger selector values are ignored.  The outputs are only ever set, never
 * cleared, so the caller must initialise them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
	unsigned int * alpha)
{
	int chan = 0;

	while (chan < 4) {
		const unsigned int sel = GET_SWZ(src.Swizzle, chan);

		if (sel < 3) {
			*rgb = 1;
		} else if (sel == 3) {
			*alpha = 1;
		}
		chan++;
	}
}
144
145 /**
146 * Fill the given ALU instruction's opcodes and source operands into the given pair,
147 * if possible.
148 */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)149 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
150 struct rc_pair_instruction * pair,
151 struct rc_sub_instruction * inst)
152 {
153 int needrgb, needalpha, istranscendent;
154 const struct rc_opcode_info * opcode;
155 int i;
156
157 memset(pair, 0, sizeof(struct rc_pair_instruction));
158
159 classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
160
161 if (needrgb) {
162 if (istranscendent)
163 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
164 else
165 pair->RGB.Opcode = inst->Opcode;
166 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
167 pair->RGB.Saturate = 1;
168 }
169 if (needalpha) {
170 pair->Alpha.Opcode = inst->Opcode;
171 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
172 pair->Alpha.Saturate = 1;
173 }
174
175 opcode = rc_get_opcode_info(inst->Opcode);
176
177 /* Presubtract handling:
178 * We need to make sure that the values used by the presubtract
179 * operation end up in src0 or src1. */
180 if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
181 /* rc_pair_alloc_source() will fill in data for
182 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
183 int j;
184 for(j = 0; j < 3; j++) {
185 int src_regs;
186 if(inst->SrcReg[j].File != RC_FILE_PRESUB)
187 continue;
188
189 src_regs = rc_presubtract_src_reg_count(
190 inst->PreSub.Opcode);
191 for(i = 0; i < src_regs; i++) {
192 unsigned int rgb = 0;
193 unsigned int alpha = 0;
194 src_uses(inst->SrcReg[j], &rgb, &alpha);
195 if(rgb) {
196 pair->RGB.Src[i].File =
197 inst->PreSub.SrcReg[i].File;
198 pair->RGB.Src[i].Index =
199 inst->PreSub.SrcReg[i].Index;
200 pair->RGB.Src[i].Used = 1;
201 }
202 if(alpha) {
203 pair->Alpha.Src[i].File =
204 inst->PreSub.SrcReg[i].File;
205 pair->Alpha.Src[i].Index =
206 inst->PreSub.SrcReg[i].Index;
207 pair->Alpha.Src[i].Used = 1;
208 }
209 }
210 }
211 }
212
213 for(i = 0; i < opcode->NumSrcRegs; ++i) {
214 int source;
215 if (needrgb && !istranscendent) {
216 unsigned int srcrgb = 0;
217 unsigned int srcalpha = 0;
218 unsigned int srcmask = 0;
219 int j;
220 /* We don't care about the alpha channel here. We only
221 * want the part of the swizzle that writes to rgb,
222 * since we are creating an rgb instruction. */
223 for(j = 0; j < 3; ++j) {
224 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
225
226 if (swz < RC_SWIZZLE_W)
227 srcrgb = 1;
228 else if (swz == RC_SWIZZLE_W)
229 srcalpha = 1;
230
231 if (swz < RC_SWIZZLE_UNUSED)
232 srcmask |= 1 << j;
233 }
234 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
235 inst->SrcReg[i].File, inst->SrcReg[i].Index);
236 if (source < 0) {
237 rc_error(&c->Base, "Failed to translate "
238 "rgb instruction.\n");
239 return;
240 }
241 pair->RGB.Arg[i].Source = source;
242 pair->RGB.Arg[i].Swizzle =
243 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
244 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
245 pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
246 }
247 if (needalpha) {
248 unsigned int srcrgb = 0;
249 unsigned int srcalpha = 0;
250 unsigned int swz;
251 if (istranscendent) {
252 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
253 } else {
254 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
255 }
256
257 if (swz < 3)
258 srcrgb = 1;
259 else if (swz < 4)
260 srcalpha = 1;
261 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
262 inst->SrcReg[i].File, inst->SrcReg[i].Index);
263 if (source < 0) {
264 rc_error(&c->Base, "Failed to translate "
265 "alpha instruction.\n");
266 return;
267 }
268 pair->Alpha.Arg[i].Source = source;
269 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
270 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
271
272 if (istranscendent) {
273 pair->Alpha.Arg[i].Negate =
274 !!(inst->SrcReg[i].Negate &
275 inst->DstReg.WriteMask);
276 } else {
277 pair->Alpha.Arg[i].Negate =
278 !!(inst->SrcReg[i].Negate & RC_MASK_W);
279 }
280 }
281 }
282
283 /* Destination handling */
284 if (inst->DstReg.File == RC_FILE_OUTPUT) {
285 if (inst->DstReg.Index == c->OutputDepth) {
286 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
287 } else {
288 for (i = 0; i < 4; i++) {
289 if (inst->DstReg.Index == c->OutputColor[i]) {
290 pair->RGB.Target = i;
291 pair->Alpha.Target = i;
292 pair->RGB.OutputWriteMask |=
293 inst->DstReg.WriteMask & RC_MASK_XYZ;
294 pair->Alpha.OutputWriteMask |=
295 GET_BIT(inst->DstReg.WriteMask, 3);
296 break;
297 }
298 }
299 }
300 } else {
301 if (needrgb) {
302 pair->RGB.DestIndex = inst->DstReg.Index;
303 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
304 }
305
306 if (needalpha) {
307 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
308 if (pair->Alpha.WriteMask) {
309 pair->Alpha.DestIndex = inst->DstReg.Index;
310 }
311 }
312 }
313
314 if (needrgb) {
315 pair->RGB.Omod = inst->Omod;
316 }
317 if (needalpha) {
318 pair->Alpha.Omod = inst->Omod;
319 }
320
321 if (inst->WriteALUResult) {
322 pair->WriteALUResult = inst->WriteALUResult;
323 pair->ALUResultCompare = inst->ALUResultCompare;
324 }
325 }
326
327
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)328 static void check_opcode_support(struct r300_fragment_program_compiler *c,
329 struct rc_sub_instruction *inst)
330 {
331 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
332
333 if (opcode->HasDstReg) {
334 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
335 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
336 return;
337 }
338 }
339
340 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
341 if (inst->SrcReg[i].RelAddr) {
342 rc_error(&c->Base, "Fragment program does not support relative addressing "
343 " of source operands.\n");
344 return;
345 }
346 }
347 }
348
349
350 /**
351 * Translate all ALU instructions into corresponding pair instructions,
352 * performing no other changes.
353 */
rc_pair_translate(struct radeon_compiler * cc,void * user)354 void rc_pair_translate(struct radeon_compiler *cc, void *user)
355 {
356 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
357
358 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
359 inst != &c->Base.Program.Instructions;
360 inst = inst->Next) {
361 const struct rc_opcode_info * opcode;
362 struct rc_sub_instruction copy;
363
364 if (inst->Type != RC_INSTRUCTION_NORMAL)
365 continue;
366
367 opcode = rc_get_opcode_info(inst->U.I.Opcode);
368
369 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
370 continue;
371
372 copy = inst->U.I;
373
374 check_opcode_support(c, ©);
375
376 final_rewrite(©);
377 inst->Type = RC_INSTRUCTION_PAIR;
378 set_pair_instruction(c, &inst->U.P, ©);
379 }
380 }
381