1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
32
33 #include "util/compiler.h"
34
35
36 /**
37 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
38 * and reverse the order of arguments for CMP.
39 */
final_rewrite(struct rc_sub_instruction * inst)40 static void final_rewrite(struct rc_sub_instruction *inst)
41 {
42 struct rc_src_register tmp;
43
44 switch(inst->Opcode) {
45 case RC_OPCODE_ADD:
46 inst->SrcReg[2] = inst->SrcReg[1];
47 inst->SrcReg[1].File = RC_FILE_NONE;
48 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
49 inst->SrcReg[1].Negate = RC_MASK_NONE;
50 inst->Opcode = RC_OPCODE_MAD;
51 break;
52 case RC_OPCODE_CMP:
53 tmp = inst->SrcReg[2];
54 inst->SrcReg[2] = inst->SrcReg[0];
55 inst->SrcReg[0] = tmp;
56 break;
57 case RC_OPCODE_MOV:
58 /* AMD say we should use CMP.
59 * However, when we transform
60 * KIL -r0;
61 * into
62 * CMP tmp, -r0, -r0, 0;
63 * KIL tmp;
64 * we get incorrect behaviour on R500 when r0 == 0.0.
65 * It appears that the R500 KIL hardware treats -0.0 as less
66 * than zero.
67 */
68 inst->SrcReg[1].File = RC_FILE_NONE;
69 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
70 inst->SrcReg[2].File = RC_FILE_NONE;
71 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
72 inst->Opcode = RC_OPCODE_MAD;
73 break;
74 case RC_OPCODE_MUL:
75 inst->SrcReg[2].File = RC_FILE_NONE;
76 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
77 inst->Opcode = RC_OPCODE_MAD;
78 break;
79 default:
80 /* nothing to do */
81 break;
82 }
83 }
84
85
86 /**
87 * Classify an instruction according to which ALUs etc. it needs
88 */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)89 static void classify_instruction(struct rc_sub_instruction * inst,
90 int * needrgb, int * needalpha, int * istranscendent)
91 {
92 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
93 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
94 *istranscendent = 0;
95
96 if (inst->WriteALUResult == RC_ALURESULT_X)
97 *needrgb = 1;
98 else if (inst->WriteALUResult == RC_ALURESULT_W)
99 *needalpha = 1;
100
101 switch(inst->Opcode) {
102 case RC_OPCODE_ADD:
103 case RC_OPCODE_CMP:
104 case RC_OPCODE_CND:
105 case RC_OPCODE_DDX:
106 case RC_OPCODE_DDY:
107 case RC_OPCODE_FRC:
108 case RC_OPCODE_MAD:
109 case RC_OPCODE_MAX:
110 case RC_OPCODE_MIN:
111 case RC_OPCODE_MOV:
112 case RC_OPCODE_MUL:
113 break;
114 case RC_OPCODE_COS:
115 case RC_OPCODE_EX2:
116 case RC_OPCODE_LG2:
117 case RC_OPCODE_RCP:
118 case RC_OPCODE_RSQ:
119 case RC_OPCODE_SIN:
120 *istranscendent = 1;
121 *needalpha = 1;
122 break;
123 case RC_OPCODE_DP4:
124 *needalpha = 1;
125 FALLTHROUGH;
126 case RC_OPCODE_DP3:
127 *needrgb = 1;
128 break;
129 default:
130 break;
131 }
132 }
133
/**
 * Report which channel groups a source operand's swizzle reads.
 *
 * All four swizzle slots are examined. Selector values 0..2 set *rgb to 1
 * and selector value 3 sets *alpha to 1; any other selector value is
 * ignored. The outputs are never cleared here, so the caller must
 * initialise them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
	unsigned int * alpha)
{
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		switch (GET_SWZ(src.Swizzle, chan)) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			break;
		}
	}
}
146
147 /**
148 * Fill the given ALU instruction's opcodes and source operands into the given pair,
149 * if possible.
150 */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)151 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
152 struct rc_pair_instruction * pair,
153 struct rc_sub_instruction * inst)
154 {
155 int needrgb, needalpha, istranscendent;
156 const struct rc_opcode_info * opcode;
157 int i;
158
159 memset(pair, 0, sizeof(struct rc_pair_instruction));
160
161 classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
162
163 if (needrgb) {
164 if (istranscendent)
165 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
166 else
167 pair->RGB.Opcode = inst->Opcode;
168 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
169 pair->RGB.Saturate = 1;
170 }
171 if (needalpha) {
172 pair->Alpha.Opcode = inst->Opcode;
173 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
174 pair->Alpha.Saturate = 1;
175 }
176
177 opcode = rc_get_opcode_info(inst->Opcode);
178
179 /* Presubtract handling:
180 * We need to make sure that the values used by the presubtract
181 * operation end up in src0 or src1. */
182 if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
183 /* rc_pair_alloc_source() will fill in data for
184 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
185 int j;
186 for(j = 0; j < 3; j++) {
187 int src_regs;
188 if(inst->SrcReg[j].File != RC_FILE_PRESUB)
189 continue;
190
191 src_regs = rc_presubtract_src_reg_count(
192 inst->PreSub.Opcode);
193 for(i = 0; i < src_regs; i++) {
194 unsigned int rgb = 0;
195 unsigned int alpha = 0;
196 src_uses(inst->SrcReg[j], &rgb, &alpha);
197 if(rgb) {
198 pair->RGB.Src[i].File =
199 inst->PreSub.SrcReg[i].File;
200 pair->RGB.Src[i].Index =
201 inst->PreSub.SrcReg[i].Index;
202 pair->RGB.Src[i].Used = 1;
203 }
204 if(alpha) {
205 pair->Alpha.Src[i].File =
206 inst->PreSub.SrcReg[i].File;
207 pair->Alpha.Src[i].Index =
208 inst->PreSub.SrcReg[i].Index;
209 pair->Alpha.Src[i].Used = 1;
210 }
211 }
212 }
213 }
214
215 for(i = 0; i < opcode->NumSrcRegs; ++i) {
216 int source;
217 if (needrgb && !istranscendent) {
218 unsigned int srcrgb = 0;
219 unsigned int srcalpha = 0;
220 unsigned int srcmask = 0;
221 int j;
222 /* We don't care about the alpha channel here. We only
223 * want the part of the swizzle that writes to rgb,
224 * since we are creating an rgb instruction. */
225 for(j = 0; j < 3; ++j) {
226 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
227
228 if (swz < RC_SWIZZLE_W)
229 srcrgb = 1;
230 else if (swz == RC_SWIZZLE_W)
231 srcalpha = 1;
232
233 if (swz < RC_SWIZZLE_UNUSED)
234 srcmask |= 1 << j;
235 }
236 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
237 inst->SrcReg[i].File, inst->SrcReg[i].Index);
238 if (source < 0) {
239 rc_error(&c->Base, "Failed to translate "
240 "rgb instruction.\n");
241 return;
242 }
243 pair->RGB.Arg[i].Source = source;
244 pair->RGB.Arg[i].Swizzle =
245 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
246 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
247 pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
248 }
249 if (needalpha) {
250 unsigned int srcrgb = 0;
251 unsigned int srcalpha = 0;
252 unsigned int swz;
253 if (istranscendent) {
254 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
255 } else {
256 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
257 }
258
259 if (swz < 3)
260 srcrgb = 1;
261 else if (swz < 4)
262 srcalpha = 1;
263 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
264 inst->SrcReg[i].File, inst->SrcReg[i].Index);
265 if (source < 0) {
266 rc_error(&c->Base, "Failed to translate "
267 "alpha instruction.\n");
268 return;
269 }
270 pair->Alpha.Arg[i].Source = source;
271 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
272 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
273
274 if (istranscendent) {
275 pair->Alpha.Arg[i].Negate =
276 !!(inst->SrcReg[i].Negate &
277 inst->DstReg.WriteMask);
278 } else {
279 pair->Alpha.Arg[i].Negate =
280 !!(inst->SrcReg[i].Negate & RC_MASK_W);
281 }
282 }
283 }
284
285 /* Destination handling */
286 if (inst->DstReg.File == RC_FILE_OUTPUT) {
287 if (inst->DstReg.Index == c->OutputDepth) {
288 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
289 } else {
290 for (i = 0; i < 4; i++) {
291 if (inst->DstReg.Index == c->OutputColor[i]) {
292 pair->RGB.Target = i;
293 pair->Alpha.Target = i;
294 pair->RGB.OutputWriteMask |=
295 inst->DstReg.WriteMask & RC_MASK_XYZ;
296 pair->Alpha.OutputWriteMask |=
297 GET_BIT(inst->DstReg.WriteMask, 3);
298 break;
299 }
300 }
301 }
302 } else {
303 if (needrgb) {
304 pair->RGB.DestIndex = inst->DstReg.Index;
305 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
306 }
307
308 if (needalpha) {
309 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
310 if (pair->Alpha.WriteMask) {
311 pair->Alpha.DestIndex = inst->DstReg.Index;
312 }
313 }
314 }
315
316 if (needrgb) {
317 pair->RGB.Omod = inst->Omod;
318 }
319 if (needalpha) {
320 pair->Alpha.Omod = inst->Omod;
321 }
322
323 if (inst->WriteALUResult) {
324 pair->WriteALUResult = inst->WriteALUResult;
325 pair->ALUResultCompare = inst->ALUResultCompare;
326 }
327 }
328
329
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)330 static void check_opcode_support(struct r300_fragment_program_compiler *c,
331 struct rc_sub_instruction *inst)
332 {
333 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
334
335 if (opcode->HasDstReg) {
336 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
337 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
338 return;
339 }
340 }
341
342 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
343 if (inst->SrcReg[i].RelAddr) {
344 rc_error(&c->Base, "Fragment program does not support relative addressing "
345 " of source operands.\n");
346 return;
347 }
348 }
349 }
350
351
352 /**
353 * Translate all ALU instructions into corresponding pair instructions,
354 * performing no other changes.
355 */
rc_pair_translate(struct radeon_compiler * cc,void * user)356 void rc_pair_translate(struct radeon_compiler *cc, void *user)
357 {
358 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
359
360 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
361 inst != &c->Base.Program.Instructions;
362 inst = inst->Next) {
363 const struct rc_opcode_info * opcode;
364 struct rc_sub_instruction copy;
365
366 if (inst->Type != RC_INSTRUCTION_NORMAL)
367 continue;
368
369 opcode = rc_get_opcode_info(inst->U.I.Opcode);
370
371 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
372 continue;
373
374 copy = inst->U.I;
375
376 check_opcode_support(c, ©);
377
378 final_rewrite(©);
379 inst->Type = RC_INSTRUCTION_PAIR;
380 set_pair_instruction(c, &inst->U.P, ©);
381 }
382 }
383