• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
27 
28 #include "radeon_program_pair.h"
29 
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
32 
33 #include "util/compiler.h"
34 
35 
36 /**
37  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
38  * and reverse the order of arguments for CMP.
39  */
final_rewrite(struct rc_sub_instruction * inst)40 static void final_rewrite(struct rc_sub_instruction *inst)
41 {
42 	struct rc_src_register tmp;
43 
44 	switch(inst->Opcode) {
45 	case RC_OPCODE_ADD:
46 		inst->SrcReg[2] = inst->SrcReg[1];
47 		inst->SrcReg[1].File = RC_FILE_NONE;
48 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
49 		inst->SrcReg[1].Negate = RC_MASK_NONE;
50 		inst->Opcode = RC_OPCODE_MAD;
51 		break;
52 	case RC_OPCODE_CMP:
53 		tmp = inst->SrcReg[2];
54 		inst->SrcReg[2] = inst->SrcReg[0];
55 		inst->SrcReg[0] = tmp;
56 		break;
57 	case RC_OPCODE_MOV:
58 		/* AMD say we should use CMP.
59 		 * However, when we transform
60 		 *  KIL -r0;
61 		 * into
62 		 *  CMP tmp, -r0, -r0, 0;
63 		 *  KIL tmp;
64 		 * we get incorrect behaviour on R500 when r0 == 0.0.
65 		 * It appears that the R500 KIL hardware treats -0.0 as less
66 		 * than zero.
67 		 */
68 		inst->SrcReg[1].File = RC_FILE_NONE;
69 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
70 		inst->SrcReg[2].File = RC_FILE_NONE;
71 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
72 		inst->Opcode = RC_OPCODE_MAD;
73 		break;
74 	case RC_OPCODE_MUL:
75 		inst->SrcReg[2].File = RC_FILE_NONE;
76 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
77 		inst->Opcode = RC_OPCODE_MAD;
78 		break;
79 	default:
80 		/* nothing to do */
81 		break;
82 	}
83 }
84 
85 
86 /**
87  * Classify an instruction according to which ALUs etc. it needs
88  */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)89 static void classify_instruction(struct rc_sub_instruction * inst,
90 	int * needrgb, int * needalpha, int * istranscendent)
91 {
92 	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
93 	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
94 	*istranscendent = 0;
95 
96 	if (inst->WriteALUResult == RC_ALURESULT_X)
97 		*needrgb = 1;
98 	else if (inst->WriteALUResult == RC_ALURESULT_W)
99 		*needalpha = 1;
100 
101 	switch(inst->Opcode) {
102 	case RC_OPCODE_ADD:
103 	case RC_OPCODE_CMP:
104 	case RC_OPCODE_CND:
105 	case RC_OPCODE_DDX:
106 	case RC_OPCODE_DDY:
107 	case RC_OPCODE_FRC:
108 	case RC_OPCODE_MAD:
109 	case RC_OPCODE_MAX:
110 	case RC_OPCODE_MIN:
111 	case RC_OPCODE_MOV:
112 	case RC_OPCODE_MUL:
113 		break;
114 	case RC_OPCODE_COS:
115 	case RC_OPCODE_EX2:
116 	case RC_OPCODE_LG2:
117 	case RC_OPCODE_RCP:
118 	case RC_OPCODE_RSQ:
119 	case RC_OPCODE_SIN:
120 		*istranscendent = 1;
121 		*needalpha = 1;
122 		break;
123 	case RC_OPCODE_DP4:
124 		*needalpha = 1;
125 		FALLTHROUGH;
126 	case RC_OPCODE_DP3:
127 		*needrgb = 1;
128 		break;
129 	default:
130 		break;
131 	}
132 }
133 
/**
 * Report which parts of a source register its swizzle reads.
 *
 * For each of the four swizzle slots, a selector value of 0..2 sets *rgb
 * to 1 and a selector value of 3 sets *alpha to 1.  Larger selector values
 * are ignored.  The outputs are only ever set, never cleared, so the caller
 * must initialise them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
							unsigned int * alpha)
{
	for (int chan = 0; chan < 4; chan++) {
		unsigned int sel = GET_SWZ(src.Swizzle, chan);

		switch (sel) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			break;
		}
	}
}
146 
147 /**
148  * Fill the given ALU instruction's opcodes and source operands into the given pair,
149  * if possible.
150  */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)151 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
152 	struct rc_pair_instruction * pair,
153 	struct rc_sub_instruction * inst)
154 {
155 	int needrgb, needalpha, istranscendent;
156 	const struct rc_opcode_info * opcode;
157 	int i;
158 
159 	memset(pair, 0, sizeof(struct rc_pair_instruction));
160 
161 	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
162 
163 	if (needrgb) {
164 		if (istranscendent)
165 			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
166 		else
167 			pair->RGB.Opcode = inst->Opcode;
168 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
169 			pair->RGB.Saturate = 1;
170 	}
171 	if (needalpha) {
172 		pair->Alpha.Opcode = inst->Opcode;
173 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
174 			pair->Alpha.Saturate = 1;
175 	}
176 
177 	opcode = rc_get_opcode_info(inst->Opcode);
178 
179 	/* Presubtract handling:
180 	 * We need to make sure that the values used by the presubtract
181 	 * operation end up in src0 or src1. */
182 	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
183 		/* rc_pair_alloc_source() will fill in data for
184 		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
185 		int j;
186 		for(j = 0; j < 3; j++) {
187 			int src_regs;
188 			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
189 				continue;
190 
191 			src_regs = rc_presubtract_src_reg_count(
192 							inst->PreSub.Opcode);
193 			for(i = 0; i < src_regs; i++) {
194 				unsigned int rgb = 0;
195 				unsigned int alpha = 0;
196 				src_uses(inst->SrcReg[j], &rgb, &alpha);
197 				if(rgb) {
198 					pair->RGB.Src[i].File =
199 						inst->PreSub.SrcReg[i].File;
200 					pair->RGB.Src[i].Index =
201 						inst->PreSub.SrcReg[i].Index;
202 					pair->RGB.Src[i].Used = 1;
203 				}
204 				if(alpha) {
205 					pair->Alpha.Src[i].File =
206 						inst->PreSub.SrcReg[i].File;
207 					pair->Alpha.Src[i].Index =
208 						inst->PreSub.SrcReg[i].Index;
209 					pair->Alpha.Src[i].Used = 1;
210 				}
211 			}
212 		}
213 	}
214 
215 	for(i = 0; i < opcode->NumSrcRegs; ++i) {
216 		int source;
217 		if (needrgb && !istranscendent) {
218 			unsigned int srcrgb = 0;
219 			unsigned int srcalpha = 0;
220 			unsigned int srcmask = 0;
221 			int j;
222 			/* We don't care about the alpha channel here.  We only
223 			 * want the part of the swizzle that writes to rgb,
224 			 * since we are creating an rgb instruction. */
225 			for(j = 0; j < 3; ++j) {
226 				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
227 
228 				if (swz < RC_SWIZZLE_W)
229 					srcrgb = 1;
230 				else if (swz == RC_SWIZZLE_W)
231 					srcalpha = 1;
232 
233 				/* We check for ZERO here as well because otherwise the zero
234 				 * sign (which doesn't matter and we already ignore it previously
235 				 * when checking for valid swizzle) could mess up the final negate sign.
236 				 * Example problematic pattern where this would be produced is:
237 				 *   CONST[1] FLT32 {   0.0000,     0.0000,    -4.0000,     0.0000}
238 				 *   ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
239 				 *
240 				 * after inline literals would become:
241 				 *   ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
242 				 *
243 				 * and after pair translate:
244 				 *   src0.xyz = const[0], src0.w = 4.000000 (0x48)
245 				 *   MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
246 				 *
247 				 * Without the zero check there would be -src0.w00.
248 				 */
249 				if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
250 					srcmask |= 1 << j;
251 			}
252 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
253 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
254 			if (source < 0) {
255 				rc_error(&c->Base, "Failed to translate "
256 							"rgb instruction.\n");
257 				return;
258 			}
259 			pair->RGB.Arg[i].Source = source;
260 			pair->RGB.Arg[i].Swizzle =
261 				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
262 			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
263 			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
264 		}
265 		if (needalpha) {
266 			unsigned int srcrgb = 0;
267 			unsigned int srcalpha = 0;
268 			unsigned int swz;
269 			if (istranscendent) {
270 				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
271 			} else {
272 				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
273 			}
274 
275 			if (swz < 3)
276 				srcrgb = 1;
277 			else if (swz < 4)
278 				srcalpha = 1;
279 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
280 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
281 			if (source < 0) {
282 				rc_error(&c->Base, "Failed to translate "
283 							"alpha instruction.\n");
284 				return;
285 			}
286 			pair->Alpha.Arg[i].Source = source;
287 			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
288 			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
289 
290 			if (istranscendent) {
291 				pair->Alpha.Arg[i].Negate =
292 					!!(inst->SrcReg[i].Negate &
293 							inst->DstReg.WriteMask);
294 			} else {
295 				pair->Alpha.Arg[i].Negate =
296 					!!(inst->SrcReg[i].Negate & RC_MASK_W);
297 			}
298 		}
299 	}
300 
301 	/* Destination handling */
302 	if (inst->DstReg.File == RC_FILE_OUTPUT) {
303         if (inst->DstReg.Index == c->OutputDepth) {
304             pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
305         } else {
306             for (i = 0; i < 4; i++) {
307                 if (inst->DstReg.Index == c->OutputColor[i]) {
308                     pair->RGB.Target = i;
309                     pair->Alpha.Target = i;
310                     pair->RGB.OutputWriteMask |=
311                         inst->DstReg.WriteMask & RC_MASK_XYZ;
312                     pair->Alpha.OutputWriteMask |=
313                         GET_BIT(inst->DstReg.WriteMask, 3);
314                     break;
315                 }
316             }
317         }
318 	} else {
319 		if (needrgb) {
320 			pair->RGB.DestIndex = inst->DstReg.Index;
321 			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
322 		}
323 
324 		if (needalpha) {
325 			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
326 			if (pair->Alpha.WriteMask) {
327 				pair->Alpha.DestIndex = inst->DstReg.Index;
328 			}
329 		}
330 	}
331 
332 	if (needrgb) {
333 		pair->RGB.Omod = inst->Omod;
334 	}
335 	if (needalpha) {
336 		pair->Alpha.Omod = inst->Omod;
337 	}
338 
339 	if (inst->WriteALUResult) {
340 		pair->WriteALUResult = inst->WriteALUResult;
341 		pair->ALUResultCompare = inst->ALUResultCompare;
342 	}
343 }
344 
345 
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)346 static void check_opcode_support(struct r300_fragment_program_compiler *c,
347 				 struct rc_sub_instruction *inst)
348 {
349 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
350 
351 	if (opcode->HasDstReg) {
352 		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
353 			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
354 			return;
355 		}
356 	}
357 
358 	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
359 		if (inst->SrcReg[i].RelAddr) {
360 			rc_error(&c->Base, "Fragment program does not support relative addressing "
361 				 " of source operands.\n");
362 			return;
363 		}
364 	}
365 }
366 
367 
368 /**
369  * Translate all ALU instructions into corresponding pair instructions,
370  * performing no other changes.
371  */
rc_pair_translate(struct radeon_compiler * cc,void * user)372 void rc_pair_translate(struct radeon_compiler *cc, void *user)
373 {
374 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
375 
376 	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
377 	    inst != &c->Base.Program.Instructions;
378 	    inst = inst->Next) {
379 		const struct rc_opcode_info * opcode;
380 		struct rc_sub_instruction copy;
381 
382 		if (inst->Type != RC_INSTRUCTION_NORMAL)
383 			continue;
384 
385 		opcode = rc_get_opcode_info(inst->U.I.Opcode);
386 
387 		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
388 			continue;
389 
390 		copy = inst->U.I;
391 
392 		check_opcode_support(c, &copy);
393 
394 		final_rewrite(&copy);
395 		inst->Type = RC_INSTRUCTION_PAIR;
396 		set_pair_instruction(c, &inst->U.P, &copy);
397 	}
398 }
399