1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2012 Advanced Micro Devices, Inc.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 * Authors:
28 * Nicolai Haehnle
29 * Tom Stellard <thomas.stellard@amd.com>
30 */
31
32 #include "radeon_dataflow.h"
33
34 #include "radeon_code.h"
35 #include "radeon_compiler.h"
36 #include "radeon_compiler_util.h"
37 #include "radeon_swizzle.h"
38
get_swizzle_split(struct radeon_compiler * c,struct rc_swizzle_split * split,struct rc_instruction * inst,unsigned src,unsigned * usemask)39 static unsigned int get_swizzle_split(struct radeon_compiler * c,
40 struct rc_swizzle_split * split, struct rc_instruction * inst,
41 unsigned src, unsigned * usemask)
42 {
43 *usemask = 0;
44 for(unsigned int chan = 0; chan < 4; ++chan) {
45 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
46 *usemask |= 1 << chan;
47 }
48
49 c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split);
50 return split->NumPhases;
51 }
52
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)53 static void rewrite_source(struct radeon_compiler * c,
54 struct rc_instruction * inst, unsigned src)
55 {
56 struct rc_swizzle_split split;
57 unsigned int tempreg = rc_find_free_temporary(c);
58 unsigned int usemask;
59
60 get_swizzle_split(c, &split, inst, src, &usemask);
61
62 for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
63 struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
64 unsigned int masked_negate;
65
66 mov->U.I.Opcode = RC_OPCODE_MOV;
67 mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
68 mov->U.I.DstReg.Index = tempreg;
69 mov->U.I.DstReg.WriteMask = split.Phase[phase];
70 mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
71 mov->U.I.PreSub = inst->U.I.PreSub;
72
73 for(unsigned int chan = 0; chan < 4; ++chan) {
74 if (!GET_BIT(split.Phase[phase], chan))
75 SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
76 }
77
78 masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
79 if (masked_negate == 0)
80 mov->U.I.SrcReg[0].Negate = 0;
81 else if (masked_negate == split.Phase[phase])
82 mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
83
84 }
85
86 inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
87 inst->U.I.SrcReg[src].Index = tempreg;
88 inst->U.I.SrcReg[src].Swizzle = 0;
89 inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
90 inst->U.I.SrcReg[src].Abs = 0;
91 for(unsigned int chan = 0; chan < 4; ++chan) {
92 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
93 GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
94 }
95 }
96
97 /**
98 * This function will attempt to rewrite non-native swizzles that read from
99 * immediate registers by rearranging the immediates to allow the
100 * instruction to use native swizzles.
101 */
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)102 static unsigned try_rewrite_constant(struct radeon_compiler *c,
103 struct rc_src_register *reg)
104 {
105 unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
106 unsigned all_inline = 0;
107 bool w_inline_constant = false;
108 float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
109
110 if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
111 /* The register does not contain immediates, but if all
112 * the swizzles are inline constants, we can still rewrite
113 * it. */
114
115 new_swizzle = RC_SWIZZLE_XYZW;
116 for (chan = 0 ; chan < 4; chan++) {
117 unsigned swz = GET_SWZ(reg->Swizzle, chan);
118 if (swz <= RC_SWIZZLE_W) {
119 return 0;
120 }
121 if (swz == RC_SWIZZLE_UNUSED) {
122 SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
123 }
124 }
125 all_inline = 1;
126 } else {
127 new_swizzle = reg->Swizzle;
128 }
129
130 swz = RC_SWIZZLE_UNUSED;
131 found_swizzle = 1;
132 /* Check if all channels have the same swizzle. If they do we can skip
133 * the search for a native swizzle. We only need to check the first
134 * three channels, because any swizzle is legal in the fourth channel.
135 */
136 for (chan = 0; chan < 3; chan++) {
137 unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
138 if (chan_swz == RC_SWIZZLE_UNUSED) {
139 continue;
140 }
141 if (swz == RC_SWIZZLE_UNUSED) {
142 swz = chan_swz;
143 } else if (swz != chan_swz) {
144 found_swizzle = 0;
145 break;
146 }
147 }
148
149 /* Find a legal swizzle */
150
151 /* This loop attempts to find a native swizzle where all the
152 * channels are different. */
153 while (!found_swizzle && !all_inline) {
154 swz0 = GET_SWZ(new_swizzle, 0);
155 swz1 = GET_SWZ(new_swizzle, 1);
156 swz2 = GET_SWZ(new_swizzle, 2);
157
158 /* Swizzle .W. is never legal. */
159 if (swz1 == RC_SWIZZLE_W ||
160 swz1 == RC_SWIZZLE_UNUSED ||
161 swz1 == RC_SWIZZLE_ZERO ||
162 swz1 == RC_SWIZZLE_HALF ||
163 swz1 == RC_SWIZZLE_ONE) {
164 /* We chose Z, because there are two non-repeating
165 * swizzle combinations of the form .Z. There are
166 * only one combination each for .X. and .Y. */
167 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
168 continue;
169 }
170
171 if (swz2 == RC_SWIZZLE_UNUSED) {
172 /* We choose Y, because there are two non-repeating
173 * swizzle combinations of the form ..Y */
174 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
175 continue;
176 }
177
178 switch (swz0) {
179 /* X.. */
180 case RC_SWIZZLE_X:
181 /* Legal swizzles that start with X: XYZ, XXX */
182 switch (swz1) {
183 /* XX. */
184 case RC_SWIZZLE_X:
185 /* The new swizzle will be:
186 * ZXY (XX. => ZX. => ZXY) */
187 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
188 break;
189 /* XY. */
190 case RC_SWIZZLE_Y:
191 /* The new swizzle is XYZ */
192 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
193 found_swizzle = 1;
194 break;
195 /* XZ. */
196 case RC_SWIZZLE_Z:
197 /* XZZ */
198 if (swz2 == RC_SWIZZLE_Z) {
199 /* The new swizzle is XYZ */
200 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
201 found_swizzle = 1;
202 } else { /* XZ[^Z] */
203 /* The new swizzle will be:
204 * YZX (XZ. => YZ. => YZX) */
205 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
206 }
207 break;
208 /* XW. Should have already been handled. */
209 case RC_SWIZZLE_W:
210 assert(0);
211 break;
212 }
213 break;
214 /* Y.. */
215 case RC_SWIZZLE_Y:
216 /* Legal swizzles that start with Y: YYY, YZX */
217 switch (swz1) {
218 /* YY. */
219 case RC_SWIZZLE_Y:
220 /* The new swizzle will be:
221 * XYZ (YY. => XY. => XYZ) */
222 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
223 break;
224 /* YZ. */
225 case RC_SWIZZLE_Z:
226 /* The new swizzle is YZX */
227 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
228 found_swizzle = 1;
229 break;
230 /* YX. */
231 case RC_SWIZZLE_X:
232 /* YXX */
233 if (swz2 == RC_SWIZZLE_X) {
234 /*The new swizzle is YZX */
235 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
236 found_swizzle = 1;
237 } else { /* YX[^X] */
238 /* The new swizzle will be:
239 * ZXY (YX. => ZX. -> ZXY) */
240 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
241 }
242 break;
243 /* YW. Should have already been handled. */
244 case RC_SWIZZLE_W:
245 assert(0);
246 break;
247 }
248 break;
249 /* Z.. */
250 case RC_SWIZZLE_Z:
251 /* Legal swizzles that start with Z: ZZZ, ZXY */
252 switch (swz1) {
253 /* ZZ. */
254 case RC_SWIZZLE_Z:
255 /* The new swizzle will be:
256 * WZY (ZZ. => WZ. => WZY) */
257 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
258 break;
259 /* ZX. */
260 case RC_SWIZZLE_X:
261 /* The new swizzle is ZXY */
262 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
263 found_swizzle = 1;
264 break;
265 /* ZY. */
266 case RC_SWIZZLE_Y:
267 /* ZYY */
268 if (swz2 == RC_SWIZZLE_Y) {
269 /* The new swizzle is ZXY */
270 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
271 found_swizzle = 1;
272 } else { /* ZY[^Y] */
273 /* The new swizzle will be:
274 * XYZ (ZY. => XY. => XYZ) */
275 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
276 }
277 break;
278 /* ZW. Should have already been handled. */
279 case RC_SWIZZLE_W:
280 assert(0);
281 break;
282 }
283 break;
284
285 /* W.. */
286 case RC_SWIZZLE_W:
287 /* Legal swizzles that start with X: WWW, WZY */
288 switch (swz1) {
289 /* WW. Should have already been handled. */
290 case RC_SWIZZLE_W:
291 assert(0);
292 break;
293 /* WZ. */
294 case RC_SWIZZLE_Z:
295 /* The new swizzle will be WZY */
296 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
297 found_swizzle = 1;
298 break;
299 /* WX. */
300 case RC_SWIZZLE_X:
301 /* WY. */
302 case RC_SWIZZLE_Y:
303 /* W[XY]Y */
304 if (swz2 == RC_SWIZZLE_Y) {
305 /* The new swizzle will be WZY */
306 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
307 found_swizzle = 1;
308 } else { /* W[XY][^Y] */
309 /* The new swizzle will be:
310 * ZXY (WX. => XX. => ZX. => ZXY) or
311 * XYZ (WY. => XY. => XYZ)
312 */
313 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
314 }
315 break;
316 }
317 break;
318 /* U.. 0.. 1.. H..*/
319 case RC_SWIZZLE_UNUSED:
320 case RC_SWIZZLE_ZERO:
321 case RC_SWIZZLE_ONE:
322 case RC_SWIZZLE_HALF:
323 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
324 break;
325 }
326 }
327
328 /* Handle the swizzle in the w channel. */
329 swz3 = GET_SWZ(reg->Swizzle, 3);
330
331 /* We can skip this if the swizzle in channel w is an inline constant. */
332 if (is_swizzle_inline_constant(swz3)) {
333 w_inline_constant = true;
334 } else {
335 for (chan = 0; chan < 3; chan++) {
336 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
337 unsigned new_swz = GET_SWZ(new_swizzle, chan);
338 /* If the swizzle in the w channel is the same as the
339 * swizzle in any other channels, we need to rewrite it.
340 * For example:
341 * reg->Swizzle == XWZW
342 * new_swizzle == XYZX
343 * Since the swizzle in the y channel is being
344 * rewritten from W -> Y we need to change the swizzle
345 * in the w channel from W -> Y as well.
346 */
347 if (old_swz == swz3) {
348 SET_SWZ(new_swizzle, 3,
349 GET_SWZ(new_swizzle, chan));
350 break;
351 }
352
353 /* The swizzle in channel w will be overwritten by one
354 * of the new swizzles. */
355 if (new_swz == swz3) {
356 /* Find an unused swizzle */
357 unsigned i;
358 unsigned used = 0;
359 for (i = 0; i < 3; i++) {
360 used |= 1 << GET_SWZ(new_swizzle, i);
361 }
362 for (i = 0; i < 4; i++) {
363 if (used & (1 << i)) {
364 continue;
365 }
366 SET_SWZ(new_swizzle, 3, i);
367 }
368 }
369 }
370 }
371
372 for (chan = 0; chan < 4; chan++) {
373 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
374 unsigned new_swz = GET_SWZ(new_swizzle, chan);
375
376 if (old_swz == RC_SWIZZLE_UNUSED) {
377 continue;
378 }
379
380 /* We don't need to change the swizzle in channel w if it is
381 * an inline constant. These are always legal in the w channel.
382 *
383 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
384 */
385 if (chan == 3 && w_inline_constant) {
386 continue;
387 }
388
389 if (new_swz > RC_SWIZZLE_W) {
390 rc_error(c, "Bad swizzle in try_rewrite_constant()");
391 new_swz = RC_SWIZZLE_X;
392 }
393
394 switch (old_swz) {
395 case RC_SWIZZLE_ZERO:
396 imms[new_swz] = 0.0f;
397 break;
398 case RC_SWIZZLE_HALF:
399 if (reg->Negate & (1 << chan)) {
400 imms[new_swz] = -0.5f;
401 } else {
402 imms[new_swz] = 0.5f;
403 }
404 break;
405 case RC_SWIZZLE_ONE:
406 if (reg->Negate & (1 << chan)) {
407 imms[new_swz] = -1.0f;
408 } else {
409 imms[new_swz] = 1.0f;
410 }
411 break;
412 default:
413 imms[new_swz] = rc_get_constant_value(c, reg->Index,
414 reg->Swizzle, reg->Negate, chan);
415 }
416 SET_SWZ(reg->Swizzle, chan, new_swz);
417 }
418 reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
419 imms);
420 /* We need to set the register file to CONSTANT in case we are
421 * converting a non-constant register with constant swizzles (e.g.
422 * ONE, ZERO, HALF).
423 */
424 reg->File = RC_FILE_CONSTANT;
425 reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0;
426 return 1;
427 }
428
429 /**
430 * Set all channels not specified by writemaks to unused.
431 */
clear_channels(struct rc_instruction * inst,unsigned writemask)432 static void clear_channels(struct rc_instruction * inst, unsigned writemask)
433 {
434 inst->U.I.DstReg.WriteMask = writemask;
435 for (unsigned chan = 0; chan < 4; chan++) {
436 if (writemask & (1 << chan))
437 continue;
438
439 const struct rc_opcode_info * opcode =
440 rc_get_opcode_info(inst->U.I.Opcode);
441 for (unsigned src = 0; src < opcode->NumSrcRegs; src++) {
442 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
443 }
444 }
445 /* TODO: We could in theory add constant swizzles back as well,
446 * they will be all legal when we have just a single channel,
447 * to save some sources and help the pair scheduling later. */
448 }
449
try_splitting_single_channel(struct radeon_compiler * c,struct rc_instruction * inst)450 static bool try_splitting_single_channel(struct radeon_compiler * c,
451 struct rc_instruction * inst)
452 {
453 for (unsigned chan = 0; chan < 3; chan++) {
454 struct rc_instruction * new_inst;
455 new_inst = rc_insert_new_instruction(c, inst);
456 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
457 clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan));
458
459 const struct rc_opcode_info * opcode =
460 rc_get_opcode_info(new_inst->U.I.Opcode);
461 bool valid_swizzles = true;
462
463 for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
464 struct rc_src_register *reg = &new_inst->U.I.SrcReg[src];
465
466 if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg))
467 valid_swizzles = false;
468 }
469
470 if (!valid_swizzles) {
471 rc_remove_instruction(new_inst);
472 } else {
473 clear_channels(inst, 1 << chan);
474 return true;
475 }
476 }
477 return false;
478 }
479
try_splitting_instruction(struct radeon_compiler * c,struct rc_instruction * inst)480 static bool try_splitting_instruction(struct radeon_compiler * c,
481 struct rc_instruction * inst)
482 {
483 /* Adding more output instructions in FS is bad for performance. */
484 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
485 return false;
486
487 /* When only single channel of the swizzle is wrong, like xwzw,
488 * it is best to just split the single channel out.
489 */
490 if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW ||
491 inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) {
492 if (try_splitting_single_channel(c, inst))
493 return true;
494 }
495
496 for (unsigned chan = 0; chan < 3; chan++) {
497 if (!(inst->U.I.DstReg.WriteMask & (1 << chan)))
498 continue;
499
500 unsigned next_chan;
501 for (next_chan = chan + 1; next_chan < 4; next_chan++) {
502 if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan)))
503 continue;
504
505 /* We don't want to split the last used x/y/z channel and the
506 * w channel. Pair scheduling might be able to put it back
507 * together, but we don't trust it that much.
508 *
509 * Next is W already, rewrite the original inst and we are done.
510 */
511 if (next_chan == 3) {
512 clear_channels(inst, (1 << chan) | (1 << next_chan));
513 return true;
514 }
515
516 struct rc_instruction * new_inst;
517 new_inst = rc_insert_new_instruction(c, inst->Prev);
518 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
519 clear_channels(new_inst, 1 << chan);
520 break;
521 }
522
523 /* No next chan */
524 if (next_chan == 4) {
525 clear_channels(inst, 1 << chan);
526 return true;
527 }
528 }
529 assert(0 && "Unreachable\n");
530 return false;
531 }
532
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)533 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
534 {
535 struct rc_instruction * inst;
536
537 for(inst = c->Program.Instructions.Next;
538 inst != &c->Program.Instructions;
539 inst = inst->Next) {
540 const struct rc_opcode_info * opcode =
541 rc_get_opcode_info(inst->U.I.Opcode);
542 unsigned src, usemask;
543 unsigned total_splits = 0;
544 struct rc_swizzle_split split;
545
546 /* If multiple sources needs splitting or some source needs to split
547 * too many times, it is actually better to just split the whole ALU
548 * instruction to separate channels instead of inserting extra movs.
549 */
550 for (src = 0; src < opcode->NumSrcRegs; ++src) {
551 /* Don't count invalid swizzles from immediates, we can just
552 * insert new immediates with the correct order later.
553 */
554 if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File,
555 inst->U.I.SrcReg[src].Index)
556 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) {
557 total_splits++;
558 } else {
559 total_splits += get_swizzle_split(c, &split, inst,
560 src, &usemask);
561 }
562 }
563
564 /* Even if there is only a single split, i.e., two extra movs, this still
565 * accounts to three instructions, the same as when we split
566 * the original instruction right away.
567 */
568 if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) {
569 if (try_splitting_instruction(c, inst))
570 continue;
571 }
572
573 /* For texturing or non-componentwise opcodes we do the old way
574 * of adding extra movs.
575 */
576 for(src = 0; src < opcode->NumSrcRegs; ++src) {
577 struct rc_src_register *reg = &inst->U.I.SrcReg[src];
578 if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
579 continue;
580 }
581 if (!c->is_r500 &&
582 c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
583 (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) &&
584 try_rewrite_constant(c, reg)) {
585 continue;
586 }
587 rewrite_source(c, inst, src);
588 }
589 }
590 if (c->Debug & RC_DBG_LOG)
591 rc_constants_print(&c->Program.Constants);
592 }
593