1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Authors:
5 * Nicolai Haehnle
6 * Tom Stellard <thomas.stellard@amd.com>
7 * SPDX-License-Identifier: MIT
8 */
9
10 #include "radeon_dataflow.h"
11
12 #include "radeon_code.h"
13 #include "radeon_compiler.h"
14 #include "radeon_compiler_util.h"
15 #include "radeon_swizzle.h"
16
17 static unsigned int
get_swizzle_split(struct radeon_compiler * c,struct rc_swizzle_split * split,struct rc_instruction * inst,unsigned src,unsigned * usemask)18 get_swizzle_split(struct radeon_compiler *c, struct rc_swizzle_split *split,
19 struct rc_instruction *inst, unsigned src, unsigned *usemask)
20 {
21 *usemask = 0;
22 for (unsigned int chan = 0; chan < 4; ++chan) {
23 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
24 *usemask |= 1 << chan;
25 }
26
27 c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split);
28 return split->NumPhases;
29 }
30
31 static void
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)32 rewrite_source(struct radeon_compiler *c, struct rc_instruction *inst, unsigned src)
33 {
34 struct rc_swizzle_split split;
35 unsigned int tempreg = rc_find_free_temporary(c);
36 unsigned int usemask;
37
38 get_swizzle_split(c, &split, inst, src, &usemask);
39
40 for (unsigned int phase = 0; phase < split.NumPhases; ++phase) {
41 struct rc_instruction *mov = rc_insert_new_instruction(c, inst->Prev);
42 unsigned int masked_negate;
43
44 mov->U.I.Opcode = RC_OPCODE_MOV;
45 mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
46 mov->U.I.DstReg.Index = tempreg;
47 mov->U.I.DstReg.WriteMask = split.Phase[phase];
48 mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
49 mov->U.I.PreSub = inst->U.I.PreSub;
50
51 /* RC_OPCODE_KIL will trigger if the value is -0 and TEX srcs don't have negate
52 * so considering something like this pattern
53 * 0: ADD temp[1].x, input[0].w___, const[0].-x___;
54 * 1: CMP temp[2].x, temp[1].x___, none.1___, none.0___;
55 * 2: KIL -temp[2].xxxx;
56 * we don't want to insert MOV, because HW docs advise we tranlate MOV to MAX
57 * (with RC_OPCODE_DISABLE) and this in turn will mean the KIL will always
58 * trigger (as it will have either -1 or -0). So emit here ADD src0 + 0 instead.
59 */
60 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
61 assert(!phase);
62 mov->U.I.Opcode = RC_OPCODE_ADD;
63 mov->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
64 }
65
66 for (unsigned int chan = 0; chan < 4; ++chan) {
67 if (!GET_BIT(split.Phase[phase], chan))
68 SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
69 }
70
71 masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
72 if (masked_negate == 0)
73 mov->U.I.SrcReg[0].Negate = 0;
74 else if (masked_negate == split.Phase[phase])
75 mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
76 }
77
78 inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
79 inst->U.I.SrcReg[src].Index = tempreg;
80 inst->U.I.SrcReg[src].Swizzle = 0;
81 inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
82 inst->U.I.SrcReg[src].Abs = 0;
83 for (unsigned int chan = 0; chan < 4; ++chan) {
84 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
85 GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
86 }
87 }
88
89 /**
90 * This function will attempt to rewrite non-native swizzles that read from
91 * immediate registers by rearranging the immediates to allow the
92 * instruction to use native swizzles.
93 */
94 static unsigned
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)95 try_rewrite_constant(struct radeon_compiler *c, struct rc_src_register *reg)
96 {
97 unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
98 unsigned all_inline = 0;
99 bool w_inline_constant = false;
100 float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
101
102 if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
103 /* The register does not contain immediates, but if all
104 * the swizzles are inline constants, we can still rewrite
105 * it. */
106
107 new_swizzle = RC_SWIZZLE_XYZW;
108 for (chan = 0; chan < 4; chan++) {
109 unsigned swz = GET_SWZ(reg->Swizzle, chan);
110 if (swz <= RC_SWIZZLE_W) {
111 return 0;
112 }
113 if (swz == RC_SWIZZLE_UNUSED) {
114 SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
115 }
116 }
117 all_inline = 1;
118 } else {
119 new_swizzle = reg->Swizzle;
120 }
121
122 swz = RC_SWIZZLE_UNUSED;
123 found_swizzle = 1;
124 /* Check if all channels have the same swizzle. If they do we can skip
125 * the search for a native swizzle. We only need to check the first
126 * three channels, because any swizzle is legal in the fourth channel.
127 */
128 for (chan = 0; chan < 3; chan++) {
129 unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
130 if (chan_swz == RC_SWIZZLE_UNUSED) {
131 continue;
132 }
133 if (swz == RC_SWIZZLE_UNUSED) {
134 swz = chan_swz;
135 } else if (swz != chan_swz) {
136 found_swizzle = 0;
137 break;
138 }
139 }
140
141 /* Find a legal swizzle */
142
143 /* This loop attempts to find a native swizzle where all the
144 * channels are different. */
145 while (!found_swizzle && !all_inline) {
146 swz0 = GET_SWZ(new_swizzle, 0);
147 swz1 = GET_SWZ(new_swizzle, 1);
148 swz2 = GET_SWZ(new_swizzle, 2);
149
150 /* Swizzle .W. is never legal. */
151 if (swz1 == RC_SWIZZLE_W || swz1 == RC_SWIZZLE_UNUSED || swz1 == RC_SWIZZLE_ZERO ||
152 swz1 == RC_SWIZZLE_HALF || swz1 == RC_SWIZZLE_ONE) {
153 /* We chose Z, because there are two non-repeating
154 * swizzle combinations of the form .Z. There are
155 * only one combination each for .X. and .Y. */
156 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
157 continue;
158 }
159
160 if (swz2 == RC_SWIZZLE_UNUSED) {
161 /* We choose Y, because there are two non-repeating
162 * swizzle combinations of the form ..Y */
163 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
164 continue;
165 }
166
167 switch (swz0) {
168 /* X.. */
169 case RC_SWIZZLE_X:
170 /* Legal swizzles that start with X: XYZ, XXX */
171 switch (swz1) {
172 /* XX. */
173 case RC_SWIZZLE_X:
174 /* The new swizzle will be:
175 * ZXY (XX. => ZX. => ZXY) */
176 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
177 break;
178 /* XY. */
179 case RC_SWIZZLE_Y:
180 /* The new swizzle is XYZ */
181 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
182 found_swizzle = 1;
183 break;
184 /* XZ. */
185 case RC_SWIZZLE_Z:
186 /* XZZ */
187 if (swz2 == RC_SWIZZLE_Z) {
188 /* The new swizzle is XYZ */
189 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
190 found_swizzle = 1;
191 } else { /* XZ[^Z] */
192 /* The new swizzle will be:
193 * YZX (XZ. => YZ. => YZX) */
194 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
195 }
196 break;
197 /* XW. Should have already been handled. */
198 case RC_SWIZZLE_W:
199 assert(0);
200 break;
201 }
202 break;
203 /* Y.. */
204 case RC_SWIZZLE_Y:
205 /* Legal swizzles that start with Y: YYY, YZX */
206 switch (swz1) {
207 /* YY. */
208 case RC_SWIZZLE_Y:
209 /* The new swizzle will be:
210 * XYZ (YY. => XY. => XYZ) */
211 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
212 break;
213 /* YZ. */
214 case RC_SWIZZLE_Z:
215 /* The new swizzle is YZX */
216 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
217 found_swizzle = 1;
218 break;
219 /* YX. */
220 case RC_SWIZZLE_X:
221 /* YXX */
222 if (swz2 == RC_SWIZZLE_X) {
223 /*The new swizzle is YZX */
224 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
225 found_swizzle = 1;
226 } else { /* YX[^X] */
227 /* The new swizzle will be:
228 * ZXY (YX. => ZX. -> ZXY) */
229 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
230 }
231 break;
232 /* YW. Should have already been handled. */
233 case RC_SWIZZLE_W:
234 assert(0);
235 break;
236 }
237 break;
238 /* Z.. */
239 case RC_SWIZZLE_Z:
240 /* Legal swizzles that start with Z: ZZZ, ZXY */
241 switch (swz1) {
242 /* ZZ. */
243 case RC_SWIZZLE_Z:
244 /* The new swizzle will be:
245 * WZY (ZZ. => WZ. => WZY) */
246 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
247 break;
248 /* ZX. */
249 case RC_SWIZZLE_X:
250 /* The new swizzle is ZXY */
251 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
252 found_swizzle = 1;
253 break;
254 /* ZY. */
255 case RC_SWIZZLE_Y:
256 /* ZYY */
257 if (swz2 == RC_SWIZZLE_Y) {
258 /* The new swizzle is ZXY */
259 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
260 found_swizzle = 1;
261 } else { /* ZY[^Y] */
262 /* The new swizzle will be:
263 * XYZ (ZY. => XY. => XYZ) */
264 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
265 }
266 break;
267 /* ZW. Should have already been handled. */
268 case RC_SWIZZLE_W:
269 assert(0);
270 break;
271 }
272 break;
273
274 /* W.. */
275 case RC_SWIZZLE_W:
276 /* Legal swizzles that start with X: WWW, WZY */
277 switch (swz1) {
278 /* WW. Should have already been handled. */
279 case RC_SWIZZLE_W:
280 assert(0);
281 break;
282 /* WZ. */
283 case RC_SWIZZLE_Z:
284 /* The new swizzle will be WZY */
285 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
286 found_swizzle = 1;
287 break;
288 /* WX. */
289 case RC_SWIZZLE_X:
290 /* WY. */
291 case RC_SWIZZLE_Y:
292 /* W[XY]Y */
293 if (swz2 == RC_SWIZZLE_Y) {
294 /* The new swizzle will be WZY */
295 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
296 found_swizzle = 1;
297 } else { /* W[XY][^Y] */
298 /* The new swizzle will be:
299 * ZXY (WX. => XX. => ZX. => ZXY) or
300 * XYZ (WY. => XY. => XYZ)
301 */
302 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
303 }
304 break;
305 }
306 break;
307 /* U.. 0.. 1.. H..*/
308 case RC_SWIZZLE_UNUSED:
309 case RC_SWIZZLE_ZERO:
310 case RC_SWIZZLE_ONE:
311 case RC_SWIZZLE_HALF:
312 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
313 break;
314 }
315 }
316
317 /* Handle the swizzle in the w channel. */
318 swz3 = GET_SWZ(reg->Swizzle, 3);
319
320 /* We can skip this if the swizzle in channel w is an inline constant. */
321 if (is_swizzle_inline_constant(swz3)) {
322 w_inline_constant = true;
323 } else {
324 for (chan = 0; chan < 3; chan++) {
325 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
326 unsigned new_swz = GET_SWZ(new_swizzle, chan);
327 /* If the swizzle in the w channel is the same as the
328 * swizzle in any other channels, we need to rewrite it.
329 * For example:
330 * reg->Swizzle == XWZW
331 * new_swizzle == XYZX
332 * Since the swizzle in the y channel is being
333 * rewritten from W -> Y we need to change the swizzle
334 * in the w channel from W -> Y as well.
335 */
336 if (old_swz == swz3) {
337 SET_SWZ(new_swizzle, 3, GET_SWZ(new_swizzle, chan));
338 break;
339 }
340
341 /* The swizzle in channel w will be overwritten by one
342 * of the new swizzles. */
343 if (new_swz == swz3) {
344 /* Find an unused swizzle */
345 unsigned i;
346 unsigned used = 0;
347 for (i = 0; i < 3; i++) {
348 used |= 1 << GET_SWZ(new_swizzle, i);
349 }
350 for (i = 0; i < 4; i++) {
351 if (used & (1 << i)) {
352 continue;
353 }
354 SET_SWZ(new_swizzle, 3, i);
355 }
356 }
357 }
358 }
359
360 for (chan = 0; chan < 4; chan++) {
361 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
362 unsigned new_swz = GET_SWZ(new_swizzle, chan);
363
364 if (old_swz == RC_SWIZZLE_UNUSED) {
365 continue;
366 }
367
368 /* We don't need to change the swizzle in channel w if it is
369 * an inline constant. These are always legal in the w channel.
370 *
371 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
372 */
373 if (chan == 3 && w_inline_constant) {
374 continue;
375 }
376
377 if (new_swz > RC_SWIZZLE_W) {
378 rc_error(c, "Bad swizzle in try_rewrite_constant()");
379 new_swz = RC_SWIZZLE_X;
380 }
381
382 switch (old_swz) {
383 case RC_SWIZZLE_ZERO:
384 imms[new_swz] = 0.0f;
385 break;
386 case RC_SWIZZLE_HALF:
387 if (reg->Negate & (1 << chan)) {
388 imms[new_swz] = -0.5f;
389 } else {
390 imms[new_swz] = 0.5f;
391 }
392 break;
393 case RC_SWIZZLE_ONE:
394 if (reg->Negate & (1 << chan)) {
395 imms[new_swz] = -1.0f;
396 } else {
397 imms[new_swz] = 1.0f;
398 }
399 break;
400 default:
401 imms[new_swz] = rc_get_constant_value(c, reg->Index, reg->Swizzle, reg->Negate, chan);
402 }
403 SET_SWZ(reg->Swizzle, chan, new_swz);
404 }
405 reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, imms);
406 /* We need to set the register file to CONSTANT in case we are
407 * converting a non-constant register with constant swizzles (e.g.
408 * ONE, ZERO, HALF).
409 */
410 reg->File = RC_FILE_CONSTANT;
411 reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0;
412 return 1;
413 }
414
415 /**
416 * Set all channels not specified by writemaks to unused.
417 */
418 static void
clear_channels(struct rc_instruction * inst,unsigned writemask)419 clear_channels(struct rc_instruction *inst, unsigned writemask)
420 {
421 inst->U.I.DstReg.WriteMask = writemask;
422 for (unsigned chan = 0; chan < 4; chan++) {
423 if (writemask & (1 << chan))
424 continue;
425
426 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
427 for (unsigned src = 0; src < opcode->NumSrcRegs; src++) {
428 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
429 }
430 }
431 /* TODO: We could in theory add constant swizzles back as well,
432 * they will be all legal when we have just a single channel,
433 * to save some sources and help the pair scheduling later. */
434 }
435
436 static bool
try_splitting_single_channel(struct radeon_compiler * c,struct rc_instruction * inst)437 try_splitting_single_channel(struct radeon_compiler *c, struct rc_instruction *inst)
438 {
439 for (unsigned chan = 0; chan < 3; chan++) {
440 struct rc_instruction *new_inst;
441 new_inst = rc_insert_new_instruction(c, inst);
442 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
443 clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan));
444
445 const struct rc_opcode_info *opcode = rc_get_opcode_info(new_inst->U.I.Opcode);
446 bool valid_swizzles = true;
447
448 for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
449 struct rc_src_register *reg = &new_inst->U.I.SrcReg[src];
450
451 if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg))
452 valid_swizzles = false;
453 }
454
455 if (!valid_swizzles) {
456 rc_remove_instruction(new_inst);
457 } else {
458 clear_channels(inst, 1 << chan);
459 return true;
460 }
461 }
462 return false;
463 }
464
465 static bool
try_splitting_instruction(struct radeon_compiler * c,struct rc_instruction * inst)466 try_splitting_instruction(struct radeon_compiler *c, struct rc_instruction *inst)
467 {
468 /* Adding more output instructions in FS is bad for performance. */
469 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
470 return false;
471
472 /* When only single channel of the swizzle is wrong, like xwzw,
473 * it is best to just split the single channel out.
474 */
475 if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW || inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) {
476 if (try_splitting_single_channel(c, inst))
477 return true;
478 }
479
480 for (unsigned chan = 0; chan < 3; chan++) {
481 if (!(inst->U.I.DstReg.WriteMask & (1 << chan)))
482 continue;
483
484 unsigned next_chan;
485 for (next_chan = chan + 1; next_chan < 4; next_chan++) {
486 if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan)))
487 continue;
488
489 /* We don't want to split the last used x/y/z channel and the
490 * w channel. Pair scheduling might be able to put it back
491 * together, but we don't trust it that much.
492 *
493 * Next is W already, rewrite the original inst and we are done.
494 */
495 if (next_chan == 3) {
496 clear_channels(inst, (1 << chan) | (1 << next_chan));
497 return true;
498 }
499
500 struct rc_instruction *new_inst;
501 new_inst = rc_insert_new_instruction(c, inst->Prev);
502 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
503 clear_channels(new_inst, 1 << chan);
504 break;
505 }
506
507 /* No next chan */
508 if (next_chan == 4) {
509 clear_channels(inst, 1 << chan);
510 return true;
511 }
512 }
513 assert(0 && "Unreachable\n");
514 return false;
515 }
516
517 void
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)518 rc_dataflow_swizzles(struct radeon_compiler *c, void *user)
519 {
520 struct rc_instruction *inst;
521
522 for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
523 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
524 unsigned src, usemask;
525 unsigned total_splits = 0;
526 struct rc_swizzle_split split;
527
528 /* If multiple sources needs splitting or some source needs to split
529 * too many times, it is actually better to just split the whole ALU
530 * instruction to separate channels instead of inserting extra movs.
531 */
532 for (src = 0; src < opcode->NumSrcRegs; ++src) {
533 /* Don't count invalid swizzles from immediates, we can just
534 * insert new immediates with the correct order later.
535 */
536 if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index) &&
537 c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) {
538 total_splits++;
539 } else {
540 total_splits += get_swizzle_split(c, &split, inst, src, &usemask);
541 }
542 }
543
544 /* Even if there is only a single split, i.e., two extra movs, this still
545 * accounts to three instructions, the same as when we split
546 * the original instruction right away.
547 */
548 if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) {
549 if (try_splitting_instruction(c, inst))
550 continue;
551 }
552
553 /* For texturing or non-componentwise opcodes we do the old way
554 * of adding extra movs.
555 */
556 for (src = 0; src < opcode->NumSrcRegs; ++src) {
557 struct rc_src_register *reg = &inst->U.I.SrcReg[src];
558 if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
559 continue;
560 }
561 if (!c->is_r500 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
562 (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) &&
563 try_rewrite_constant(c, reg)) {
564 continue;
565 }
566 rewrite_source(c, inst, src);
567 }
568 }
569 if (c->Debug & RC_DBG_LOG)
570 rc_constants_print(&c->Program.Constants, NULL);
571 }
572