• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2012 Advanced Micro Devices, Inc.
4  * Authors:
5  *   Nicolai Haehnle
6  *   Tom Stellard <thomas.stellard@amd.com>
7  * SPDX-License-Identifier: MIT
8  */
9 
10 #include "radeon_dataflow.h"
11 
12 #include "radeon_code.h"
13 #include "radeon_compiler.h"
14 #include "radeon_compiler_util.h"
15 #include "radeon_swizzle.h"
16 
17 static unsigned int
get_swizzle_split(struct radeon_compiler * c,struct rc_swizzle_split * split,struct rc_instruction * inst,unsigned src,unsigned * usemask)18 get_swizzle_split(struct radeon_compiler *c, struct rc_swizzle_split *split,
19                   struct rc_instruction *inst, unsigned src, unsigned *usemask)
20 {
21    *usemask = 0;
22    for (unsigned int chan = 0; chan < 4; ++chan) {
23       if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
24          *usemask |= 1 << chan;
25    }
26 
27    c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split);
28    return split->NumPhases;
29 }
30 
31 static void
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)32 rewrite_source(struct radeon_compiler *c, struct rc_instruction *inst, unsigned src)
33 {
34    struct rc_swizzle_split split;
35    unsigned int tempreg = rc_find_free_temporary(c);
36    unsigned int usemask;
37 
38    get_swizzle_split(c, &split, inst, src, &usemask);
39 
40    for (unsigned int phase = 0; phase < split.NumPhases; ++phase) {
41       struct rc_instruction *mov = rc_insert_new_instruction(c, inst->Prev);
42       unsigned int masked_negate;
43 
44       mov->U.I.Opcode = RC_OPCODE_MOV;
45       mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
46       mov->U.I.DstReg.Index = tempreg;
47       mov->U.I.DstReg.WriteMask = split.Phase[phase];
48       mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
49       mov->U.I.PreSub = inst->U.I.PreSub;
50 
51       /* RC_OPCODE_KIL will trigger if the value is -0 and TEX srcs don't have negate
52        * so considering something like this pattern
53        *  0: ADD temp[1].x, input[0].w___, const[0].-x___;
54        *  1: CMP temp[2].x, temp[1].x___, none.1___, none.0___;
55        *  2: KIL -temp[2].xxxx;
56        *  we don't want to insert MOV, because HW docs advise we tranlate MOV to MAX
57        *  (with RC_OPCODE_DISABLE) and this in turn will mean the KIL will always
58        *  trigger (as it will have either -1 or -0). So emit here ADD src0 + 0 instead.
59        */
60       if (inst->U.I.Opcode == RC_OPCODE_KIL) {
61          assert(!phase);
62          mov->U.I.Opcode = RC_OPCODE_ADD;
63          mov->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
64       }
65 
66       for (unsigned int chan = 0; chan < 4; ++chan) {
67          if (!GET_BIT(split.Phase[phase], chan))
68             SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
69       }
70 
71       masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
72       if (masked_negate == 0)
73          mov->U.I.SrcReg[0].Negate = 0;
74       else if (masked_negate == split.Phase[phase])
75          mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
76    }
77 
78    inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
79    inst->U.I.SrcReg[src].Index = tempreg;
80    inst->U.I.SrcReg[src].Swizzle = 0;
81    inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
82    inst->U.I.SrcReg[src].Abs = 0;
83    for (unsigned int chan = 0; chan < 4; ++chan) {
84       SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
85               GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
86    }
87 }
88 
89 /**
90  * This function will attempt to rewrite non-native swizzles that read from
91  * immediate registers by rearranging the immediates to allow the
92  * instruction to use native swizzles.
93  */
94 static unsigned
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)95 try_rewrite_constant(struct radeon_compiler *c, struct rc_src_register *reg)
96 {
97    unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
98    unsigned all_inline = 0;
99    bool w_inline_constant = false;
100    float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
101 
102    if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
103       /* The register does not contain immediates, but if all
104        * the swizzles are inline constants, we can still rewrite
105        * it. */
106 
107       new_swizzle = RC_SWIZZLE_XYZW;
108       for (chan = 0; chan < 4; chan++) {
109          unsigned swz = GET_SWZ(reg->Swizzle, chan);
110          if (swz <= RC_SWIZZLE_W) {
111             return 0;
112          }
113          if (swz == RC_SWIZZLE_UNUSED) {
114             SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
115          }
116       }
117       all_inline = 1;
118    } else {
119       new_swizzle = reg->Swizzle;
120    }
121 
122    swz = RC_SWIZZLE_UNUSED;
123    found_swizzle = 1;
124    /* Check if all channels have the same swizzle.  If they do we can skip
125     * the search for a native swizzle.  We only need to check the first
126     * three channels, because any swizzle is legal in the fourth channel.
127     */
128    for (chan = 0; chan < 3; chan++) {
129       unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
130       if (chan_swz == RC_SWIZZLE_UNUSED) {
131          continue;
132       }
133       if (swz == RC_SWIZZLE_UNUSED) {
134          swz = chan_swz;
135       } else if (swz != chan_swz) {
136          found_swizzle = 0;
137          break;
138       }
139    }
140 
141    /* Find a legal swizzle */
142 
143    /* This loop attempts to find a native swizzle where all the
144     * channels are different. */
145    while (!found_swizzle && !all_inline) {
146       swz0 = GET_SWZ(new_swizzle, 0);
147       swz1 = GET_SWZ(new_swizzle, 1);
148       swz2 = GET_SWZ(new_swizzle, 2);
149 
150       /* Swizzle .W. is never legal. */
151       if (swz1 == RC_SWIZZLE_W || swz1 == RC_SWIZZLE_UNUSED || swz1 == RC_SWIZZLE_ZERO ||
152           swz1 == RC_SWIZZLE_HALF || swz1 == RC_SWIZZLE_ONE) {
153          /* We chose Z, because there are two non-repeating
154           * swizzle combinations of the form .Z. There are
155           * only one combination each for .X. and .Y. */
156          SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
157          continue;
158       }
159 
160       if (swz2 == RC_SWIZZLE_UNUSED) {
161          /* We choose Y, because there are two non-repeating
162           * swizzle combinations of the form ..Y */
163          SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
164          continue;
165       }
166 
167       switch (swz0) {
168       /* X.. */
169       case RC_SWIZZLE_X:
170          /* Legal swizzles that start with X: XYZ, XXX */
171          switch (swz1) {
172          /* XX. */
173          case RC_SWIZZLE_X:
174             /*  The new swizzle will be:
175              *  ZXY (XX. => ZX. => ZXY) */
176             SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
177             break;
178          /* XY. */
179          case RC_SWIZZLE_Y:
180             /* The new swizzle is XYZ */
181             SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
182             found_swizzle = 1;
183             break;
184          /* XZ. */
185          case RC_SWIZZLE_Z:
186             /* XZZ */
187             if (swz2 == RC_SWIZZLE_Z) {
188                /* The new swizzle is XYZ */
189                SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
190                found_swizzle = 1;
191             } else { /* XZ[^Z] */
192                /* The new swizzle will be:
193                 * YZX (XZ. => YZ. => YZX) */
194                SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
195             }
196             break;
197          /* XW. Should have already been handled. */
198          case RC_SWIZZLE_W:
199             assert(0);
200             break;
201          }
202          break;
203       /* Y.. */
204       case RC_SWIZZLE_Y:
205          /* Legal swizzles that start with Y: YYY, YZX */
206          switch (swz1) {
207          /* YY. */
208          case RC_SWIZZLE_Y:
209             /* The new swizzle will be:
210              * XYZ (YY. => XY. => XYZ) */
211             SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
212             break;
213          /* YZ. */
214          case RC_SWIZZLE_Z:
215             /* The new swizzle is YZX */
216             SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
217             found_swizzle = 1;
218             break;
219          /* YX. */
220          case RC_SWIZZLE_X:
221             /* YXX */
222             if (swz2 == RC_SWIZZLE_X) {
223                /*The new swizzle is YZX */
224                SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
225                found_swizzle = 1;
226             } else { /* YX[^X] */
227                /* The new swizzle will be:
228                 * ZXY (YX. => ZX. -> ZXY) */
229                SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
230             }
231             break;
232          /* YW. Should have already been handled. */
233          case RC_SWIZZLE_W:
234             assert(0);
235             break;
236          }
237          break;
238       /* Z.. */
239       case RC_SWIZZLE_Z:
240          /* Legal swizzles that start with Z: ZZZ, ZXY */
241          switch (swz1) {
242          /* ZZ. */
243          case RC_SWIZZLE_Z:
244             /* The new swizzle will be:
245              * WZY (ZZ. => WZ. => WZY) */
246             SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
247             break;
248          /* ZX. */
249          case RC_SWIZZLE_X:
250             /* The new swizzle is ZXY */
251             SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
252             found_swizzle = 1;
253             break;
254          /* ZY. */
255          case RC_SWIZZLE_Y:
256             /* ZYY */
257             if (swz2 == RC_SWIZZLE_Y) {
258                /* The new swizzle is ZXY */
259                SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
260                found_swizzle = 1;
261             } else { /* ZY[^Y] */
262                /* The new swizzle will be:
263                 * XYZ (ZY. => XY. => XYZ) */
264                SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
265             }
266             break;
267          /* ZW. Should have already been handled. */
268          case RC_SWIZZLE_W:
269             assert(0);
270             break;
271          }
272          break;
273 
274       /* W.. */
275       case RC_SWIZZLE_W:
276          /* Legal swizzles that start with X: WWW, WZY */
277          switch (swz1) {
278          /* WW. Should have already been handled. */
279          case RC_SWIZZLE_W:
280             assert(0);
281             break;
282          /* WZ. */
283          case RC_SWIZZLE_Z:
284             /* The new swizzle will be WZY */
285             SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
286             found_swizzle = 1;
287             break;
288          /* WX. */
289          case RC_SWIZZLE_X:
290          /* WY. */
291          case RC_SWIZZLE_Y:
292             /* W[XY]Y */
293             if (swz2 == RC_SWIZZLE_Y) {
294                /* The new swizzle will be WZY */
295                SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
296                found_swizzle = 1;
297             } else { /* W[XY][^Y] */
298                /* The new swizzle will be:
299                 * ZXY (WX. => XX. => ZX. => ZXY) or
300                 * XYZ (WY. => XY. => XYZ)
301                 */
302                SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
303             }
304             break;
305          }
306          break;
307       /* U.. 0.. 1.. H..*/
308       case RC_SWIZZLE_UNUSED:
309       case RC_SWIZZLE_ZERO:
310       case RC_SWIZZLE_ONE:
311       case RC_SWIZZLE_HALF:
312          SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
313          break;
314       }
315    }
316 
317    /* Handle the swizzle in the w channel. */
318    swz3 = GET_SWZ(reg->Swizzle, 3);
319 
320    /* We can skip this if the swizzle in channel w is an inline constant. */
321    if (is_swizzle_inline_constant(swz3)) {
322       w_inline_constant = true;
323    } else {
324       for (chan = 0; chan < 3; chan++) {
325          unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
326          unsigned new_swz = GET_SWZ(new_swizzle, chan);
327          /* If the swizzle in the w channel is the same as the
328           * swizzle in any other channels, we need to rewrite it.
329           * For example:
330           * reg->Swizzle == XWZW
331           * new_swizzle  == XYZX
332           * Since the swizzle in the y channel is being
333           * rewritten from W -> Y we need to change the swizzle
334           * in the w channel from W -> Y as well.
335           */
336          if (old_swz == swz3) {
337             SET_SWZ(new_swizzle, 3, GET_SWZ(new_swizzle, chan));
338             break;
339          }
340 
341          /* The swizzle in channel w will be overwritten by one
342           * of the new swizzles. */
343          if (new_swz == swz3) {
344             /* Find an unused swizzle */
345             unsigned i;
346             unsigned used = 0;
347             for (i = 0; i < 3; i++) {
348                used |= 1 << GET_SWZ(new_swizzle, i);
349             }
350             for (i = 0; i < 4; i++) {
351                if (used & (1 << i)) {
352                   continue;
353                }
354                SET_SWZ(new_swizzle, 3, i);
355             }
356          }
357       }
358    }
359 
360    for (chan = 0; chan < 4; chan++) {
361       unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
362       unsigned new_swz = GET_SWZ(new_swizzle, chan);
363 
364       if (old_swz == RC_SWIZZLE_UNUSED) {
365          continue;
366       }
367 
368       /* We don't need to change the swizzle in channel w if it is
369        * an inline constant.  These are always legal in the w channel.
370        *
371        * Swizzles with a value > RC_SWIZZLE_W are inline constants.
372        */
373       if (chan == 3 && w_inline_constant) {
374          continue;
375       }
376 
377       if (new_swz > RC_SWIZZLE_W) {
378          rc_error(c, "Bad swizzle in try_rewrite_constant()");
379          new_swz = RC_SWIZZLE_X;
380       }
381 
382       switch (old_swz) {
383       case RC_SWIZZLE_ZERO:
384          imms[new_swz] = 0.0f;
385          break;
386       case RC_SWIZZLE_HALF:
387          if (reg->Negate & (1 << chan)) {
388             imms[new_swz] = -0.5f;
389          } else {
390             imms[new_swz] = 0.5f;
391          }
392          break;
393       case RC_SWIZZLE_ONE:
394          if (reg->Negate & (1 << chan)) {
395             imms[new_swz] = -1.0f;
396          } else {
397             imms[new_swz] = 1.0f;
398          }
399          break;
400       default:
401          imms[new_swz] = rc_get_constant_value(c, reg->Index, reg->Swizzle, reg->Negate, chan);
402       }
403       SET_SWZ(reg->Swizzle, chan, new_swz);
404    }
405    reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, imms);
406    /* We need to set the register file to CONSTANT in case we are
407     * converting a non-constant register with constant swizzles (e.g.
408     * ONE, ZERO, HALF).
409     */
410    reg->File = RC_FILE_CONSTANT;
411    reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0;
412    return 1;
413 }
414 
415 /**
416  * Set all channels not specified by writemaks to unused.
417  */
418 static void
clear_channels(struct rc_instruction * inst,unsigned writemask)419 clear_channels(struct rc_instruction *inst, unsigned writemask)
420 {
421    inst->U.I.DstReg.WriteMask = writemask;
422    for (unsigned chan = 0; chan < 4; chan++) {
423       if (writemask & (1 << chan))
424          continue;
425 
426       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
427       for (unsigned src = 0; src < opcode->NumSrcRegs; src++) {
428          SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
429       }
430    }
431    /* TODO: We could in theory add constant swizzles back as well,
432     * they will be all legal when we have just a single channel,
433     * to save some sources and help the pair scheduling later. */
434 }
435 
436 static bool
try_splitting_single_channel(struct radeon_compiler * c,struct rc_instruction * inst)437 try_splitting_single_channel(struct radeon_compiler *c, struct rc_instruction *inst)
438 {
439    for (unsigned chan = 0; chan < 3; chan++) {
440       struct rc_instruction *new_inst;
441       new_inst = rc_insert_new_instruction(c, inst);
442       memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
443       clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan));
444 
445       const struct rc_opcode_info *opcode = rc_get_opcode_info(new_inst->U.I.Opcode);
446       bool valid_swizzles = true;
447 
448       for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
449          struct rc_src_register *reg = &new_inst->U.I.SrcReg[src];
450 
451          if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg))
452             valid_swizzles = false;
453       }
454 
455       if (!valid_swizzles) {
456          rc_remove_instruction(new_inst);
457       } else {
458          clear_channels(inst, 1 << chan);
459          return true;
460       }
461    }
462    return false;
463 }
464 
465 static bool
try_splitting_instruction(struct radeon_compiler * c,struct rc_instruction * inst)466 try_splitting_instruction(struct radeon_compiler *c, struct rc_instruction *inst)
467 {
468    /* Adding more output instructions in FS is bad for performance. */
469    if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
470       return false;
471 
472    /* When only single channel of the swizzle is wrong, like xwzw,
473     * it is best to just split the single channel out.
474     */
475    if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW || inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) {
476       if (try_splitting_single_channel(c, inst))
477          return true;
478    }
479 
480    for (unsigned chan = 0; chan < 3; chan++) {
481       if (!(inst->U.I.DstReg.WriteMask & (1 << chan)))
482          continue;
483 
484       unsigned next_chan;
485       for (next_chan = chan + 1; next_chan < 4; next_chan++) {
486          if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan)))
487             continue;
488 
489          /* We don't want to split the last used x/y/z channel and the
490           * w channel. Pair scheduling might be able to put it back
491           * together, but we don't trust it that much.
492           *
493           * Next is W already, rewrite the original inst and we are done.
494           */
495          if (next_chan == 3) {
496             clear_channels(inst, (1 << chan) | (1 << next_chan));
497             return true;
498          }
499 
500          struct rc_instruction *new_inst;
501          new_inst = rc_insert_new_instruction(c, inst->Prev);
502          memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
503          clear_channels(new_inst, 1 << chan);
504          break;
505       }
506 
507       /* No next chan */
508       if (next_chan == 4) {
509          clear_channels(inst, 1 << chan);
510          return true;
511       }
512    }
513    assert(0 && "Unreachable\n");
514    return false;
515 }
516 
517 void
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)518 rc_dataflow_swizzles(struct radeon_compiler *c, void *user)
519 {
520    struct rc_instruction *inst;
521 
522    for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
523       const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
524       unsigned src, usemask;
525       unsigned total_splits = 0;
526       struct rc_swizzle_split split;
527 
528       /* If multiple sources needs splitting or some source needs to split
529        * too many times, it is actually better to just split the whole ALU
530        * instruction to separate channels instead of inserting extra movs.
531        */
532       for (src = 0; src < opcode->NumSrcRegs; ++src) {
533          /* Don't count invalid swizzles from immediates, we can just
534           * insert new immediates with the correct order later.
535           */
536          if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index) &&
537              c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) {
538             total_splits++;
539          } else {
540             total_splits += get_swizzle_split(c, &split, inst, src, &usemask);
541          }
542       }
543 
544       /* Even if there is only a single split, i.e., two extra movs, this still
545        * accounts to three instructions, the same as when we split
546        * the original instruction right away.
547        */
548       if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) {
549          if (try_splitting_instruction(c, inst))
550             continue;
551       }
552 
553       /* For texturing or non-componentwise opcodes we do the old way
554        * of adding extra movs.
555        */
556       for (src = 0; src < opcode->NumSrcRegs; ++src) {
557          struct rc_src_register *reg = &inst->U.I.SrcReg[src];
558          if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
559             continue;
560          }
561          if (!c->is_r500 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
562              (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) &&
563              try_rewrite_constant(c, reg)) {
564             continue;
565          }
566          rewrite_source(c, inst, src);
567       }
568    }
569    if (c->Debug & RC_DBG_LOG)
570       rc_constants_print(&c->Program.Constants, NULL);
571 }
572