1 //===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Vector, Reduction, and Cube instructions need to fill the entire instruction
10 // group to work correctly. This pass expands these individual instructions
11 // into several instructions that will completely fill the instruction group.
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "R600Defines.h"
16 #include "R600InstrInfo.h"
17 #include "R600RegisterInfo.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21
22 using namespace llvm;
23
24 namespace {
25
26 class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
27
28 private:
29 static char ID;
30 const R600InstrInfo *TII;
31
32 public:
R600ExpandSpecialInstrsPass(TargetMachine & tm)33 R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
34 TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
35
36 virtual bool runOnMachineFunction(MachineFunction &MF);
37
getPassName() const38 const char *getPassName() const {
39 return "R600 Expand special instructions pass";
40 }
41 };
42
43 } // End anonymous namespace
44
45 char R600ExpandSpecialInstrsPass::ID = 0;
46
createR600ExpandSpecialInstrsPass(TargetMachine & TM)47 FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
48 return new R600ExpandSpecialInstrsPass(TM);
49 }
50
runOnMachineFunction(MachineFunction & MF)51 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
52
53 const R600RegisterInfo &TRI = TII->getRegisterInfo();
54
55 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
56 BB != BB_E; ++BB) {
57 MachineBasicBlock &MBB = *BB;
58 MachineBasicBlock::iterator I = MBB.begin();
59 while (I != MBB.end()) {
60 MachineInstr &MI = *I;
61 I = llvm::next(I);
62
63 bool IsReduction = TII->isReductionOp(MI.getOpcode());
64 bool IsVector = TII->isVector(MI);
65 bool IsCube = TII->isCubeOp(MI.getOpcode());
66 if (!IsReduction && !IsVector && !IsCube) {
67 continue;
68 }
69
70 // Expand the instruction
71 //
72 // Reduction instructions:
73 // T0_X = DP4 T1_XYZW, T2_XYZW
74 // becomes:
75 // TO_X = DP4 T1_X, T2_X
76 // TO_Y (write masked) = DP4 T1_Y, T2_Y
77 // TO_Z (write masked) = DP4 T1_Z, T2_Z
78 // TO_W (write masked) = DP4 T1_W, T2_W
79 //
80 // Vector instructions:
81 // T0_X = MULLO_INT T1_X, T2_X
82 // becomes:
83 // T0_X = MULLO_INT T1_X, T2_X
84 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
85 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
86 // T0_W (write masked) = MULLO_INT T1_X, T2_X
87 //
88 // Cube instructions:
89 // T0_XYZW = CUBE T1_XYZW
90 // becomes:
91 // TO_X = CUBE T1_Z, T1_Y
92 // T0_Y = CUBE T1_Z, T1_X
93 // T0_Z = CUBE T1_X, T1_Z
94 // T0_W = CUBE T1_Y, T1_Z
95 for (unsigned Chan = 0; Chan < 4; Chan++) {
96 unsigned DstReg = MI.getOperand(0).getReg();
97 unsigned Src0 = MI.getOperand(1).getReg();
98 unsigned Src1 = 0;
99
100 // Determine the correct source registers
101 if (!IsCube) {
102 Src1 = MI.getOperand(2).getReg();
103 }
104 if (IsReduction) {
105 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
106 Src0 = TRI.getSubReg(Src0, SubRegIndex);
107 Src1 = TRI.getSubReg(Src1, SubRegIndex);
108 } else if (IsCube) {
109 static const int CubeSrcSwz[] = {2, 2, 0, 1};
110 unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
111 unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
112 Src1 = TRI.getSubReg(Src0, SubRegIndex1);
113 Src0 = TRI.getSubReg(Src0, SubRegIndex0);
114 }
115
116 // Determine the correct destination registers;
117 unsigned Flags = 0;
118 if (IsCube) {
119 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
120 DstReg = TRI.getSubReg(DstReg, SubRegIndex);
121 } else {
122 // Mask the write if the original instruction does not write to
123 // the current Channel.
124 Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
125 unsigned DstBase = TRI.getHWRegIndex(DstReg);
126 DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
127 }
128
129 // Set the IsLast bit
130 Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
131
132 // Add the new instruction
133 unsigned Opcode;
134 if (IsCube) {
135 switch (MI.getOpcode()) {
136 case AMDGPU::CUBE_r600_pseudo:
137 Opcode = AMDGPU::CUBE_r600_real;
138 break;
139 case AMDGPU::CUBE_eg_pseudo:
140 Opcode = AMDGPU::CUBE_eg_real;
141 break;
142 default:
143 assert(!"Unknown CUBE instruction");
144 Opcode = 0;
145 break;
146 }
147 } else {
148 Opcode = MI.getOpcode();
149 }
150 MachineInstr *NewMI =
151 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
152 .addReg(Src0)
153 .addReg(Src1)
154 .addImm(0); // Flag
155
156 NewMI->setIsInsideBundle(Chan != 0);
157 TII->addFlag(NewMI, 0, Flags);
158 }
159 MI.eraseFromParent();
160 }
161 }
162 return false;
163 }
164