• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Any MIMG instructions that use tfe or lwe require an initialization of the
11 /// result register that will be written in the case of a memory access
12 /// failure. The required code is also added to tie this init code to the
13 /// result of the img instruction.
14 ///
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIInstrInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Target/TargetMachine.h"
28 
29 #define DEBUG_TYPE "si-img-init"
30 
31 using namespace llvm;
32 
33 namespace {
34 
35 class SIAddIMGInit : public MachineFunctionPass {
36 public:
37   static char ID;
38 
39 public:
SIAddIMGInit()40   SIAddIMGInit() : MachineFunctionPass(ID) {
41     initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
42   }
43 
44   bool runOnMachineFunction(MachineFunction &MF) override;
45 
getAnalysisUsage(AnalysisUsage & AU) const46   void getAnalysisUsage(AnalysisUsage &AU) const override {
47     AU.setPreservesCFG();
48     MachineFunctionPass::getAnalysisUsage(AU);
49   }
50 };
51 
52 } // End anonymous namespace.
53 
54 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
55 
56 char SIAddIMGInit::ID = 0;
57 
58 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
59 
createSIAddIMGInitPass()60 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
61 
runOnMachineFunction(MachineFunction & MF)62 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
63   MachineRegisterInfo &MRI = MF.getRegInfo();
64   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
65   const SIInstrInfo *TII = ST.getInstrInfo();
66   const SIRegisterInfo *RI = ST.getRegisterInfo();
67   bool Changed = false;
68 
69   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
70        ++BI) {
71     MachineBasicBlock &MBB = *BI;
72     MachineBasicBlock::iterator I, Next;
73     for (I = MBB.begin(); I != MBB.end(); I = Next) {
74       Next = std::next(I);
75       MachineInstr &MI = *I;
76 
77       auto Opcode = MI.getOpcode();
78       if (TII->isMIMG(Opcode) && !MI.mayStore()) {
79         MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
80         MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
81         MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
82 
83         // Check for instructions that don't have tfe or lwe fields
84         // There shouldn't be any at this point.
85         assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
86 
87         unsigned TFEVal = TFE->getImm();
88         unsigned LWEVal = LWE->getImm();
89         unsigned D16Val = D16 ? D16->getImm() : 0;
90 
91         if (TFEVal || LWEVal) {
92           // At least one of TFE or LWE are non-zero
93           // We have to insert a suitable initialization of the result value and
94           // tie this to the dest of the image instruction.
95 
96           const DebugLoc &DL = MI.getDebugLoc();
97 
98           int DstIdx =
99               AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
100 
101           // Calculate which dword we have to initialize to 0.
102           MachineOperand *MO_Dmask =
103               TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
104 
105           // check that dmask operand is found.
106           assert(MO_Dmask && "Expected dmask operand in instruction");
107 
108           unsigned dmask = MO_Dmask->getImm();
109           // Determine the number of active lanes taking into account the
110           // Gather4 special case
111           unsigned ActiveLanes =
112               TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
113 
114           // Subreg indices are counted from 1
115           // When D16 then we want next whole VGPR after write data.
116           static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
117 
118           bool Packed = !ST.hasUnpackedD16VMem();
119 
120           unsigned InitIdx =
121               D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
122 
123           // Abandon attempt if the dst size isn't large enough
124           // - this is in fact an error but this is picked up elsewhere and
125           // reported correctly.
126           uint32_t DstSize =
127               RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
128           if (DstSize < InitIdx)
129             continue;
130 
131           // Create a register for the intialization value.
132           Register PrevDst =
133               MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
134           unsigned NewDst = 0; // Final initialized value will be in here
135 
136           // If PRTStrictNull feature is enabled (the default) then initialize
137           // all the result registers to 0, otherwise just the error indication
138           // register (VGPRn+1)
139           unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
140           unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
141 
142           if (DstSize == 1) {
143             // In this case we can just initialize the result directly
144             BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
145                 .addImm(0);
146             NewDst = PrevDst;
147           } else {
148             BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
149             for (; SizeLeft; SizeLeft--, CurrIdx++) {
150               NewDst =
151                   MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
152               // Initialize dword
153               Register SubReg =
154                   MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
155               BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
156                   .addImm(0);
157               // Insert into the super-reg
158               BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
159                   .addReg(PrevDst)
160                   .addReg(SubReg)
161                   .addImm(CurrIdx);
162 
163               PrevDst = NewDst;
164             }
165           }
166 
167           // Add as an implicit operand
168           MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
169 
170           // Tie the just added implicit operand to the dst
171           MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
172 
173           Changed = true;
174         }
175       }
176     }
177   }
178 
179   return Changed;
180 }
181