//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/Target/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
class MachineRegisterInfo;

class AMDGPUTargetLowering : public TargetLowering {
protected:
  const AMDGPUSubtarget *Subtarget;

  SDValue LowerConstantInitializer(const Constant *Init, const GlobalValue *GV,
                                   const SDValue &InitPtr, SDValue Chain,
                                   SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  /// \brief Lower vector stores by merging the vector elements into an integer
  /// of the same bitwidth.
  SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
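  // Editorial illustration (not part of the in-tree comments): MergeVectorStore
  // covers cases such as a store of <4 x i8>, which can be emitted as a single
  // i32 store of the bitcast value, assuming an i32 store is legal for the
  // address space:
  //
  //   store <4 x i8> %val, <4 x i8> addrspace(1)* %ptr
  //     -->
  //   %merged = bitcast <4 x i8> %val to i32
  //   store i32 %merged, i32 addrspace(1)* %ptr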

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                             SDValue RHS, DAGCombinerInfo &DCI) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
  static EVT getEquivalentBitType(LLVMContext &Context, EVT VT);

  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return 64-bit value Op as two 32-bit integers.
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
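  // Editorial sketch (an assumption, not a contract of this interface): one
  // straightforward way to split a 64-bit value is to bitcast it to v2i32 and
  // extract the two elements, e.g.:
  //
  //   SDLoc SL(Op);
  //   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
  //   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec,
  //                            DAG.getConstant(0, SL, MVT::i32));
  //   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec,
  //                            DAG.getConstant(1, SL, MVT::i32));
  //   return std::make_pair(Lo, Hi);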

  /// \brief Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// \brief Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

  /// The SelectionDAGBuilder will automatically promote function arguments
  /// with illegal types. However, this does not work for the AMDGPU targets
  /// since the function arguments are stored in memory as these illegal types.
  /// In order to handle this properly, we need to get the original type sizes
  /// from the LLVM IR Function and fix up the ISD::InputArg values before
  /// passing them to AnalyzeFormalArguments().
  void getOriginalFunctionArgs(SelectionDAG &DAG,
                               const Function *F,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SmallVectorImpl<ISD::InputArg> &OrigIns) const;
  void AnalyzeFormalArguments(CCState &State,
                              const SmallVectorImpl<ISD::InputArg> &Ins) const;
  void AnalyzeReturn(CCState &State,
                     const SmallVectorImpl<ISD::OutputArg> &Outs) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

  MVT getVectorIdxTy(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load,
                             ISD::LoadExtType ExtType,
                             EVT ExtVT) const override;

  bool isLoadBitCastBeneficial(EVT, EVT) const final;

  bool storeOfVectorConstantIsCheap(EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz() const override;
  bool isCheapToSpeculateCtlz() const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
                                  SelectionDAG &DAG) const;

  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode *N,
                          SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue getRsqrtEstimate(SDValue Operand,
                           DAGCombinerInfo &DCI,
                           unsigned &RefinementSteps,
                           bool &UseOneConstNR) const override;
  SDValue getRecipEstimate(SDValue Operand,
                           DAGCombinerInfo &DCI,
                           unsigned &RefinementSteps) const override;

  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// \brief Determine which of the bits specified in \p Mask are known to be
  /// either zero or one and return them in the \p KnownZero and \p KnownOne
  /// bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     APInt &KnownZero,
                                     APInt &KnownOne,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg.
  virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
                                       const TargetRegisterClass *RC,
                                       unsigned Reg, EVT VT) const;

  enum ImplicitParameter {
    FIRST_IMPLICIT,
    GRID_DIM = FIRST_IMPLICIT,
    GRID_OFFSET,
  };

  /// \brief Helper function that returns the byte offset of the given
  /// type of implicit parameter.
  uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
                                      const ImplicitParameter Param) const;
};

namespace AMDGPUISD {

enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  CALL,        // Function call based on a single integer
  UMUL,        // 32-bit unsigned multiplication
  BRANCH_COND,
  // End AMDIL ISD Opcodes
  ENDPGM,
  RETURN,
  DWORDADDR,
  FRACT,
  CLAMP,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to
  // 100 pi. Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,
  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  TRIG_PREOP, // 1 ULP max error for f64

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  // For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RSQ_LEGACY,
  RSQ_CLAMP,
  LDEXP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32,  // Extract range of bits with zero extension to 32 bits.
  BFE_I32,  // Extract range of bits with sign extension to 32 bits.
  BFI,      // (src0 & src1) | (~src0 & src2)
  BFM,      // Insert a range of bits into a 32-bit word.
  FFBH_U32, // ctlz with -1 if input is zero.
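  // Editorial examples (illustrative only) of the bitfield nodes above,
  // assuming the usual (src, offset, width) operand order:
  //   BFE_U32 0xABCD1234, 8, 8  --> 0x12        (bits [15:8], zero-extended)
  //   BFE_I32 0x0000FF00, 8, 8  --> 0xFFFFFFFF  (bits [15:8], sign-extended)
  //   BFI     0x0000FFFF, A, B  --> (A & 0xFFFF) | (B & 0xFFFF0000)
  //   FFBH_U32 0x00010000       --> 15          (count of leading zeros)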
  MUL_U24,
  MUL_I24,
  MAD_U24,
  MAD_I24,
  TEXTURE_FETCH,
  EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,
  LOAD_INPUT,
  SAMPLE,
  SAMPLEB,
  SAMPLED,
  SAMPLEL,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,
  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X  |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  SENDMSG,
  INTERP_MOV,
  INTERP_P1,
  INTERP_P2,
  PC_ADD_REL_OFFSET,

  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
  STORE_MSKOR,
  LOAD_CONSTANT,
  TBUFFER_STORE_FORMAT,
  ATOMIC_CMP_SWAP,
  ATOMIC_INC,
  ATOMIC_DEC,
  LAST_AMDGPU_ISD_NUMBER
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif