1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 17 18 #include "NVPTX.h" 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/Target/TargetLowering.h" 21 22 namespace llvm { 23 namespace NVPTXISD { 24 enum NodeType : unsigned { 25 // Start the numbering from where ISD NodeType finishes. 26 FIRST_NUMBER = ISD::BUILTIN_OP_END, 27 Wrapper, 28 CALL, 29 RET_FLAG, 30 LOAD_PARAM, 31 DeclareParam, 32 DeclareScalarParam, 33 DeclareRetParam, 34 DeclareRet, 35 DeclareScalarRet, 36 PrintCall, 37 PrintConvergentCall, 38 PrintCallUni, 39 PrintConvergentCallUni, 40 CallArgBegin, 41 CallArg, 42 LastCallArg, 43 CallArgEnd, 44 CallVoid, 45 CallVal, 46 CallSymbol, 47 Prototype, 48 MoveParam, 49 PseudoUseParam, 50 RETURN, 51 CallSeqBegin, 52 CallSeqEnd, 53 CallPrototype, 54 FUN_SHFL_CLAMP, 55 FUN_SHFR_CLAMP, 56 MUL_WIDE_SIGNED, 57 MUL_WIDE_UNSIGNED, 58 IMAD, 59 Dummy, 60 61 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, 62 LoadV4, 63 LDGV2, // LDG.v2 64 LDGV4, // LDG.v4 65 LDUV2, // LDU.v2 66 LDUV4, // LDU.v4 67 StoreV2, 68 StoreV4, 69 LoadParam, 70 LoadParamV2, 71 LoadParamV4, 72 StoreParam, 73 StoreParamV2, 74 StoreParamV4, 75 StoreParamS32, // to sext and store a <32bit value, not used currently 76 StoreParamU32, // to zext and store a <32bit value, not used currently 77 StoreRetval, 78 StoreRetvalV2, 79 StoreRetvalV4, 80 81 // Texture intrinsics 82 Tex1DFloatS32, 83 Tex1DFloatFloat, 84 Tex1DFloatFloatLevel, 85 Tex1DFloatFloatGrad, 86 Tex1DS32S32, 87 Tex1DS32Float, 88 Tex1DS32FloatLevel, 89 Tex1DS32FloatGrad, 90 Tex1DU32S32, 91 Tex1DU32Float, 92 Tex1DU32FloatLevel, 93 Tex1DU32FloatGrad, 94 Tex1DArrayFloatS32, 95 Tex1DArrayFloatFloat, 96 Tex1DArrayFloatFloatLevel, 97 Tex1DArrayFloatFloatGrad, 98 Tex1DArrayS32S32, 99 Tex1DArrayS32Float, 100 Tex1DArrayS32FloatLevel, 101 Tex1DArrayS32FloatGrad, 102 Tex1DArrayU32S32, 103 Tex1DArrayU32Float, 104 Tex1DArrayU32FloatLevel, 105 Tex1DArrayU32FloatGrad, 106 Tex2DFloatS32, 107 Tex2DFloatFloat, 108 Tex2DFloatFloatLevel, 109 Tex2DFloatFloatGrad, 110 Tex2DS32S32, 111 Tex2DS32Float, 112 Tex2DS32FloatLevel, 113 Tex2DS32FloatGrad, 114 Tex2DU32S32, 115 Tex2DU32Float, 116 Tex2DU32FloatLevel, 117 Tex2DU32FloatGrad, 118 Tex2DArrayFloatS32, 119 Tex2DArrayFloatFloat, 120 Tex2DArrayFloatFloatLevel, 121 Tex2DArrayFloatFloatGrad, 122 Tex2DArrayS32S32, 123 Tex2DArrayS32Float, 124 Tex2DArrayS32FloatLevel, 125 Tex2DArrayS32FloatGrad, 126 Tex2DArrayU32S32, 127 Tex2DArrayU32Float, 128 Tex2DArrayU32FloatLevel, 129 Tex2DArrayU32FloatGrad, 130 Tex3DFloatS32, 131 Tex3DFloatFloat, 132 Tex3DFloatFloatLevel, 133 Tex3DFloatFloatGrad, 134 Tex3DS32S32, 135 Tex3DS32Float, 136 Tex3DS32FloatLevel, 137 Tex3DS32FloatGrad, 138 Tex3DU32S32, 139 Tex3DU32Float, 140 Tex3DU32FloatLevel, 141 Tex3DU32FloatGrad, 142 TexCubeFloatFloat, 143 TexCubeFloatFloatLevel, 144 TexCubeS32Float, 145 TexCubeS32FloatLevel, 146 TexCubeU32Float, 147 TexCubeU32FloatLevel, 148 TexCubeArrayFloatFloat, 149 TexCubeArrayFloatFloatLevel, 150 TexCubeArrayS32Float, 151 TexCubeArrayS32FloatLevel, 152 TexCubeArrayU32Float, 153 TexCubeArrayU32FloatLevel, 154 Tld4R2DFloatFloat, 155 Tld4G2DFloatFloat, 156 Tld4B2DFloatFloat, 157 Tld4A2DFloatFloat, 158 Tld4R2DS64Float, 159 Tld4G2DS64Float, 160 Tld4B2DS64Float, 161 Tld4A2DS64Float, 162 Tld4R2DU64Float, 163 Tld4G2DU64Float, 164 Tld4B2DU64Float, 165 Tld4A2DU64Float, 166 TexUnified1DFloatS32, 167 TexUnified1DFloatFloat, 168 TexUnified1DFloatFloatLevel, 169 TexUnified1DFloatFloatGrad, 170 TexUnified1DS32S32, 171 TexUnified1DS32Float, 172 TexUnified1DS32FloatLevel, 173 TexUnified1DS32FloatGrad, 174 TexUnified1DU32S32, 175 TexUnified1DU32Float, 176 TexUnified1DU32FloatLevel, 177 TexUnified1DU32FloatGrad, 178 TexUnified1DArrayFloatS32, 179 TexUnified1DArrayFloatFloat, 180 TexUnified1DArrayFloatFloatLevel, 181 TexUnified1DArrayFloatFloatGrad, 182 TexUnified1DArrayS32S32, 183 TexUnified1DArrayS32Float, 184 TexUnified1DArrayS32FloatLevel, 185 TexUnified1DArrayS32FloatGrad, 186 TexUnified1DArrayU32S32, 187 TexUnified1DArrayU32Float, 188 TexUnified1DArrayU32FloatLevel, 189 TexUnified1DArrayU32FloatGrad, 190 TexUnified2DFloatS32, 191 TexUnified2DFloatFloat, 192 TexUnified2DFloatFloatLevel, 193 TexUnified2DFloatFloatGrad, 194 TexUnified2DS32S32, 195 TexUnified2DS32Float, 196 TexUnified2DS32FloatLevel, 197 TexUnified2DS32FloatGrad, 198 TexUnified2DU32S32, 199 TexUnified2DU32Float, 200 TexUnified2DU32FloatLevel, 201 TexUnified2DU32FloatGrad, 202 TexUnified2DArrayFloatS32, 203 TexUnified2DArrayFloatFloat, 204 TexUnified2DArrayFloatFloatLevel, 205 TexUnified2DArrayFloatFloatGrad, 206 TexUnified2DArrayS32S32, 207 TexUnified2DArrayS32Float, 208 TexUnified2DArrayS32FloatLevel, 209 TexUnified2DArrayS32FloatGrad, 210 TexUnified2DArrayU32S32, 211 TexUnified2DArrayU32Float, 212 TexUnified2DArrayU32FloatLevel, 213 TexUnified2DArrayU32FloatGrad, 214 TexUnified3DFloatS32, 215 TexUnified3DFloatFloat, 216 TexUnified3DFloatFloatLevel, 217 TexUnified3DFloatFloatGrad, 218 TexUnified3DS32S32, 219 TexUnified3DS32Float, 220 TexUnified3DS32FloatLevel, 221 TexUnified3DS32FloatGrad, 222 TexUnified3DU32S32, 223 TexUnified3DU32Float, 224 TexUnified3DU32FloatLevel, 225 TexUnified3DU32FloatGrad, 226 TexUnifiedCubeFloatFloat, 227 TexUnifiedCubeFloatFloatLevel, 228 TexUnifiedCubeS32Float, 229 TexUnifiedCubeS32FloatLevel, 230 TexUnifiedCubeU32Float, 231 TexUnifiedCubeU32FloatLevel, 232 TexUnifiedCubeArrayFloatFloat, 233 TexUnifiedCubeArrayFloatFloatLevel, 234 TexUnifiedCubeArrayS32Float, 235 TexUnifiedCubeArrayS32FloatLevel, 236 TexUnifiedCubeArrayU32Float, 237 TexUnifiedCubeArrayU32FloatLevel, 238 Tld4UnifiedR2DFloatFloat, 239 Tld4UnifiedG2DFloatFloat, 240 Tld4UnifiedB2DFloatFloat, 241 Tld4UnifiedA2DFloatFloat, 242 Tld4UnifiedR2DS64Float, 243 Tld4UnifiedG2DS64Float, 244 Tld4UnifiedB2DS64Float, 245 Tld4UnifiedA2DS64Float, 246 Tld4UnifiedR2DU64Float, 247 Tld4UnifiedG2DU64Float, 248 Tld4UnifiedB2DU64Float, 249 Tld4UnifiedA2DU64Float, 250 251 // Surface intrinsics 252 Suld1DI8Clamp, 253 Suld1DI16Clamp, 254 Suld1DI32Clamp, 255 Suld1DI64Clamp, 256 Suld1DV2I8Clamp, 257 Suld1DV2I16Clamp, 258 Suld1DV2I32Clamp, 259 Suld1DV2I64Clamp, 260 Suld1DV4I8Clamp, 261 Suld1DV4I16Clamp, 262 Suld1DV4I32Clamp, 263 264 Suld1DArrayI8Clamp, 265 Suld1DArrayI16Clamp, 266 Suld1DArrayI32Clamp, 267 Suld1DArrayI64Clamp, 268 Suld1DArrayV2I8Clamp, 269 Suld1DArrayV2I16Clamp, 270 Suld1DArrayV2I32Clamp, 271 Suld1DArrayV2I64Clamp, 272 Suld1DArrayV4I8Clamp, 273 Suld1DArrayV4I16Clamp, 274 Suld1DArrayV4I32Clamp, 275 276 Suld2DI8Clamp, 277 Suld2DI16Clamp, 278 Suld2DI32Clamp, 279 Suld2DI64Clamp, 280 Suld2DV2I8Clamp, 281 Suld2DV2I16Clamp, 282 Suld2DV2I32Clamp, 283 Suld2DV2I64Clamp, 284 Suld2DV4I8Clamp, 285 Suld2DV4I16Clamp, 286 Suld2DV4I32Clamp, 287 288 Suld2DArrayI8Clamp, 289 Suld2DArrayI16Clamp, 290 Suld2DArrayI32Clamp, 291 Suld2DArrayI64Clamp, 292 Suld2DArrayV2I8Clamp, 293 Suld2DArrayV2I16Clamp, 294 Suld2DArrayV2I32Clamp, 295 Suld2DArrayV2I64Clamp, 296 Suld2DArrayV4I8Clamp, 297 Suld2DArrayV4I16Clamp, 298 Suld2DArrayV4I32Clamp, 299 300 Suld3DI8Clamp, 301 Suld3DI16Clamp, 302 Suld3DI32Clamp, 303 Suld3DI64Clamp, 304 Suld3DV2I8Clamp, 305 Suld3DV2I16Clamp, 306 Suld3DV2I32Clamp, 307 Suld3DV2I64Clamp, 308 Suld3DV4I8Clamp, 309 Suld3DV4I16Clamp, 310 Suld3DV4I32Clamp, 311 312 Suld1DI8Trap, 313 Suld1DI16Trap, 314 Suld1DI32Trap, 315 Suld1DI64Trap, 316 Suld1DV2I8Trap, 317 Suld1DV2I16Trap, 318 Suld1DV2I32Trap, 319 Suld1DV2I64Trap, 320 Suld1DV4I8Trap, 321 Suld1DV4I16Trap, 322 Suld1DV4I32Trap, 323 324 Suld1DArrayI8Trap, 325 Suld1DArrayI16Trap, 326 Suld1DArrayI32Trap, 327 Suld1DArrayI64Trap, 328 Suld1DArrayV2I8Trap, 329 Suld1DArrayV2I16Trap, 330 Suld1DArrayV2I32Trap, 331 Suld1DArrayV2I64Trap, 332 Suld1DArrayV4I8Trap, 333 Suld1DArrayV4I16Trap, 334 Suld1DArrayV4I32Trap, 335 336 Suld2DI8Trap, 337 Suld2DI16Trap, 338 Suld2DI32Trap, 339 Suld2DI64Trap, 340 Suld2DV2I8Trap, 341 Suld2DV2I16Trap, 342 Suld2DV2I32Trap, 343 Suld2DV2I64Trap, 344 Suld2DV4I8Trap, 345 Suld2DV4I16Trap, 346 Suld2DV4I32Trap, 347 348 Suld2DArrayI8Trap, 349 Suld2DArrayI16Trap, 350 Suld2DArrayI32Trap, 351 Suld2DArrayI64Trap, 352 Suld2DArrayV2I8Trap, 353 Suld2DArrayV2I16Trap, 354 Suld2DArrayV2I32Trap, 355 Suld2DArrayV2I64Trap, 356 Suld2DArrayV4I8Trap, 357 Suld2DArrayV4I16Trap, 358 Suld2DArrayV4I32Trap, 359 360 Suld3DI8Trap, 361 Suld3DI16Trap, 362 Suld3DI32Trap, 363 Suld3DI64Trap, 364 Suld3DV2I8Trap, 365 Suld3DV2I16Trap, 366 Suld3DV2I32Trap, 367 Suld3DV2I64Trap, 368 Suld3DV4I8Trap, 369 Suld3DV4I16Trap, 370 Suld3DV4I32Trap, 371 372 Suld1DI8Zero, 373 Suld1DI16Zero, 374 Suld1DI32Zero, 375 Suld1DI64Zero, 376 Suld1DV2I8Zero, 377 Suld1DV2I16Zero, 378 Suld1DV2I32Zero, 379 Suld1DV2I64Zero, 380 Suld1DV4I8Zero, 381 Suld1DV4I16Zero, 382 Suld1DV4I32Zero, 383 384 Suld1DArrayI8Zero, 385 Suld1DArrayI16Zero, 386 Suld1DArrayI32Zero, 387 Suld1DArrayI64Zero, 388 Suld1DArrayV2I8Zero, 389 Suld1DArrayV2I16Zero, 390 Suld1DArrayV2I32Zero, 391 Suld1DArrayV2I64Zero, 392 Suld1DArrayV4I8Zero, 393 Suld1DArrayV4I16Zero, 394 Suld1DArrayV4I32Zero, 395 396 Suld2DI8Zero, 397 Suld2DI16Zero, 398 Suld2DI32Zero, 399 Suld2DI64Zero, 400 Suld2DV2I8Zero, 401 Suld2DV2I16Zero, 402 Suld2DV2I32Zero, 403 Suld2DV2I64Zero, 404 Suld2DV4I8Zero, 405 Suld2DV4I16Zero, 406 Suld2DV4I32Zero, 407 408 Suld2DArrayI8Zero, 409 Suld2DArrayI16Zero, 410 Suld2DArrayI32Zero, 411 Suld2DArrayI64Zero, 412 Suld2DArrayV2I8Zero, 413 Suld2DArrayV2I16Zero, 414 Suld2DArrayV2I32Zero, 415 Suld2DArrayV2I64Zero, 416 Suld2DArrayV4I8Zero, 417 Suld2DArrayV4I16Zero, 418 Suld2DArrayV4I32Zero, 419 420 Suld3DI8Zero, 421 Suld3DI16Zero, 422 Suld3DI32Zero, 423 Suld3DI64Zero, 424 Suld3DV2I8Zero, 425 Suld3DV2I16Zero, 426 Suld3DV2I32Zero, 427 Suld3DV2I64Zero, 428 Suld3DV4I8Zero, 429 Suld3DV4I16Zero, 430 Suld3DV4I32Zero 431 }; 432 } 433 434 class NVPTXSubtarget; 435 436 //===--------------------------------------------------------------------===// 437 // TargetLowering Implementation 438 //===--------------------------------------------------------------------===// 439 class NVPTXTargetLowering : public TargetLowering { 440 public: 441 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, 442 const NVPTXSubtarget &STI); 443 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 444 445 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 446 447 const char *getTargetNodeName(unsigned Opcode) const override; 448 449 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 450 unsigned Intrinsic) const override; 451 452 /// isLegalAddressingMode - Return true if the addressing mode represented 453 /// by AM is legal for this target, for a load/store of the specified type 454 /// Used to guide target specific optimizations, like loop strength 455 /// reduction (LoopStrengthReduce.cpp) and memory optimization for 456 /// address mode (CodeGenPrepare.cpp) 457 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, 458 unsigned AS) const override; 459 isTruncateFree(Type * SrcTy,Type * DstTy)460 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { 461 // Truncating 64-bit to 32-bit is free in SASS. 462 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 463 return false; 464 return SrcTy->getPrimitiveSizeInBits() == 64 && 465 DstTy->getPrimitiveSizeInBits() == 32; 466 } 467 getSetCCResultType(const DataLayout & DL,LLVMContext & Ctx,EVT VT)468 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, 469 EVT VT) const override { 470 if (VT.isVector()) 471 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); 472 return MVT::i1; 473 } 474 475 ConstraintType getConstraintType(StringRef Constraint) const override; 476 std::pair<unsigned, const TargetRegisterClass *> 477 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 478 StringRef Constraint, MVT VT) const override; 479 480 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, 481 bool isVarArg, 482 const SmallVectorImpl<ISD::InputArg> &Ins, 483 const SDLoc &dl, SelectionDAG &DAG, 484 SmallVectorImpl<SDValue> &InVals) const override; 485 486 SDValue LowerCall(CallLoweringInfo &CLI, 487 SmallVectorImpl<SDValue> &InVals) const override; 488 489 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, 490 const SmallVectorImpl<ISD::OutputArg> &, 491 unsigned retAlignment, 492 const ImmutableCallSite *CS) const; 493 494 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 495 const SmallVectorImpl<ISD::OutputArg> &Outs, 496 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, 497 SelectionDAG &DAG) const override; 498 499 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, 500 std::vector<SDValue> &Ops, 501 SelectionDAG &DAG) const override; 502 503 const NVPTXTargetMachine *nvTM; 504 505 // PTX always uses 32-bit shift amounts getScalarShiftAmountTy(const DataLayout &,EVT)506 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { 507 return MVT::i32; 508 } 509 510 TargetLoweringBase::LegalizeTypeAction 511 getPreferredVectorAction(EVT VT) const override; 512 513 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; 514 isFMAFasterThanFMulAndFAdd(EVT)515 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } 516 enableAggressiveFMAFusion(EVT VT)517 bool enableAggressiveFMAFusion(EVT VT) const override { return true; } 518 519 private: 520 const NVPTXSubtarget &STI; // cache the subtarget here 521 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; 522 523 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 524 525 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 526 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; 527 528 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 529 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; 530 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; 531 532 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; 533 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; 534 535 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; 536 537 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 538 SelectionDAG &DAG) const override; 539 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 540 541 unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, 542 Type *Ty, unsigned Idx) const; 543 }; 544 } // namespace llvm 545 546 #endif 547