1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// Interface definition for SIInstrInfo. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 16 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 17 18 #include "AMDGPUInstrInfo.h" 19 #include "SIDefines.h" 20 #include "SIRegisterInfo.h" 21 #include "Utils/AMDGPUBaseInfo.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/MC/MCInstrDesc.h" 30 #include "llvm/Support/Compiler.h" 31 #include <cassert> 32 #include <cstdint> 33 34 #define GET_INSTRINFO_HEADER 35 #include "AMDGPUGenInstrInfo.inc" 36 37 namespace llvm { 38 39 class APInt; 40 class MachineRegisterInfo; 41 class RegScavenger; 42 class GCNSubtarget; 43 class TargetRegisterClass; 44 45 class SIInstrInfo final : public AMDGPUGenInstrInfo { 46 private: 47 const SIRegisterInfo RI; 48 const GCNSubtarget &ST; 49 50 // The inverse predicate should have the negative value. 51 enum BranchPredicate { 52 INVALID_BR = 0, 53 SCC_TRUE = 1, 54 SCC_FALSE = -1, 55 VCCNZ = 2, 56 VCCZ = -2, 57 EXECNZ = -3, 58 EXECZ = 3 59 }; 60 61 using SetVectorType = SmallSetVector<MachineInstr *, 32>; 62 63 static unsigned getBranchOpcode(BranchPredicate Cond); 64 static BranchPredicate getBranchPredicate(unsigned Opcode); 65 66 public: 67 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, 68 MachineRegisterInfo &MRI, 69 MachineOperand &SuperReg, 70 const TargetRegisterClass *SuperRC, 71 unsigned SubIdx, 72 const TargetRegisterClass *SubRC) const; 73 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, 74 MachineRegisterInfo &MRI, 75 MachineOperand &SuperReg, 76 const TargetRegisterClass *SuperRC, 77 unsigned SubIdx, 78 const TargetRegisterClass *SubRC) const; 79 private: 80 void swapOperands(MachineInstr &Inst) const; 81 82 bool moveScalarAddSub(SetVectorType &Worklist, 83 MachineInstr &Inst) const; 84 85 void lowerScalarAbs(SetVectorType &Worklist, 86 MachineInstr &Inst) const; 87 88 void lowerScalarXnor(SetVectorType &Worklist, 89 MachineInstr &Inst) const; 90 91 void splitScalar64BitUnaryOp(SetVectorType &Worklist, 92 MachineInstr &Inst, unsigned Opcode) const; 93 94 void splitScalar64BitAddSub(SetVectorType &Worklist, 95 MachineInstr &Inst) const; 96 97 void splitScalar64BitBinaryOp(SetVectorType &Worklist, 98 MachineInstr &Inst, unsigned Opcode) const; 99 100 void splitScalar64BitBCNT(SetVectorType &Worklist, 101 MachineInstr &Inst) const; 102 void splitScalar64BitBFE(SetVectorType &Worklist, 103 MachineInstr &Inst) const; 104 void movePackToVALU(SetVectorType &Worklist, 105 MachineRegisterInfo &MRI, 106 MachineInstr &Inst) const; 107 108 void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI, 109 SetVectorType &Worklist) const; 110 111 void 112 addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, 113 SetVectorType &Worklist) const; 114 115 const TargetRegisterClass * 116 getDestEquivalentVGPRClass(const MachineInstr &Inst) const; 117 118 bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const; 119 120 unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; 121 122 protected: 123 bool swapSourceModifiers(MachineInstr &MI, 124 MachineOperand &Src0, unsigned Src0OpName, 125 MachineOperand &Src1, unsigned Src1OpName) const; 126 127 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, 128 unsigned OpIdx0, 129 unsigned OpIdx1) const override; 130 131 public: 132 enum TargetOperandFlags { 133 MO_MASK = 0x7, 134 135 MO_NONE = 0, 136 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 137 MO_GOTPCREL = 1, 138 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. 139 MO_GOTPCREL32 = 2, 140 MO_GOTPCREL32_LO = 2, 141 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. 142 MO_GOTPCREL32_HI = 3, 143 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. 144 MO_REL32 = 4, 145 MO_REL32_LO = 4, 146 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. 147 MO_REL32_HI = 5 148 }; 149 150 explicit SIInstrInfo(const GCNSubtarget &ST); 151 getRegisterInfo()152 const SIRegisterInfo &getRegisterInfo() const { 153 return RI; 154 } 155 156 bool isReallyTriviallyReMaterializable(const MachineInstr &MI, 157 AliasAnalysis *AA) const override; 158 159 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 160 int64_t &Offset1, 161 int64_t &Offset2) const override; 162 163 bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, 164 int64_t &Offset, 165 const TargetRegisterInfo *TRI) const final; 166 167 bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, 168 MachineInstr &SecondLdSt, unsigned BaseReg2, 169 unsigned NumLoads) const override; 170 171 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, 172 int64_t Offset1, unsigned NumLoads) const override; 173 174 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 175 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, 176 bool KillSrc) const override; 177 178 unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, 179 RegScavenger *RS, unsigned TmpReg, 180 unsigned Offset, unsigned Size) const; 181 182 void materializeImmediate(MachineBasicBlock &MBB, 183 MachineBasicBlock::iterator MI, 184 const DebugLoc &DL, 185 unsigned DestReg, 186 int64_t Value) const; 187 188 const TargetRegisterClass *getPreferredSelectRegClass( 189 unsigned Size) const; 190 191 unsigned insertNE(MachineBasicBlock *MBB, 192 MachineBasicBlock::iterator I, const DebugLoc &DL, 193 unsigned SrcReg, int Value) const; 194 195 unsigned insertEQ(MachineBasicBlock *MBB, 196 MachineBasicBlock::iterator I, const DebugLoc &DL, 197 unsigned SrcReg, int Value) const; 198 199 void storeRegToStackSlot(MachineBasicBlock &MBB, 200 MachineBasicBlock::iterator MI, unsigned SrcReg, 201 bool isKill, int FrameIndex, 202 const TargetRegisterClass *RC, 203 const TargetRegisterInfo *TRI) const override; 204 205 void loadRegFromStackSlot(MachineBasicBlock &MBB, 206 MachineBasicBlock::iterator MI, unsigned DestReg, 207 int FrameIndex, const TargetRegisterClass *RC, 208 const TargetRegisterInfo *TRI) const override; 209 210 bool expandPostRAPseudo(MachineInstr &MI) const override; 211 212 // Returns an opcode that can be used to move a value to a \p DstRC 213 // register. If there is no hardware instruction that can store to \p 214 // DstRC, then AMDGPU::COPY is returned. 215 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; 216 217 LLVM_READONLY 218 int commuteOpcode(unsigned Opc) const; 219 220 LLVM_READONLY commuteOpcode(const MachineInstr & MI)221 inline int commuteOpcode(const MachineInstr &MI) const { 222 return commuteOpcode(MI.getOpcode()); 223 } 224 225 bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, 226 unsigned &SrcOpIdx2) const override; 227 228 bool isBranchOffsetInRange(unsigned BranchOpc, 229 int64_t BrOffset) const override; 230 231 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; 232 233 unsigned insertIndirectBranch(MachineBasicBlock &MBB, 234 MachineBasicBlock &NewDestBB, 235 const DebugLoc &DL, 236 int64_t BrOffset, 237 RegScavenger *RS = nullptr) const override; 238 239 bool analyzeBranchImpl(MachineBasicBlock &MBB, 240 MachineBasicBlock::iterator I, 241 MachineBasicBlock *&TBB, 242 MachineBasicBlock *&FBB, 243 SmallVectorImpl<MachineOperand> &Cond, 244 bool AllowModify) const; 245 246 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 247 MachineBasicBlock *&FBB, 248 SmallVectorImpl<MachineOperand> &Cond, 249 bool AllowModify = false) const override; 250 251 unsigned removeBranch(MachineBasicBlock &MBB, 252 int *BytesRemoved = nullptr) const override; 253 254 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 255 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 256 const DebugLoc &DL, 257 int *BytesAdded = nullptr) const override; 258 259 bool reverseBranchCondition( 260 SmallVectorImpl<MachineOperand> &Cond) const override; 261 262 bool canInsertSelect(const MachineBasicBlock &MBB, 263 ArrayRef<MachineOperand> Cond, 264 unsigned TrueReg, unsigned FalseReg, 265 int &CondCycles, 266 int &TrueCycles, int &FalseCycles) const override; 267 268 void insertSelect(MachineBasicBlock &MBB, 269 MachineBasicBlock::iterator I, const DebugLoc &DL, 270 unsigned DstReg, ArrayRef<MachineOperand> Cond, 271 unsigned TrueReg, unsigned FalseReg) const override; 272 273 void insertVectorSelect(MachineBasicBlock &MBB, 274 MachineBasicBlock::iterator I, const DebugLoc &DL, 275 unsigned DstReg, ArrayRef<MachineOperand> Cond, 276 unsigned TrueReg, unsigned FalseReg) const; 277 278 unsigned getAddressSpaceForPseudoSourceKind( 279 PseudoSourceValue::PSVKind Kind) const override; 280 281 bool 282 areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, 283 AliasAnalysis *AA = nullptr) const override; 284 285 bool isFoldableCopy(const MachineInstr &MI) const; 286 287 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, 288 MachineRegisterInfo *MRI) const final; 289 getMachineCSELookAheadLimit()290 unsigned getMachineCSELookAheadLimit() const override { return 500; } 291 292 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, 293 MachineInstr &MI, 294 LiveVariables *LV) const override; 295 296 bool isSchedulingBoundary(const MachineInstr &MI, 297 const MachineBasicBlock *MBB, 298 const MachineFunction &MF) const override; 299 isSALU(const MachineInstr & MI)300 static bool isSALU(const MachineInstr &MI) { 301 return MI.getDesc().TSFlags & SIInstrFlags::SALU; 302 } 303 isSALU(uint16_t Opcode)304 bool isSALU(uint16_t Opcode) const { 305 return get(Opcode).TSFlags & SIInstrFlags::SALU; 306 } 307 isVALU(const MachineInstr & MI)308 static bool isVALU(const MachineInstr &MI) { 309 return MI.getDesc().TSFlags & SIInstrFlags::VALU; 310 } 311 isVALU(uint16_t Opcode)312 bool isVALU(uint16_t Opcode) const { 313 return get(Opcode).TSFlags & SIInstrFlags::VALU; 314 } 315 isVMEM(const MachineInstr & MI)316 static bool isVMEM(const MachineInstr &MI) { 317 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI); 318 } 319 isVMEM(uint16_t Opcode)320 bool isVMEM(uint16_t Opcode) const { 321 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode); 322 } 323 isSOP1(const MachineInstr & MI)324 static bool isSOP1(const MachineInstr &MI) { 325 return MI.getDesc().TSFlags & SIInstrFlags::SOP1; 326 } 327 isSOP1(uint16_t Opcode)328 bool isSOP1(uint16_t Opcode) const { 329 return get(Opcode).TSFlags & SIInstrFlags::SOP1; 330 } 331 isSOP2(const MachineInstr & MI)332 static bool isSOP2(const MachineInstr &MI) { 333 return MI.getDesc().TSFlags & SIInstrFlags::SOP2; 334 } 335 isSOP2(uint16_t Opcode)336 bool isSOP2(uint16_t Opcode) const { 337 return get(Opcode).TSFlags & SIInstrFlags::SOP2; 338 } 339 isSOPC(const MachineInstr & MI)340 static bool isSOPC(const MachineInstr &MI) { 341 return MI.getDesc().TSFlags & SIInstrFlags::SOPC; 342 } 343 isSOPC(uint16_t Opcode)344 bool isSOPC(uint16_t Opcode) const { 345 return get(Opcode).TSFlags & SIInstrFlags::SOPC; 346 } 347 isSOPK(const MachineInstr & MI)348 static bool isSOPK(const MachineInstr &MI) { 349 return MI.getDesc().TSFlags & SIInstrFlags::SOPK; 350 } 351 isSOPK(uint16_t Opcode)352 bool isSOPK(uint16_t Opcode) const { 353 return get(Opcode).TSFlags & SIInstrFlags::SOPK; 354 } 355 isSOPP(const MachineInstr & MI)356 static bool isSOPP(const MachineInstr &MI) { 357 return MI.getDesc().TSFlags & SIInstrFlags::SOPP; 358 } 359 isSOPP(uint16_t Opcode)360 bool isSOPP(uint16_t Opcode) const { 361 return get(Opcode).TSFlags & SIInstrFlags::SOPP; 362 } 363 isVOP1(const MachineInstr & MI)364 static bool isVOP1(const MachineInstr &MI) { 365 return MI.getDesc().TSFlags & SIInstrFlags::VOP1; 366 } 367 isVOP1(uint16_t Opcode)368 bool isVOP1(uint16_t Opcode) const { 369 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 370 } 371 isVOP2(const MachineInstr & MI)372 static bool isVOP2(const MachineInstr &MI) { 373 return MI.getDesc().TSFlags & SIInstrFlags::VOP2; 374 } 375 isVOP2(uint16_t Opcode)376 bool isVOP2(uint16_t Opcode) const { 377 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 378 } 379 isVOP3(const MachineInstr & MI)380 static bool isVOP3(const MachineInstr &MI) { 381 return MI.getDesc().TSFlags & SIInstrFlags::VOP3; 382 } 383 isVOP3(uint16_t Opcode)384 bool isVOP3(uint16_t Opcode) const { 385 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 386 } 387 isSDWA(const MachineInstr & MI)388 static bool isSDWA(const MachineInstr &MI) { 389 return MI.getDesc().TSFlags & SIInstrFlags::SDWA; 390 } 391 isSDWA(uint16_t Opcode)392 bool isSDWA(uint16_t Opcode) const { 393 return get(Opcode).TSFlags & SIInstrFlags::SDWA; 394 } 395 isVOPC(const MachineInstr & MI)396 static bool isVOPC(const MachineInstr &MI) { 397 return MI.getDesc().TSFlags & SIInstrFlags::VOPC; 398 } 399 isVOPC(uint16_t Opcode)400 bool isVOPC(uint16_t Opcode) const { 401 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 402 } 403 isMUBUF(const MachineInstr & MI)404 static bool isMUBUF(const MachineInstr &MI) { 405 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; 406 } 407 isMUBUF(uint16_t Opcode)408 bool isMUBUF(uint16_t Opcode) const { 409 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 410 } 411 isMTBUF(const MachineInstr & MI)412 static bool isMTBUF(const MachineInstr &MI) { 413 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; 414 } 415 isMTBUF(uint16_t Opcode)416 bool isMTBUF(uint16_t Opcode) const { 417 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 418 } 419 isSMRD(const MachineInstr & MI)420 static bool isSMRD(const MachineInstr &MI) { 421 return MI.getDesc().TSFlags & SIInstrFlags::SMRD; 422 } 423 isSMRD(uint16_t Opcode)424 bool isSMRD(uint16_t Opcode) const { 425 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 426 } 427 428 bool isBufferSMRD(const MachineInstr &MI) const; 429 isDS(const MachineInstr & MI)430 static bool isDS(const MachineInstr &MI) { 431 return MI.getDesc().TSFlags & SIInstrFlags::DS; 432 } 433 isDS(uint16_t Opcode)434 bool isDS(uint16_t Opcode) const { 435 return get(Opcode).TSFlags & SIInstrFlags::DS; 436 } 437 isMIMG(const MachineInstr & MI)438 static bool isMIMG(const MachineInstr &MI) { 439 return MI.getDesc().TSFlags & SIInstrFlags::MIMG; 440 } 441 isMIMG(uint16_t Opcode)442 bool isMIMG(uint16_t Opcode) const { 443 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 444 } 445 isGather4(const MachineInstr & MI)446 static bool isGather4(const MachineInstr &MI) { 447 return MI.getDesc().TSFlags & SIInstrFlags::Gather4; 448 } 449 isGather4(uint16_t Opcode)450 bool isGather4(uint16_t Opcode) const { 451 return get(Opcode).TSFlags & SIInstrFlags::Gather4; 452 } 453 isFLAT(const MachineInstr & MI)454 static bool isFLAT(const MachineInstr &MI) { 455 return MI.getDesc().TSFlags & SIInstrFlags::FLAT; 456 } 457 458 // Is a FLAT encoded instruction which accesses a specific segment, 459 // i.e. global_* or scratch_*. isSegmentSpecificFLAT(const MachineInstr & MI)460 static bool isSegmentSpecificFLAT(const MachineInstr &MI) { 461 auto Flags = MI.getDesc().TSFlags; 462 return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT); 463 } 464 465 // Any FLAT encoded instruction, including global_* and scratch_*. isFLAT(uint16_t Opcode)466 bool isFLAT(uint16_t Opcode) const { 467 return get(Opcode).TSFlags & SIInstrFlags::FLAT; 468 } 469 isEXP(const MachineInstr & MI)470 static bool isEXP(const MachineInstr &MI) { 471 return MI.getDesc().TSFlags & SIInstrFlags::EXP; 472 } 473 isEXP(uint16_t Opcode)474 bool isEXP(uint16_t Opcode) const { 475 return get(Opcode).TSFlags & SIInstrFlags::EXP; 476 } 477 isWQM(const MachineInstr & MI)478 static bool isWQM(const MachineInstr &MI) { 479 return MI.getDesc().TSFlags & SIInstrFlags::WQM; 480 } 481 isWQM(uint16_t Opcode)482 bool isWQM(uint16_t Opcode) const { 483 return get(Opcode).TSFlags & SIInstrFlags::WQM; 484 } 485 isDisableWQM(const MachineInstr & MI)486 static bool isDisableWQM(const MachineInstr &MI) { 487 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; 488 } 489 isDisableWQM(uint16_t Opcode)490 bool isDisableWQM(uint16_t Opcode) const { 491 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; 492 } 493 isVGPRSpill(const MachineInstr & MI)494 static bool isVGPRSpill(const MachineInstr &MI) { 495 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; 496 } 497 isVGPRSpill(uint16_t Opcode)498 bool isVGPRSpill(uint16_t Opcode) const { 499 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; 500 } 501 isSGPRSpill(const MachineInstr & MI)502 static bool isSGPRSpill(const MachineInstr &MI) { 503 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill; 504 } 505 isSGPRSpill(uint16_t Opcode)506 bool isSGPRSpill(uint16_t Opcode) const { 507 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill; 508 } 509 isDPP(const MachineInstr & MI)510 static bool isDPP(const MachineInstr &MI) { 511 return MI.getDesc().TSFlags & SIInstrFlags::DPP; 512 } 513 isDPP(uint16_t Opcode)514 bool isDPP(uint16_t Opcode) const { 515 return get(Opcode).TSFlags & SIInstrFlags::DPP; 516 } 517 isVOP3P(const MachineInstr & MI)518 static bool isVOP3P(const MachineInstr &MI) { 519 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; 520 } 521 isVOP3P(uint16_t Opcode)522 bool isVOP3P(uint16_t Opcode) const { 523 return get(Opcode).TSFlags & SIInstrFlags::VOP3P; 524 } 525 isVINTRP(const MachineInstr & MI)526 static bool isVINTRP(const MachineInstr &MI) { 527 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; 528 } 529 isVINTRP(uint16_t Opcode)530 bool isVINTRP(uint16_t Opcode) const { 531 return get(Opcode).TSFlags & SIInstrFlags::VINTRP; 532 } 533 isScalarUnit(const MachineInstr & MI)534 static bool isScalarUnit(const MachineInstr &MI) { 535 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); 536 } 537 usesVM_CNT(const MachineInstr & MI)538 static bool usesVM_CNT(const MachineInstr &MI) { 539 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; 540 } 541 usesLGKM_CNT(const MachineInstr & MI)542 static bool usesLGKM_CNT(const MachineInstr &MI) { 543 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; 544 } 545 sopkIsZext(const MachineInstr & MI)546 static bool sopkIsZext(const MachineInstr &MI) { 547 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT; 548 } 549 sopkIsZext(uint16_t Opcode)550 bool sopkIsZext(uint16_t Opcode) const { 551 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT; 552 } 553 554 /// \returns true if this is an s_store_dword* instruction. This is more 555 /// specific than than isSMEM && mayStore. isScalarStore(const MachineInstr & MI)556 static bool isScalarStore(const MachineInstr &MI) { 557 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; 558 } 559 isScalarStore(uint16_t Opcode)560 bool isScalarStore(uint16_t Opcode) const { 561 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; 562 } 563 isFixedSize(const MachineInstr & MI)564 static bool isFixedSize(const MachineInstr &MI) { 565 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; 566 } 567 isFixedSize(uint16_t Opcode)568 bool isFixedSize(uint16_t Opcode) const { 569 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; 570 } 571 hasFPClamp(const MachineInstr & MI)572 static bool hasFPClamp(const MachineInstr &MI) { 573 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; 574 } 575 hasFPClamp(uint16_t Opcode)576 bool hasFPClamp(uint16_t Opcode) const { 577 return get(Opcode).TSFlags & SIInstrFlags::FPClamp; 578 } 579 hasIntClamp(const MachineInstr & MI)580 static bool hasIntClamp(const MachineInstr &MI) { 581 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; 582 } 583 getClampMask(const MachineInstr & MI)584 uint64_t getClampMask(const MachineInstr &MI) const { 585 const uint64_t ClampFlags = SIInstrFlags::FPClamp | 586 SIInstrFlags::IntClamp | 587 SIInstrFlags::ClampLo | 588 SIInstrFlags::ClampHi; 589 return MI.getDesc().TSFlags & ClampFlags; 590 } 591 isVGPRCopy(const MachineInstr & MI)592 bool isVGPRCopy(const MachineInstr &MI) const { 593 assert(MI.isCopy()); 594 unsigned Dest = MI.getOperand(0).getReg(); 595 const MachineFunction &MF = *MI.getParent()->getParent(); 596 const MachineRegisterInfo &MRI = MF.getRegInfo(); 597 return !RI.isSGPRReg(MRI, Dest); 598 } 599 600 /// Whether we must prevent this instruction from executing with EXEC = 0. 601 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; 602 603 bool isInlineConstant(const APInt &Imm) const; 604 605 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; 606 isInlineConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)607 bool isInlineConstant(const MachineOperand &MO, 608 const MCOperandInfo &OpInfo) const { 609 return isInlineConstant(MO, OpInfo.OperandType); 610 } 611 612 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would 613 /// be an inline immediate. isInlineConstant(const MachineInstr & MI,const MachineOperand & UseMO,const MachineOperand & DefMO)614 bool isInlineConstant(const MachineInstr &MI, 615 const MachineOperand &UseMO, 616 const MachineOperand &DefMO) const { 617 assert(UseMO.getParent() == &MI); 618 int OpIdx = MI.getOperandNo(&UseMO); 619 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) { 620 return false; 621 } 622 623 return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]); 624 } 625 626 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline 627 /// immediate. isInlineConstant(const MachineInstr & MI,unsigned OpIdx)628 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { 629 const MachineOperand &MO = MI.getOperand(OpIdx); 630 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType); 631 } 632 isInlineConstant(const MachineInstr & MI,unsigned OpIdx,const MachineOperand & MO)633 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, 634 const MachineOperand &MO) const { 635 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) 636 return false; 637 638 if (MI.isCopy()) { 639 unsigned Size = getOpSize(MI, OpIdx); 640 assert(Size == 8 || Size == 4); 641 642 uint8_t OpType = (Size == 8) ? 643 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32; 644 return isInlineConstant(MO, OpType); 645 } 646 647 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType); 648 } 649 isInlineConstant(const MachineOperand & MO)650 bool isInlineConstant(const MachineOperand &MO) const { 651 const MachineInstr *Parent = MO.getParent(); 652 return isInlineConstant(*Parent, Parent->getOperandNo(&MO)); 653 } 654 isLiteralConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)655 bool isLiteralConstant(const MachineOperand &MO, 656 const MCOperandInfo &OpInfo) const { 657 return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType); 658 } 659 isLiteralConstant(const MachineInstr & MI,int OpIdx)660 bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const { 661 const MachineOperand &MO = MI.getOperand(OpIdx); 662 return MO.isImm() && !isInlineConstant(MI, OpIdx); 663 } 664 665 // Returns true if this operand could potentially require a 32-bit literal 666 // operand, but not necessarily. A FrameIndex for example could resolve to an 667 // inline immediate value that will not require an additional 4-bytes; this 668 // assumes that it will. 669 bool isLiteralConstantLike(const MachineOperand &MO, 670 const MCOperandInfo &OpInfo) const; 671 672 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 673 const MachineOperand &MO) const; 674 675 /// Return true if this 64-bit VALU instruction has a 32-bit encoding. 676 /// This function will return false if you pass it a 32-bit instruction. 677 bool hasVALU32BitEncoding(unsigned Opcode) const; 678 679 /// Returns true if this operand uses the constant bus. 680 bool usesConstantBus(const MachineRegisterInfo &MRI, 681 const MachineOperand &MO, 682 const MCOperandInfo &OpInfo) const; 683 684 /// Return true if this instruction has any modifiers. 685 /// e.g. src[012]_mod, omod, clamp. 686 bool hasModifiers(unsigned Opcode) const; 687 688 bool hasModifiersSet(const MachineInstr &MI, 689 unsigned OpName) const; 690 bool hasAnyModifiersSet(const MachineInstr &MI) const; 691 692 bool verifyInstruction(const MachineInstr &MI, 693 StringRef &ErrInfo) const override; 694 695 unsigned getVALUOp(const MachineInstr &MI) const; 696 697 /// Return the correct register class for \p OpNo. For target-specific 698 /// instructions, this will return the register class that has been defined 699 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return 700 /// the register class of its machine operand. 701 /// to infer the correct register class base on the other operands. 702 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, 703 unsigned OpNo) const; 704 705 /// Return the size in bytes of the operand OpNo on the given 706 // instruction opcode. getOpSize(uint16_t Opcode,unsigned OpNo)707 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { 708 const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo]; 709 710 if (OpInfo.RegClass == -1) { 711 // If this is an immediate operand, this must be a 32-bit literal. 712 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); 713 return 4; 714 } 715 716 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; 717 } 718 719 /// This form should usually be preferred since it handles operands 720 /// with unknown register classes. getOpSize(const MachineInstr & MI,unsigned OpNo)721 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { 722 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; 723 } 724 725 /// \returns true if it is legal for the operand at index \p OpNo 726 /// to read a VGPR. 727 bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const; 728 729 /// Legalize the \p OpIndex operand of this instruction by inserting 730 /// a MOV. For example: 731 /// ADD_I32_e32 VGPR0, 15 732 /// to 733 /// MOV VGPR1, 15 734 /// ADD_I32_e32 VGPR0, VGPR1 735 /// 736 /// If the operand being legalized is a register, then a COPY will be used 737 /// instead of MOV. 738 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; 739 740 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand 741 /// for \p MI. 742 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 743 const MachineOperand *MO = nullptr) const; 744 745 /// Check if \p MO would be a valid operand for the given operand 746 /// definition \p OpInfo. Note this does not attempt to validate constant bus 747 /// restrictions (e.g. literal constant usage). 748 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, 749 const MCOperandInfo &OpInfo, 750 const MachineOperand &MO) const; 751 752 /// Check if \p MO (a register operand) is a legal register for the 753 /// given operand description. 754 bool isLegalRegOperand(const MachineRegisterInfo &MRI, 755 const MCOperandInfo &OpInfo, 756 const MachineOperand &MO) const; 757 758 /// Legalize operands in \p MI by either commuting it or inserting a 759 /// copy of src1. 760 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; 761 762 /// Fix operands in \p MI to satisfy constant bus requirements. 763 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; 764 765 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only 766 /// be used when it is know that the value in SrcReg is same across all 767 /// threads in the wave. 768 /// \returns The SGPR register that \p SrcReg was copied to. 769 unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, 770 MachineRegisterInfo &MRI) const; 771 772 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; 773 774 void legalizeGenericOperand(MachineBasicBlock &InsertMBB, 775 MachineBasicBlock::iterator I, 776 const TargetRegisterClass *DstRC, 777 MachineOperand &Op, MachineRegisterInfo &MRI, 778 const DebugLoc &DL) const; 779 780 /// Legalize all operands in this instruction. This function may 781 /// create new instruction and insert them before \p MI. 782 void legalizeOperands(MachineInstr &MI) const; 783 784 /// Replace this instruction's opcode with the equivalent VALU 785 /// opcode. This function will also move the users of \p MI to the 786 /// VALU if necessary. 787 void moveToVALU(MachineInstr &MI) const; 788 789 void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI, 790 int Count) const; 791 792 void insertNoop(MachineBasicBlock &MBB, 793 MachineBasicBlock::iterator MI) const override; 794 795 void insertReturn(MachineBasicBlock &MBB) const; 796 /// Return the number of wait states that result from executing this 797 /// instruction. 798 unsigned getNumWaitStates(const MachineInstr &MI) const; 799 800 /// Returns the operand named \p Op. If \p MI does not have an 801 /// operand named \c Op, this function returns nullptr. 802 LLVM_READONLY 803 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; 804 805 LLVM_READONLY getNamedOperand(const MachineInstr & MI,unsigned OpName)806 const MachineOperand *getNamedOperand(const MachineInstr &MI, 807 unsigned OpName) const { 808 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); 809 } 810 811 /// Get required immediate operand getNamedImmOperand(const MachineInstr & MI,unsigned OpName)812 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { 813 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 814 return MI.getOperand(Idx).getImm(); 815 } 816 817 uint64_t getDefaultRsrcDataFormat() const; 818 uint64_t getScratchRsrcWords23() const; 819 820 bool isLowLatencyInstruction(const MachineInstr &MI) const; 821 bool isHighLatencyInstruction(const MachineInstr &MI) const; 822 823 /// Return the descriptor of the target-specific machine instruction 824 /// that corresponds to the specified pseudo or native opcode. getMCOpcodeFromPseudo(unsigned Opcode)825 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { 826 return get(pseudoToMCOpcode(Opcode)); 827 } 828 829 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; 830 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; 831 832 unsigned isLoadFromStackSlot(const MachineInstr &MI, 833 int &FrameIndex) const override; 834 unsigned isStoreToStackSlot(const MachineInstr &MI, 835 int &FrameIndex) const override; 836 837 unsigned getInstBundleSize(const MachineInstr &MI) const; 838 unsigned getInstSizeInBytes(const MachineInstr &MI) const override; 839 840 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; 841 842 bool isNonUniformBranchInstr(MachineInstr &Instr) const; 843 844 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 845 MachineBasicBlock *IfEnd) const; 846 847 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, 848 MachineBasicBlock *LoopEnd) const; 849 850 std::pair<unsigned, unsigned> 851 decomposeMachineOperandsTargetFlags(unsigned TF) const override; 852 853 ArrayRef<std::pair<int, const char *>> 854 getSerializableTargetIndices() const override; 855 856 ArrayRef<std::pair<unsigned, const char *>> 857 getSerializableDirectMachineOperandTargetFlags() const override; 858 859 ScheduleHazardRecognizer * 860 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 861 const ScheduleDAG *DAG) const override; 862 863 ScheduleHazardRecognizer * 864 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; 865 866 bool isBasicBlockPrologue(const MachineInstr &MI) const override; 867 868 /// Return a partially built integer add instruction without carry. 869 /// Caller must add source operands. 870 /// For pre-GFX9 it will generate unused carry destination operand. 871 /// TODO: After GFX9 it should return a no-carry operation. 872 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 873 MachineBasicBlock::iterator I, 874 const DebugLoc &DL, 875 unsigned DestReg) const; 876 877 static bool isKillTerminator(unsigned Opcode); 878 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; 879 isLegalMUBUFImmOffset(unsigned Imm)880 static bool isLegalMUBUFImmOffset(unsigned Imm) { 881 return isUInt<12>(Imm); 882 } 883 884 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. 885 /// Return -1 if the target-specific opcode for the pseudo instruction does 886 /// not exist. If Opcode is not a pseudo instruction, this is identity. 887 int pseudoToMCOpcode(int Opcode) const; 888 889 }; 890 891 namespace AMDGPU { 892 893 LLVM_READONLY 894 int getVOPe64(uint16_t Opcode); 895 896 LLVM_READONLY 897 int getVOPe32(uint16_t Opcode); 898 899 LLVM_READONLY 900 int getSDWAOp(uint16_t Opcode); 901 902 LLVM_READONLY 903 int getBasicFromSDWAOp(uint16_t Opcode); 904 905 LLVM_READONLY 906 int getCommuteRev(uint16_t Opcode); 907 908 LLVM_READONLY 909 int getCommuteOrig(uint16_t Opcode); 910 911 LLVM_READONLY 912 int getAddr64Inst(uint16_t Opcode); 913 914 LLVM_READONLY 915 int getMUBUFNoLdsInst(uint16_t Opcode); 916 917 LLVM_READONLY 918 int getAtomicRetOp(uint16_t Opcode); 919 920 LLVM_READONLY 921 int getAtomicNoRetOp(uint16_t Opcode); 922 923 LLVM_READONLY 924 int getSOPKOp(uint16_t Opcode); 925 926 const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; 927 const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); 928 const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); 929 const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); 930 931 // For MachineOperands. 932 enum TargetFlags { 933 TF_LONG_BRANCH_FORWARD = 1 << 0, 934 TF_LONG_BRANCH_BACKWARD = 1 << 1 935 }; 936 937 } // end namespace AMDGPU 938 939 namespace SI { 940 namespace KernelInputOffsets { 941 942 /// Offsets in bytes from the start of the input buffer 943 enum Offsets { 944 NGROUPS_X = 0, 945 NGROUPS_Y = 4, 946 NGROUPS_Z = 8, 947 GLOBAL_SIZE_X = 12, 948 GLOBAL_SIZE_Y = 16, 949 GLOBAL_SIZE_Z = 20, 950 LOCAL_SIZE_X = 24, 951 LOCAL_SIZE_Y = 28, 952 LOCAL_SIZE_Z = 32 953 }; 954 955 } // end namespace KernelInputOffsets 956 } // end namespace SI 957 958 } // end namespace llvm 959 960 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 961