//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {
  namespace PPCISD {
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops and target ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// FSEL - Traditional three-operand fsel node.
      ///
      FSEL,

      /// FCFID - The FCFID instruction, taking an f64 operand and producing
      /// an f64 value containing the FP representation of the integer that
      /// was temporarily in the f64 operand.
      FCFID,

      /// Newer FCFID[US] integer-to-floating-point conversion instructions
      /// for unsigned integers and single-precision outputs.
      FCFIDU, FCFIDS, FCFIDUS,

      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or
      /// f64 operand, producing an f64 value containing the integer
      /// representation of that FP value.
      FCTIDZ, FCTIWZ,

      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions
      /// for unsigned integers.
      FCTIDUZ, FCTIWUZ,

      /// Reciprocal estimate instructions (unary FP ops).
      FRE, FRSQRTE,

      // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
      // three v4f32 operands and producing a v4f32 result.
      VMADDFP, VNMSUBFP,

      /// VPERM - The PPC VPERM Instruction.
      ///
      VPERM,

      /// XXSPLT - The PPC VSX splat instructions
      ///
      XXSPLT,

      /// XXINSERT - The PPC VSX insert instruction
      ///
      XXINSERT,

      /// VECSHL - The PPC VSX shift left instruction
      ///
      VECSHL,

      /// The CMPB instruction (takes two operands of i32 or i64).
      CMPB,

      /// Hi/Lo - These represent the high and low 16-bit parts of a global
      /// address respectively.  These nodes have two operands, the first of
      /// which must be a TargetGlobalAddress, and the second of which must
      /// be a Constant.  Selected naively, these turn into 'lis G+C' and
      /// 'li G+C', though these are usually folded into other nodes.
      Hi, Lo,

      /// The following two target-specific nodes are used for calls through
      /// function pointers in the 64-bit SVR4 ABI.

      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex
      /// to compute an allocation on the stack.
      DYNALLOC,

      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex
      /// to compute an offset from native SP to the address of the most
      /// recent dynamic alloca.
      DYNAREAOFFSET,

      /// GlobalBaseReg - On Darwin, this node represents the result of the
      /// mflr at function entry, used for PIC code.
      GlobalBaseReg,

      /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
      /// shift amounts.  These nodes are generated by the multi-precision
      /// shift code.
      SRL, SRA, SHL,

      /// The combination of sra[wd]i and addze used to implement signed
      /// integer division by a power of 2.  The first operand is the
      /// dividend, and the second is the constant shift amount (representing
      /// the divisor).
      SRA_ADDZE,
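
      // Illustrative example (editor's note, not from the original header):
      // for a 32-bit signed division by 4, SRA_ADDZE selects to the sequence
      //   srawi r3, r3, 2   // arithmetic shift; CA is set if the dividend
      //                     // is negative and nonzero bits were shifted out
      //   addze r3, r3      // add the carry back in to round toward zero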
      /// CALL - A direct function call.
      /// CALL_NOP is a call with the special NOP which follows 64-bit
      /// SVR4 calls.
      CALL, CALL_NOP,

      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to
      /// a MTCTR instruction.
      MTCTR,

      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
      /// BCTRL instruction.
      BCTRL,

      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a
      /// bctrl instruction and the TOC reload required on SVR4 PPC64.
      BCTRL_LOAD_TOC,

      /// Return with a flag operand, matched by 'blr'
      RET_FLAG,

      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
      /// This copies the bits corresponding to the specified CRREG into the
      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
      MFOCRF,

      /// Direct move from a VSX register to a GPR
      MFVSR,

      /// Direct move from a GPR to a VSX register (algebraic)
      MTVSRA,

      /// Direct move from a GPR to a VSX register (zero)
      MTVSRZ,

      /// Extract a subvector from a signed integer vector and convert to FP.
      /// It is primarily used to convert a (widened) illegal integer vector
      /// type to a legal floating-point vector type.
      /// For example v2i32 -> widened to v4i32 -> v2f64
      SINT_VEC_TO_FP,

      /// Extract a subvector from an unsigned integer vector and convert to
      /// FP.  As with SINT_VEC_TO_FP, used for converting illegal types.
      UINT_VEC_TO_FP,

      // FIXME: Remove these once the ANDI glue bug is fixed:
      /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of
      /// the eq or gt bit of CR0 after executing andi. x, 1.  This is used
      /// to implement truncation of i32 or i64 to i1.
      ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,

      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
      // target (returns (Lo, Hi)).  It takes a chain operand.
      READ_TIME_BASE,

      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
      /// instructions.  For lack of a better number, we use the opcode
      /// number encoding for the OPC field to identify the compare.  For
      /// example, 838 is VCMPGTSH.
      VCMP,

      /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
      /// altivec VCMP*o instructions.  For lack of a better number, we use
      /// the opcode number encoding for the OPC field to identify the
      /// compare.  For example, 838 is VCMPGTSH.
      VCMPo,

      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
      /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
      /// condition register to branch on, OPC is the branch opcode to use
      /// (e.g. PPC::BLE), DESTBB is the destination block to branch to, and
      /// INFLAG is an optional input flag argument.
      COND_BRANCH,

      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
      /// loops.
      BDNZ, BDZ,
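
      // Illustrative example (editor's note, not from the original header):
      // together with MTCTR these form hardware counted loops, e.g.
      //   mtctr r3          // load the trip count into CTR
      // loop:
      //   ...               // loop body
      //   bdnz loop         // decrement CTR and branch while nonzero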
      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
      /// towards zero.  Used only as part of the long double-to-int
      /// conversion sequence.
      FADDRTZ,

      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
      MFFS,

      /// TC_RETURN - A tail call return.
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
      ///   operand #2 stack adjustment
      ///   operand #3 optional in flag
      TC_RETURN,

      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg
      /// calls
      CR6SET,
      CR6UNSET,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_.  Used by initial-exec TLS
      /// on PPC32.
      PPC32_GOT,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_.  Used by general dynamic
      /// and local dynamic TLS on PPC32.
      PPC32_PICGOT,

      /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
      /// base to sym\@got\@tprel\@ha.
      ADDIS_GOT_TPREL_HA,

      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
      /// TLS model, produces a LD instruction with base register G8RReg
      /// and offset sym\@got\@tprel\@l.  This completes the addition that
      /// finds the offset of "sym" relative to the thread pointer.
      LD_GOT_TPREL_L,

      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
      /// model, produces an ADD instruction that adds the contents of
      /// G8RReg to the thread pointer.  Symbol contains a relocation
      /// sym\@tls which is to be replaced by the thread pointer and
      /// identifies to the linker that the instruction is part of a
      /// TLS sequence.
      ADD_TLS,

      /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsgd\@ha.
      ADDIS_TLSGD_HA,

      /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
      /// ADDI_TLSGD_L_ADDR until after register assignment.
      ADDI_TLSGD_L,

      /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
      /// ADDI_TLSGD_L_ADDR until after register assignment.
      GET_TLS_ADDR,

      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
      /// register assignment.
      ADDI_TLSGD_L_ADDR,

      /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsld\@ha.
      ADDIS_TLSLD_HA,

      /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
      /// ADDI_TLSLD_L_ADDR until after register assignment.
      ADDI_TLSLD_L,

      /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
      /// ADDI_TLSLD_L_ADDR until after register assignment.
      GET_TLSLD_ADDR,

      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
      /// following register assignment.
      ADDI_TLSLD_L_ADDR,
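
      // Illustrative example (editor's note, not from the original header):
      // the three initial-exec nodes above correspond to the standard ppc64
      // ELF sequence
      //   addis r4, r2, sym@got@tprel@ha   // ADDIS_GOT_TPREL_HA
      //   ld    r4, sym@got@tprel@l(r4)    // LD_GOT_TPREL_L
      //   add   r4, r4, sym@tls            // ADD_TLS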
      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds X3 to
      /// sym\@dtprel\@ha.
      ADDIS_DTPREL_HA,

      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@dtprel\@l.
      ADDI_DTPREL_L,

      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
      /// during instruction selection to optimize a BUILD_VECTOR into
      /// operations on splats.  This is necessary to avoid losing these
      /// optimizations due to constant folding.
      VADD_SPLAT,

      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
      /// operand identifies the operating system entry point.
      SC,

      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
      CLRBHRB,

      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
      /// history rolling buffer entry.
      MFBHRBE,

      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
      RFEBB,

      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
      /// or stxvd2x instruction.  The chain is necessary because the
      /// sequence replaces a load and needs to provide the same number
      /// of outputs.
      XXSWAPD,

      /// An SDNode for swaps that are not associated with any loads/stores
      /// and thereby have no chain.
      SWAP_NO_CHAIN,

      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
      QVFPERM,

      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
      QVGPCI,

      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
      QVALIGNI,

      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
      QVESPLATI,

      /// QBFLT = Access the underlying QPX floating-point boolean
      /// representation.
      QBFLT,

      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits
      /// of the GPRC input, then stores it through Ptr.  Type can be either
      /// i16 or i32.
      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,

      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps
      /// it, then puts it in the bottom bits of the GPRC.  Type can be
      /// either i16 or i32.
      LBRX,

      /// STFIWX - The STFIWX instruction.  The first operand is an input
      /// token chain, then an f64 value to store, then an address to store
      /// it to.
      STFIWX,

      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
      /// load which sign-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWAX,

      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
      /// load which zero-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWZX,

      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
      /// Maps directly to an lxvd2x instruction that will be followed by
      /// an xxswapd.
      LXVD2X,

      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
      /// Maps directly to an stxvd2x instruction that will be preceded by
      /// an xxswapd.
      STXVD2X,
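
      // Illustrative example (editor's note, not from the original header):
      // a little-endian VSX vector load is selected as
      //   lxvd2x  vs34, 0, r3   // loads the two doublewords in BE order
      //   xxswapd vs34, vs34    // swaps them into LE element order
      // unless the later swap-removal optimization can prove the swap is
      // unnecessary.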
      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
      /// The 4xf32 load used for v4i1 constants.
      QVLFSb,

      /// GPRC = TOC_ENTRY GA, TOC
      /// Loads the entry for GA from the TOC, where the TOC base is given by
      /// the last operand.
      TOC_ENTRY
    };
  }

  /// Define some predicates that are used for node matching.
  namespace PPC {
    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUHUM instruction.
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUWUM instruction.
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUDUM instruction.
    bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGL* instruction with the specified unit size (1, 2 or 4
    /// bytes).
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGH* instruction with the specified unit size (1, 2 or 4
    /// bytes).
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGEW or VMRGOW instruction.
    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                             unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
    /// shift amount, otherwise return -1.
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                            SelectionDAG &DAG);

    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE
    /// operand specifies a splat of a single element that is suitable for
    /// input to VSPLTB/VSPLTH/VSPLTW.
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);

    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled
    /// by the XXINSERTW instruction introduced in ISA 3.0.  This is
    /// essentially any shuffle of v4f32/v4i32 vectors that just inserts one
    /// element from one vector into the other.  This function will also set
    /// a couple of output parameters for how much the source vector needs to
    /// be shifted and what byte number needs to be specified for the
    /// instruction to put the element in the desired location of the target
    /// vector.
    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                         unsigned &InsertAtByte, bool &Swap, bool IsLE);

    /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat
    /// the specified isSplatShuffleMask VECTOR_SHUFFLE mask.
    unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);

    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
    /// formed by using a vspltis[bhw] instruction of the specified element
    /// size, return the constant being splatted.  The ByteSize field
    /// indicates the number of bytes of each element [124] -> [bhw].
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);

    /// If this is a qvaligni shuffle mask, return the shift
    /// amount, otherwise return -1.
    int isQVALIGNIShuffleMask(SDNode *N);
  }
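
  // Editor's illustrative sketch (not part of the original header): a
  // typical use of these predicates during instruction selection, for a
  // hypothetical ShuffleVectorSDNode *SVN with byte-sized elements:
  //
  //   if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/1)) {
  //     unsigned Imm = PPC::getVSPLTImmediate(SVN, /*EltSize=*/1, DAG);
  //     // ... emit a VSPLTB with element-index immediate Imm ...
  //   }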
  class PPCTargetLowering : public TargetLowering {
    const PPCSubtarget &Subtarget;

  public:
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
                               const PPCSubtarget &STI);

    /// getTargetNodeName() - This method returns the name of a target
    /// specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// getPreferredVectorAction - The code we generate when vector types are
    /// legalized by promoting the integer element type is often much worse
    /// than the code we generate if we widen the type for applicable vector
    /// types.  The issue with promoting is that the vector is scalarized,
    /// the individual elements promoted, and then the vector is rebuilt.  So
    /// say we load a pair of v4i8's and shuffle them.  This will turn into a
    /// mess of 8 extending loads, moves back into VSRs (or memory ops if we
    /// don't have moves), and then the VPERM for the shuffle.  All in all a
    /// very slow sequence.
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
      const override {
      if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
        return TypeWidenVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }

    bool useSoftFloat() const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i32;
    }

    bool isCheapToSpeculateCttz() const override {
      return true;
    }

    bool isCheapToSpeculateCtlz() const override {
      return true;
    }

    bool supportSplitCSR(MachineFunction *MF) const override {
      return
        MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
        MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
    }

    void initializeSplitCSR(MachineBasicBlock *Entry) const override;

    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    /// getSetCCResultType - Return the ISD::SETCC ValueType
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Return true if the target always benefits from combining into FMA for
    /// a given value type.  This must typically return false on targets
    /// where FMA takes more cycles to execute than FADD.
    bool enableAggressiveFMAFusion(EVT VT) const override;

    /// getPreIndexedAddressParts - Returns true by value; sets the base
    /// pointer, offset pointer, and addressing mode by reference if the
    /// node's address can be legally represented as a pre-indexed load/store
    /// address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                   SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// SelectAddressRegReg - Given the specified address, check to see if it
    /// can be represented as an indexed [r+r] operation.  Returns false if
    /// it can be more efficiently represented with [r+imm].
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                             SelectionDAG &DAG) const;

    /// SelectAddressRegImm - Returns true if the address N can be
    /// represented by a base register plus a signed 16-bit displacement
    /// [r+imm], and if it is not better represented as reg+reg.  If Aligned
    /// is true, only accept displacements suitable for STD and friends,
    /// i.e. multiples of 4.
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG, bool Aligned) const;
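
    // Illustrative example (editor's note, not from the original header):
    // with Aligned=false this accepts any D-form access such as
    //   lwz r3, 12(r4)
    // while Aligned=true restricts the displacement to multiples of 4 so the
    // result is encodable in a DS-form instruction like
    //   std r3, 8(r4)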
    /// SelectAddressRegRegOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                 SelectionDAG &DAG) const;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal
    /// result type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          std::vector<SDNode *> *Created) const override;

    unsigned getRegisterByName(const char* RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
      return true;
    }

    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                  bool IsStore, bool IsLoad) const override;
    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                   bool IsStore, bool IsLoad) const override;
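
    // Illustrative example (editor's note, not from the original header):
    // with shouldInsertFencesForAtomic() returning true, a sequentially
    // consistent store is typically bracketed by a leading heavyweight sync,
    //   sync
    //   stw r4, 0(r3)
    // while an acquire load is followed by a trailing lwsync-based fence.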
    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;
    MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                        MachineBasicBlock *MBB,
                                        unsigned AtomicSize,
                                        unsigned BinOpcode) const;
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                                MachineBasicBlock *MBB,
                                                bool is8bit,
                                                unsigned Opcode) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value.  The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// getByValTypeAlignment - Return the desired alignment for ByVal
    /// aggregate function arguments in the caller parameter area.  This is
    /// the actual alignment, not its logarithm.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the
    /// Ops vector.  If it is invalid, don't add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "es")
        return InlineAsm::Constraint_es;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "Q")
        return InlineAsm::Constraint_Q;
      else if (ConstraintCode == "Z")
        return InlineAsm::Constraint_Z;
      else if (ConstraintCode == "Zy")
        return InlineAsm::Constraint_Zy;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// isLegalAddressingMode - Return true if the addressing mode
    /// represented by AM is legal for this target, for a load/store of the
    /// specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS) const override;

    /// isLegalICmpImmediate - Return true if the specified immediate is a
    /// legal icmp immediate, that is, the target has icmp instructions which
    /// can compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is a
    /// legal add immediate, that is, the target has add instructions which
    /// can add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;
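
    // Illustrative example (editor's note, not from the original header):
    // addi and cmpwi encode a 16-bit signed immediate, so
    //   addi  r3, r3, -32768
    //   cmpwi r3, 32767
    // are legal, while a constant such as 65536 must first be materialized
    // into a register (e.g. with lis/ori).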
    /// isTruncateFree - Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2.  e.g. on PPC it's free to truncate an i64 value
    /// in register X1 to i32 by referencing its sub-register R1.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool isFPExtFree(EVT VT) const override;

    /// \brief Returns true if it is beneficial to convert a load of a
    /// constant to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            unsigned Intrinsic) const override;

    /// getOptimalMemOpType - Returns the target-specific optimal type for
    /// load and store operations as a result of memset, memcpy, and memmove
    /// lowering.  If DstAlign is zero, the destination alignment can satisfy
    /// any constraint.  Similarly, if SrcAlign is zero, there isn't a need
    /// to check it against the alignment requirement, probably because the
    /// source does not need to be loaded.  If 'IsMemset' is true, that means
    /// it's expanding a memset.  If 'ZeroMemset' is true, that means it's a
    /// memset of zero.  'MemcpyStrSrc' indicates whether the memcpy source
    /// is constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT
    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                        MachineFunction &MF) const override;

    /// Is unaligned memory access allowed for the given type, and is it fast
    /// relative to software emulation?
    bool allowsMisalignedMemoryAccesses(EVT VT,
                                        unsigned AddrSpace,
                                        unsigned Align = 1,
                                        bool *Fast = nullptr) const override;

    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is
    /// faster than a pair of fmul and fadd instructions.  fmuladd intrinsics
    /// will be expanded to FMAs when this method returns true; otherwise
    /// fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Should we expand the build vector with shuffles?
    bool
    shouldExpandBuildVectorWithShuffles(EVT VT,
                                        unsigned DefinedValues) const override;

    /// createFastISel - This method returns a target-specific FastISel
    /// object, or null if the target does not support "fast" instruction
    /// selection.
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo) const override;

    /// \brief Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
      // We support any array type as "consecutive" block in the parameter
      // save area.  The element type defines the alignment requirement and
      // whether the argument should go in GPRs, FPRs, or VRs if available.
      //
      // Note that clang uses this capability both to implement the ELFv2
      // homogeneous float/vector aggregate ABI, and to avoid having to use
      // "byval" when passing aggregates that might fully fit in registers.
      return Ty->isArrayTy();
    }
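
    // Illustrative example (editor's note, not from the original header):
    // under the ELFv2 ABI a homogeneous float aggregate such as
    //   struct Vec4 { float x, y, z, w; };
    // is lowered by clang as a [4 x float] array argument, so this hook
    // reports that it needs consecutive FPRs rather than a byval slot.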
    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    /// Override to support customized stack guard loading.
    bool useLoadStackGuardNode() const override;
    void insertSSPDeclarations(Module &M) const override;

  private:
    struct ReuseLoadInfo {
      SDValue Ptr;
      SDValue Chain;
      SDValue ResChain;
      MachinePointerInfo MPI;
      bool IsInvariant;
      unsigned Alignment;
      AAMDNodes AAInfo;
      const MDNode *Ranges;

      ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
    };

    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                             SelectionDAG &DAG,
                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
                         SelectionDAG &DAG) const;

    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;
    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;

    SDValue getFramePointerFrameIndex(SelectionDAG &DAG) const;
    SDValue getReturnAddrFrameIndex(SelectionDAG &DAG) const;

    bool
    IsEligibleForTailCallOptimization(SDValue Callee,
                                      CallingConv::ID CalleeCC,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SelectionDAG &DAG) const;

    bool
    IsEligibleForTailCallOptimization_64SVR4(
      SDValue Callee,
      CallingConv::ID CalleeCC,
      ImmutableCallSite *CS,
      bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      SelectionDAG &DAG) const;

    SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
                                         SDValue Chain, SDValue &LROpOut,
                                         SDValue &FPOpOut,
                                         const SDLoc &dl) const;

    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                           const SDLoc &dl) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
    SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl,
                       bool isTailCall, bool isVarArg, bool isPatchPoint,
                       bool hasNest, SelectionDAG &DAG,
                       SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                       SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
                       SDValue &Callee, int SPDiff, unsigned NumBytes,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SmallVectorImpl<SDValue> &InVals,
                       ImmutableCallSite *CS) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                         bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;

    SDValue
    LowerCall(TargetLowering::CallLoweringInfo &CLI,
              SmallVectorImpl<SDValue> &InVals) const override;

    bool
    CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                   bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   LLVMContext &Context) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                              SelectionDAG &DAG, SDValue ArgVal,
                              const SDLoc &dl) const;

    SDValue LowerFormalArguments_Darwin(
      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
      SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerFormalArguments_64SVR4(
      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
      SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerFormalArguments_32SVR4(
      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
      SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;

    SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                       SDValue CallSeqStart,
                                       ISD::ArgFlagsTy Flags,
                                       SelectionDAG &DAG,
                                       const SDLoc &dl) const;

    SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall, bool isPatchPoint,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
                             ImmutableCallSite *CS) const;
    SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall, bool isPatchPoint,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
                             ImmutableCallSite *CS) const;
    SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall, bool isPatchPoint,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
                             ImmutableCallSite *CS) const;

    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;

    SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps,
                             bool &UseOneConstNR) const override;
    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                             unsigned &RefinementSteps) const override;
    unsigned combineRepeatedFPDivisors() const override;

    CCAssignFn *useFastISelCCs(unsigned Flag) const;
  };

  namespace PPC {
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo);
  }

  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                  CCValAssign::LocInfo &LocInfo,
                                  ISD::ArgFlagsTy &ArgFlags,
                                  CCState &State);

  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                         MVT &LocVT,
                                         CCValAssign::LocInfo &LocInfo,
                                         ISD::ArgFlagsTy &ArgFlags,
                                         CCState &State);

  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                           MVT &LocVT,
                                           CCValAssign::LocInfo &LocInfo,
                                           ISD::ArgFlagsTy &ArgFlags,
                                           CCState &State);
}

#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H