1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that X86 uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef X86ISELLOWERING_H 16 #define X86ISELLOWERING_H 17 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/Target/TargetLowering.h" 21 #include "llvm/Target/TargetOptions.h" 22 23 namespace llvm { 24 class X86Subtarget; 25 class X86TargetMachine; 26 27 namespace X86ISD { 28 // X86 Specific DAG Nodes 29 enum NodeType { 30 // Start the numbering where the builtin ops leave off. 31 FIRST_NUMBER = ISD::BUILTIN_OP_END, 32 33 /// BSF - Bit scan forward. 34 /// BSR - Bit scan reverse. 35 BSF, 36 BSR, 37 38 /// SHLD, SHRD - Double shift instructions. These correspond to 39 /// X86::SHLDxx and X86::SHRDxx instructions. 40 SHLD, 41 SHRD, 42 43 /// FAND - Bitwise logical AND of floating point values. This corresponds 44 /// to X86::ANDPS or X86::ANDPD. 45 FAND, 46 47 /// FOR - Bitwise logical OR of floating point values. This corresponds 48 /// to X86::ORPS or X86::ORPD. 49 FOR, 50 51 /// FXOR - Bitwise logical XOR of floating point values. This corresponds 52 /// to X86::XORPS or X86::XORPD. 53 FXOR, 54 55 /// FANDN - Bitwise logical ANDNOT of floating point values. This 56 /// corresponds to X86::ANDNPS or X86::ANDNPD. 57 FANDN, 58 59 /// FSRL - Bitwise logical right shift of floating point values. These 60 /// corresponds to X86::PSRLDQ. 61 FSRL, 62 63 /// CALL - These operations represent an abstract X86 call 64 /// instruction, which includes a bunch of information. In particular the 65 /// operands of these node are: 66 /// 67 /// #0 - The incoming token chain 68 /// #1 - The callee 69 /// #2 - The number of arg bytes the caller pushes on the stack. 70 /// #3 - The number of arg bytes the callee pops off the stack. 71 /// #4 - The value to pass in AL/AX/EAX (optional) 72 /// #5 - The value to pass in DL/DX/EDX (optional) 73 /// 74 /// The result values of these nodes are: 75 /// 76 /// #0 - The outgoing token chain 77 /// #1 - The first register result value (optional) 78 /// #2 - The second register result value (optional) 79 /// 80 CALL, 81 82 /// RDTSC_DAG - This operation implements the lowering for 83 /// readcyclecounter 84 RDTSC_DAG, 85 86 /// X86 Read Time-Stamp Counter and Processor ID. 87 RDTSCP_DAG, 88 89 /// X86 Read Performance Monitoring Counters. 90 RDPMC_DAG, 91 92 /// X86 compare and logical compare instructions. 93 CMP, COMI, UCOMI, 94 95 /// X86 bit-test instructions. 96 BT, 97 98 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS 99 /// operand, usually produced by a CMP instruction. 100 SETCC, 101 102 /// X86 Select 103 SELECT, 104 105 // Same as SETCC except it's materialized with a sbb and the value is all 106 // one's or all zero's. 107 SETCC_CARRY, // R = carry_bit ? ~0 : 0 108 109 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. 110 /// Operands are two FP values to compare; result is a mask of 111 /// 0s or 1s. Generally DTRT for C/C++ with NaNs. 112 FSETCC, 113 114 /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values, 115 /// result in an integer GPR. Needs masking for scalar result. 116 FGETSIGNx86, 117 118 /// X86 conditional moves. Operand 0 and operand 1 are the two values 119 /// to select from. Operand 2 is the condition code, and operand 3 is the 120 /// flag operand produced by a CMP or TEST instruction. It also writes a 121 /// flag result. 122 CMOV, 123 124 /// X86 conditional branches. Operand 0 is the chain operand, operand 1 125 /// is the block to branch if condition is true, operand 2 is the 126 /// condition code, and operand 3 is the flag operand produced by a CMP 127 /// or TEST instruction. 128 BRCOND, 129 130 /// Return with a flag operand. Operand 0 is the chain operand, operand 131 /// 1 is the number of bytes of stack to pop. 132 RET_FLAG, 133 134 /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx. 135 REP_STOS, 136 137 /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. 138 REP_MOVS, 139 140 /// GlobalBaseReg - On Darwin, this node represents the result of the popl 141 /// at function entry, used for PIC code. 142 GlobalBaseReg, 143 144 /// Wrapper - A wrapper node for TargetConstantPool, 145 /// TargetExternalSymbol, and TargetGlobalAddress. 146 Wrapper, 147 148 /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP 149 /// relative displacements. 150 WrapperRIP, 151 152 /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector 153 /// to an MMX vector. If you think this is too close to the previous 154 /// mnemonic, so do I; blame Intel. 155 MOVDQ2Q, 156 157 /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX 158 /// vector to a GPR. 159 MMX_MOVD2W, 160 161 /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to 162 /// i32, corresponds to X86::PEXTRB. 163 PEXTRB, 164 165 /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to 166 /// i32, corresponds to X86::PEXTRW. 167 PEXTRW, 168 169 /// INSERTPS - Insert any element of a 4 x float vector into any element 170 /// of a destination 4 x floatvector. 171 INSERTPS, 172 173 /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector, 174 /// corresponds to X86::PINSRB. 175 PINSRB, 176 177 /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, 178 /// corresponds to X86::PINSRW. 179 PINSRW, MMX_PINSRW, 180 181 /// PSHUFB - Shuffle 16 8-bit values within a vector. 182 PSHUFB, 183 184 /// ANDNP - Bitwise Logical AND NOT of Packed FP values. 185 ANDNP, 186 187 /// PSIGN - Copy integer sign. 188 PSIGN, 189 190 /// BLENDV - Blend where the selector is a register. 191 BLENDV, 192 193 /// BLENDI - Blend where the selector is an immediate. 194 BLENDI, 195 196 // SUBUS - Integer sub with unsigned saturation. 197 SUBUS, 198 199 /// HADD - Integer horizontal add. 200 HADD, 201 202 /// HSUB - Integer horizontal sub. 203 HSUB, 204 205 /// FHADD - Floating point horizontal add. 206 FHADD, 207 208 /// FHSUB - Floating point horizontal sub. 209 FHSUB, 210 211 /// UMAX, UMIN - Unsigned integer max and min. 212 UMAX, UMIN, 213 214 /// SMAX, SMIN - Signed integer max and min. 215 SMAX, SMIN, 216 217 /// FMAX, FMIN - Floating point max and min. 218 /// 219 FMAX, FMIN, 220 221 /// FMAXC, FMINC - Commutative FMIN and FMAX. 222 FMAXC, FMINC, 223 224 /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal 225 /// approximation. Note that these typically require refinement 226 /// in order to obtain suitable precision. 227 FRSQRT, FRCP, 228 229 // TLSADDR - Thread Local Storage. 230 TLSADDR, 231 232 // TLSBASEADDR - Thread Local Storage. A call to get the start address 233 // of the TLS block for the current module. 234 TLSBASEADDR, 235 236 // TLSCALL - Thread Local Storage. When calling to an OS provided 237 // thunk at the address from an earlier relocation. 238 TLSCALL, 239 240 // EH_RETURN - Exception Handling helpers. 241 EH_RETURN, 242 243 // EH_SJLJ_SETJMP - SjLj exception handling setjmp. 244 EH_SJLJ_SETJMP, 245 246 // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. 247 EH_SJLJ_LONGJMP, 248 249 /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for 250 /// the list of operands. 251 TC_RETURN, 252 253 // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements. 254 VZEXT_MOVL, 255 256 // VZEXT - Vector integer zero-extend. 257 VZEXT, 258 259 // VSEXT - Vector integer signed-extend. 260 VSEXT, 261 262 // VTRUNC - Vector integer truncate. 263 VTRUNC, 264 265 // VTRUNC - Vector integer truncate with mask. 266 VTRUNCM, 267 268 // VFPEXT - Vector FP extend. 269 VFPEXT, 270 271 // VFPROUND - Vector FP round. 272 VFPROUND, 273 274 // VSHL, VSRL - 128-bit vector logical left / right shift 275 VSHLDQ, VSRLDQ, 276 277 // VSHL, VSRL, VSRA - Vector shift elements 278 VSHL, VSRL, VSRA, 279 280 // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate 281 VSHLI, VSRLI, VSRAI, 282 283 // CMPP - Vector packed double/float comparison. 284 CMPP, 285 286 // PCMP* - Vector integer comparisons. 287 PCMPEQ, PCMPGT, 288 // PCMP*M - Vector integer comparisons, the result is in a mask vector. 289 PCMPEQM, PCMPGTM, 290 291 /// CMPM, CMPMU - Vector comparison generating mask bits for fp and 292 /// integer signed and unsigned data types. 293 CMPM, 294 CMPMU, 295 296 // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. 297 ADD, SUB, ADC, SBB, SMUL, 298 INC, DEC, OR, XOR, AND, 299 300 BEXTR, // BEXTR - Bit field extract 301 302 UMUL, // LOW, HI, FLAGS = umul LHS, RHS 303 304 // MUL_IMM - X86 specific multiply by immediate. 305 MUL_IMM, 306 307 // PTEST - Vector bitwise comparisons. 308 PTEST, 309 310 // TESTP - Vector packed fp sign bitwise comparisons. 311 TESTP, 312 313 // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector. 314 TESTM, 315 TESTNM, 316 317 // OR/AND test for masks 318 KORTEST, 319 320 // Several flavors of instructions with vector shuffle behaviors. 321 PACKSS, 322 PACKUS, 323 PALIGNR, 324 PSHUFD, 325 PSHUFHW, 326 PSHUFLW, 327 SHUFP, 328 MOVDDUP, 329 MOVSHDUP, 330 MOVSLDUP, 331 MOVLHPS, 332 MOVLHPD, 333 MOVHLPS, 334 MOVLPS, 335 MOVLPD, 336 MOVSD, 337 MOVSS, 338 UNPCKL, 339 UNPCKH, 340 VPERMILP, 341 VPERMV, 342 VPERMV3, 343 VPERMIV3, 344 VPERMI, 345 VPERM2X128, 346 VBROADCAST, 347 // masked broadcast 348 VBROADCASTM, 349 // Insert/Extract vector element 350 VINSERT, 351 VEXTRACT, 352 353 // PMULUDQ - Vector multiply packed unsigned doubleword integers 354 PMULUDQ, 355 // PMULUDQ - Vector multiply packed signed doubleword integers 356 PMULDQ, 357 358 // FMA nodes 359 FMADD, 360 FNMADD, 361 FMSUB, 362 FNMSUB, 363 FMADDSUB, 364 FMSUBADD, 365 366 // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, 367 // according to %al. An operator is needed so that this can be expanded 368 // with control flow. 369 VASTART_SAVE_XMM_REGS, 370 371 // WIN_ALLOCA - Windows's _chkstk call to do stack probing. 372 WIN_ALLOCA, 373 374 // SEG_ALLOCA - For allocating variable amounts of stack space when using 375 // segmented stacks. Check if the current stacklet has enough space, and 376 // falls back to heap allocation if not. 377 SEG_ALLOCA, 378 379 // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui. 380 WIN_FTOL, 381 382 // Memory barrier 383 MEMBARRIER, 384 MFENCE, 385 SFENCE, 386 LFENCE, 387 388 // FNSTSW16r - Store FP status word into i16 register. 389 FNSTSW16r, 390 391 // SAHF - Store contents of %ah into %eflags. 392 SAHF, 393 394 // RDRAND - Get a random integer and indicate whether it is valid in CF. 395 RDRAND, 396 397 // RDSEED - Get a NIST SP800-90B & C compliant random integer and 398 // indicate whether it is valid in CF. 399 RDSEED, 400 401 // PCMP*STRI 402 PCMPISTRI, 403 PCMPESTRI, 404 405 // XTEST - Test if in transactional execution. 406 XTEST, 407 408 // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. 409 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, 410 LCMPXCHG8_DAG, 411 LCMPXCHG16_DAG, 412 413 // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. 414 VZEXT_LOAD, 415 416 // FNSTCW16m - Store FP control world into i16 memory. 417 FNSTCW16m, 418 419 /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the 420 /// integer destination in memory and a FP reg source. This corresponds 421 /// to the X86::FIST*m instructions and the rounding mode change stuff. It 422 /// has two inputs (token chain and address) and two outputs (int value 423 /// and token chain). 424 FP_TO_INT16_IN_MEM, 425 FP_TO_INT32_IN_MEM, 426 FP_TO_INT64_IN_MEM, 427 428 /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the 429 /// integer source in memory and FP reg result. This corresponds to the 430 /// X86::FILD*m instructions. It has three inputs (token chain, address, 431 /// and source type) and two outputs (FP value and token chain). FILD_FLAG 432 /// also produces a flag). 433 FILD, 434 FILD_FLAG, 435 436 /// FLD - This instruction implements an extending load to FP stack slots. 437 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain 438 /// operand, ptr to load from, and a ValueType node indicating the type 439 /// to load to. 440 FLD, 441 442 /// FST - This instruction implements a truncating store to FP stack 443 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 444 /// chain operand, value to store, address, and a ValueType to store it 445 /// as. 446 FST, 447 448 /// VAARG_64 - This instruction grabs the address of the next argument 449 /// from a va_list. (reads and modifies the va_list in memory) 450 VAARG_64 451 452 // WARNING: Do not add anything in the end unless you want the node to 453 // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be 454 // thought as target memory ops! 455 }; 456 } 457 458 /// Define some predicates that are used for node matching. 459 namespace X86 { 460 /// isVEXTRACT128Index - Return true if the specified 461 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is 462 /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions. 463 bool isVEXTRACT128Index(SDNode *N); 464 465 /// isVINSERT128Index - Return true if the specified 466 /// INSERT_SUBVECTOR operand specifies a subvector insert that is 467 /// suitable for input to VINSERTF128, VINSERTI128 instructions. 468 bool isVINSERT128Index(SDNode *N); 469 470 /// isVEXTRACT256Index - Return true if the specified 471 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is 472 /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions. 473 bool isVEXTRACT256Index(SDNode *N); 474 475 /// isVINSERT256Index - Return true if the specified 476 /// INSERT_SUBVECTOR operand specifies a subvector insert that is 477 /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions. 478 bool isVINSERT256Index(SDNode *N); 479 480 /// getExtractVEXTRACT128Immediate - Return the appropriate 481 /// immediate to extract the specified EXTRACT_SUBVECTOR index 482 /// with VEXTRACTF128, VEXTRACTI128 instructions. 483 unsigned getExtractVEXTRACT128Immediate(SDNode *N); 484 485 /// getInsertVINSERT128Immediate - Return the appropriate 486 /// immediate to insert at the specified INSERT_SUBVECTOR index 487 /// with VINSERTF128, VINSERT128 instructions. 488 unsigned getInsertVINSERT128Immediate(SDNode *N); 489 490 /// getExtractVEXTRACT256Immediate - Return the appropriate 491 /// immediate to extract the specified EXTRACT_SUBVECTOR index 492 /// with VEXTRACTF64X4, VEXTRACTI64x4 instructions. 493 unsigned getExtractVEXTRACT256Immediate(SDNode *N); 494 495 /// getInsertVINSERT256Immediate - Return the appropriate 496 /// immediate to insert at the specified INSERT_SUBVECTOR index 497 /// with VINSERTF64x4, VINSERTI64x4 instructions. 498 unsigned getInsertVINSERT256Immediate(SDNode *N); 499 500 /// isZeroNode - Returns true if Elt is a constant zero or a floating point 501 /// constant +0.0. 502 bool isZeroNode(SDValue Elt); 503 504 /// isOffsetSuitableForCodeModel - Returns true of the given offset can be 505 /// fit into displacement field of the instruction. 506 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 507 bool hasSymbolicDisplacement = true); 508 509 510 /// isCalleePop - Determines whether the callee is required to pop its 511 /// own arguments. Callee pop is necessary to support tail calls. 512 bool isCalleePop(CallingConv::ID CallingConv, 513 bool is64Bit, bool IsVarArg, bool TailCallOpt); 514 } 515 516 //===--------------------------------------------------------------------===// 517 // X86TargetLowering - X86 Implementation of the TargetLowering interface 518 class X86TargetLowering final : public TargetLowering { 519 public: 520 explicit X86TargetLowering(X86TargetMachine &TM); 521 522 unsigned getJumpTableEncoding() const override; 523 getScalarShiftAmountTy(EVT LHSTy)524 MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } 525 526 const MCExpr * 527 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 528 const MachineBasicBlock *MBB, unsigned uid, 529 MCContext &Ctx) const override; 530 531 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 532 /// jumptable. 533 SDValue getPICJumpTableRelocBase(SDValue Table, 534 SelectionDAG &DAG) const override; 535 const MCExpr * 536 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 537 unsigned JTI, MCContext &Ctx) const override; 538 539 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 540 /// function arguments in the caller parameter area. For X86, aggregates 541 /// that contains are placed at 16-byte boundaries while the rest are at 542 /// 4-byte boundaries. 543 unsigned getByValTypeAlignment(Type *Ty) const override; 544 545 /// getOptimalMemOpType - Returns the target specific optimal type for load 546 /// and store operations as a result of memset, memcpy, and memmove 547 /// lowering. If DstAlign is zero that means it's safe to destination 548 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 549 /// means there isn't a need to check it against alignment requirement, 550 /// probably because the source does not need to be loaded. If 'IsMemset' is 551 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 552 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 553 /// source is constant so it does not need to be loaded. 554 /// It returns EVT::Other if the type should be determined using generic 555 /// target-independent logic. 556 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 557 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, 558 MachineFunction &MF) const override; 559 560 /// isSafeMemOpType - Returns true if it's safe to use load / store of the 561 /// specified type to expand memcpy / memset inline. This is mostly true 562 /// for all types except for some special cases. For example, on X86 563 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 564 /// also does type conversion. Note the specified type doesn't have to be 565 /// legal as the hook is used before type legalization. 566 bool isSafeMemOpType(MVT VT) const override; 567 568 /// allowsUnalignedMemoryAccesses - Returns true if the target allows 569 /// unaligned memory accesses. of the specified type. Returns whether it 570 /// is "fast" by reference in the second argument. 571 bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, 572 bool *Fast) const override; 573 574 /// LowerOperation - Provide custom lowering hooks for some operations. 575 /// 576 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 577 578 /// ReplaceNodeResults - Replace the results of node with an illegal result 579 /// type with new values built out of custom code. 580 /// 581 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 582 SelectionDAG &DAG) const override; 583 584 585 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 586 587 /// isTypeDesirableForOp - Return true if the target has native support for 588 /// the specified value type and it is 'desirable' to use the type for the 589 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 590 /// instruction encodings are longer and some i16 instructions are slow. 591 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 592 593 /// isTypeDesirable - Return true if the target has native support for the 594 /// specified value type and it is 'desirable' to use the type. e.g. On x86 595 /// i16 is legal, but undesirable since i16 instruction encodings are longer 596 /// and some i16 instructions are slow. 597 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 598 599 MachineBasicBlock * 600 EmitInstrWithCustomInserter(MachineInstr *MI, 601 MachineBasicBlock *MBB) const override; 602 603 604 /// getTargetNodeName - This method returns the name of a target specific 605 /// DAG node. 606 const char *getTargetNodeName(unsigned Opcode) const override; 607 608 /// getSetCCResultType - Return the value type to use for ISD::SETCC. 609 EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; 610 611 /// computeKnownBitsForTargetNode - Determine which of the bits specified 612 /// in Mask are known to be either zero or one and return them in the 613 /// KnownZero/KnownOne bitsets. 614 void computeKnownBitsForTargetNode(const SDValue Op, 615 APInt &KnownZero, 616 APInt &KnownOne, 617 const SelectionDAG &DAG, 618 unsigned Depth = 0) const override; 619 620 // ComputeNumSignBitsForTargetNode - Determine the number of bits in the 621 // operation that are sign bits. 622 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 623 const SelectionDAG &DAG, 624 unsigned Depth) const override; 625 626 bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, 627 int64_t &Offset) const override; 628 629 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 630 631 bool ExpandInlineAsm(CallInst *CI) const override; 632 633 ConstraintType 634 getConstraintType(const std::string &Constraint) const override; 635 636 /// Examine constraint string and operand type and determine a weight value. 637 /// The operand object must already have been set up with the operand type. 638 ConstraintWeight 639 getSingleConstraintMatchWeight(AsmOperandInfo &info, 640 const char *constraint) const override; 641 642 const char *LowerXConstraint(EVT ConstraintVT) const override; 643 644 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 645 /// vector. If it is invalid, don't add anything to Ops. If hasMemory is 646 /// true it means one of the asm constraint of the inline asm instruction 647 /// being processed is 'm'. 648 void LowerAsmOperandForConstraint(SDValue Op, 649 std::string &Constraint, 650 std::vector<SDValue> &Ops, 651 SelectionDAG &DAG) const override; 652 653 /// getRegForInlineAsmConstraint - Given a physical register constraint 654 /// (e.g. {edx}), return the register number and the register class for the 655 /// register. This should only be used for C_Register constraints. On 656 /// error, this returns a register number of 0. 657 std::pair<unsigned, const TargetRegisterClass*> 658 getRegForInlineAsmConstraint(const std::string &Constraint, 659 MVT VT) const override; 660 661 /// isLegalAddressingMode - Return true if the addressing mode represented 662 /// by AM is legal for this target, for a load/store of the specified type. 663 bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; 664 665 /// isLegalICmpImmediate - Return true if the specified immediate is legal 666 /// icmp immediate, that is the target has icmp instructions which can 667 /// compare a register against the immediate without having to materialize 668 /// the immediate into a register. 669 bool isLegalICmpImmediate(int64_t Imm) const override; 670 671 /// isLegalAddImmediate - Return true if the specified immediate is legal 672 /// add immediate, that is the target has add instructions which can 673 /// add a register and the immediate without having to materialize 674 /// the immediate into a register. 675 bool isLegalAddImmediate(int64_t Imm) const override; 676 677 /// \brief Return the cost of the scaling factor used in the addressing 678 /// mode represented by AM for this target, for a load/store 679 /// of the specified type. 680 /// If the AM is supported, the return value must be >= 0. 681 /// If the AM is not supported, it returns a negative value. 682 int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; 683 684 bool isVectorShiftByScalarCheap(Type *Ty) const override; 685 686 /// isTruncateFree - Return true if it's free to truncate a value of 687 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 688 /// register EAX to i16 by referencing its sub-register AX. 689 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 690 bool isTruncateFree(EVT VT1, EVT VT2) const override; 691 692 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 693 694 /// isZExtFree - Return true if any actual instruction that defines a 695 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 696 /// register. This does not necessarily include registers defined in 697 /// unknown ways, such as incoming arguments, or copies from unknown 698 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 699 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 700 /// all instructions that define 32-bit values implicit zero-extend the 701 /// result out to 64 bits. 702 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 703 bool isZExtFree(EVT VT1, EVT VT2) const override; 704 bool isZExtFree(SDValue Val, EVT VT2) const override; 705 706 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster 707 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be 708 /// expanded to FMAs when this method returns true, otherwise fmuladd is 709 /// expanded to fmul + fadd. 710 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; 711 712 /// isNarrowingProfitable - Return true if it's profitable to narrow 713 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 714 /// from i32 to i8 but not from i32 to i16. 715 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 716 717 /// isFPImmLegal - Returns true if the target can instruction select the 718 /// specified FP immediate natively. If false, the legalizer will 719 /// materialize the FP immediate as a load from a constant pool. 720 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; 721 722 /// isShuffleMaskLegal - Targets can use this to indicate that they only 723 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 724 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask 725 /// values are assumed to be legal. 726 bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, 727 EVT VT) const override; 728 729 /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is 730 /// used by Targets can use this to indicate if there is a suitable 731 /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant 732 /// pool entry. 733 bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, 734 EVT VT) const override; 735 736 /// ShouldShrinkFPConstant - If true, then instruction selection should 737 /// seek to shrink the FP constant of the specified type to a smaller type 738 /// in order to save space and / or reduce runtime. ShouldShrinkFPConstant(EVT VT)739 bool ShouldShrinkFPConstant(EVT VT) const override { 740 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 741 // expensive than a straight movsd. On the other hand, it's important to 742 // shrink long double fp constant since fldt is very slow. 743 return !X86ScalarSSEf64 || VT == MVT::f80; 744 } 745 getSubtarget()746 const X86Subtarget* getSubtarget() const { 747 return Subtarget; 748 } 749 750 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 751 /// computed in an SSE register, not on the X87 floating point stack. isScalarFPTypeInSSEReg(EVT VT)752 bool isScalarFPTypeInSSEReg(EVT VT) const { 753 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 754 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 755 } 756 757 /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine 758 /// for fptoui. 759 bool isTargetFTOL() const; 760 761 /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be 762 /// used for fptoui to the given type. isIntegerTypeFTOL(EVT VT)763 bool isIntegerTypeFTOL(EVT VT) const { 764 return isTargetFTOL() && VT == MVT::i64; 765 } 766 767 /// \brief Returns true if it is beneficial to convert a load of a constant 768 /// to just the constant itself. 769 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 770 Type *Ty) const override; 771 772 /// Intel processors have a unified instruction and data cache getClearCacheBuiltinName()773 const char * getClearCacheBuiltinName() const override { 774 return nullptr; // nothing to do, move along. 775 } 776 777 unsigned getRegisterByName(const char* RegName, EVT VT) const override; 778 779 /// createFastISel - This method returns a target specific FastISel object, 780 /// or null if the target does not support "fast" ISel. 781 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 782 const TargetLibraryInfo *libInfo) const override; 783 784 /// getStackCookieLocation - Return true if the target stores stack 785 /// protector cookies at a fixed offset in some non-standard address 786 /// space, and populates the address space and offset as 787 /// appropriate. 788 bool getStackCookieLocation(unsigned &AddressSpace, 789 unsigned &Offset) const override; 790 791 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, 792 SelectionDAG &DAG) const; 793 794 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; 795 796 /// \brief Reset the operation actions based on target options. 797 void resetOperationActions() override; 798 799 /// \brief Customize the preferred legalization strategy for certain types. 800 LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; 801 802 protected: 803 std::pair<const TargetRegisterClass*, uint8_t> 804 findRepresentativeClass(MVT VT) const override; 805 806 private: 807 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 808 /// make the right decision when generating code for different targets. 809 const X86Subtarget *Subtarget; 810 const DataLayout *TD; 811 812 /// Used to store the TargetOptions so that we don't waste time resetting 813 /// the operation actions unless we have to. 814 TargetOptions TO; 815 816 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 817 /// floating point ops. 818 /// When SSE is available, use it for f32 operations. 819 /// When SSE2 is available, use it for f64 operations. 820 bool X86ScalarSSEf32; 821 bool X86ScalarSSEf64; 822 823 /// LegalFPImmediates - A list of legal fp immediates. 824 std::vector<APFloat> LegalFPImmediates; 825 826 /// addLegalFPImmediate - Indicate that this x86 target can instruction 827 /// select the specified FP immediate natively. addLegalFPImmediate(const APFloat & Imm)828 void addLegalFPImmediate(const APFloat& Imm) { 829 LegalFPImmediates.push_back(Imm); 830 } 831 832 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 833 CallingConv::ID CallConv, bool isVarArg, 834 const SmallVectorImpl<ISD::InputArg> &Ins, 835 SDLoc dl, SelectionDAG &DAG, 836 SmallVectorImpl<SDValue> &InVals) const; 837 SDValue LowerMemArgument(SDValue Chain, 838 CallingConv::ID CallConv, 839 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 840 SDLoc dl, SelectionDAG &DAG, 841 const CCValAssign &VA, MachineFrameInfo *MFI, 842 unsigned i) const; 843 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 844 SDLoc dl, SelectionDAG &DAG, 845 const CCValAssign &VA, 846 ISD::ArgFlagsTy Flags) const; 847 848 // Call lowering helpers. 849 850 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 851 /// for tail call optimization. Targets which want to do tail call 852 /// optimization should implement this function. 853 bool IsEligibleForTailCallOptimization(SDValue Callee, 854 CallingConv::ID CalleeCC, 855 bool isVarArg, 856 bool isCalleeStructRet, 857 bool isCallerStructRet, 858 Type *RetTy, 859 const SmallVectorImpl<ISD::OutputArg> &Outs, 860 const SmallVectorImpl<SDValue> &OutVals, 861 const SmallVectorImpl<ISD::InputArg> &Ins, 862 SelectionDAG& DAG) const; 863 bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; 864 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 865 SDValue Chain, bool IsTailCall, bool Is64Bit, 866 int FPDiff, SDLoc dl) const; 867 868 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 869 SelectionDAG &DAG) const; 870 871 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, 872 bool isSigned, 873 bool isReplace) const; 874 875 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 876 SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; 877 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 878 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; 879 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 880 SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; 881 SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const; 882 883 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 884 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 885 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 886 SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, 887 int64_t Offset, SelectionDAG &DAG) const; 888 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 889 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 890 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 891 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 892 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 893 SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; 894 SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; 895 SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; 896 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 897 SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; 898 SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; 899 SDValue LowerToBT(SDValue And, ISD::CondCode CC, 900 SDLoc dl, SelectionDAG &DAG) const; 901 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 902 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 903 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 904 SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; 905 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 906 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 907 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 908 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 909 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 910 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 911 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 912 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 913 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 914 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 915 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 916 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 917 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 918 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; 919 920 SDValue 921 LowerFormalArguments(SDValue Chain, 922 CallingConv::ID CallConv, bool isVarArg, 923 const SmallVectorImpl<ISD::InputArg> &Ins, 924 SDLoc dl, SelectionDAG &DAG, 925 SmallVectorImpl<SDValue> &InVals) const override; 926 SDValue LowerCall(CallLoweringInfo &CLI, 927 SmallVectorImpl<SDValue> &InVals) const override; 928 929 SDValue LowerReturn(SDValue Chain, 930 CallingConv::ID CallConv, bool isVarArg, 931 const SmallVectorImpl<ISD::OutputArg> &Outs, 932 const SmallVectorImpl<SDValue> &OutVals, 933 SDLoc dl, SelectionDAG &DAG) const override; 934 935 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; 936 937 bool mayBeEmittedAsTailCall(CallInst *CI) const override; 938 939 MVT getTypeForExtArgOrReturn(MVT VT, 940 ISD::NodeType ExtendKind) const override; 941 942 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 943 bool isVarArg, 944 const SmallVectorImpl<ISD::OutputArg> &Outs, 945 LLVMContext &Context) const override; 946 947 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 948 949 /// Utility function to emit atomic-load-arith operations (and, or, xor, 950 /// nand, max, min, umax, umin). It takes the corresponding instruction to 951 /// expand, the associated machine basic block, and the associated X86 952 /// opcodes for reg/reg. 953 MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, 954 MachineBasicBlock *MBB) const; 955 956 /// Utility function to emit atomic-load-arith operations (and, or, xor, 957 /// nand, add, sub, swap) for 64-bit operands on 32-bit target. 958 MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, 959 MachineBasicBlock *MBB) const; 960 961 // Utility function to emit the low-level va_arg code for X86-64. 962 MachineBasicBlock *EmitVAARG64WithCustomInserter( 963 MachineInstr *MI, 964 MachineBasicBlock *MBB) const; 965 966 /// Utility function to emit the xmm reg save portion of va_start. 967 MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( 968 MachineInstr *BInstr, 969 MachineBasicBlock *BB) const; 970 971 MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, 972 MachineBasicBlock *BB) const; 973 974 MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, 975 MachineBasicBlock *BB) const; 976 977 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, 978 MachineBasicBlock *BB, 979 bool Is64Bit) const; 980 981 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, 982 MachineBasicBlock *BB) const; 983 984 MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, 985 MachineBasicBlock *BB) const; 986 987 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, 988 MachineBasicBlock *MBB) const; 989 990 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, 991 MachineBasicBlock *MBB) const; 992 993 MachineBasicBlock *emitFMA3Instr(MachineInstr *MI, 994 MachineBasicBlock *MBB) const; 995 996 /// Emit nodes that will be selected as "test Op0,Op0", or something 997 /// equivalent, for use with the given x86 condition code. 998 SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl, 999 SelectionDAG &DAG) const; 1000 1001 /// Emit nodes that will be selected as "cmp Op0,Op1", or something 1002 /// equivalent, for use with the given x86 condition code. 1003 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, 1004 SelectionDAG &DAG) const; 1005 1006 /// Convert a comparison if required by the subtarget. 1007 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; 1008 }; 1009 1010 namespace X86 { 1011 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1012 const TargetLibraryInfo *libInfo); 1013 } 1014 } 1015 1016 #endif // X86ISELLOWERING_H 1017