
Lines Matching +full:dynamic +full:- +full:bmi2

1 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
44 #define DEBUG_TYPE "x86-instr-info"
50 NoFusing("disable-spill-fusing",
53 PrintFailedFusing("print-failed-fuse-candidates",
58 ReMatPICStubLoad("remat-pic-stub-load",
59 cl::desc("Re-materialize load from stub in PIC mode"),
62 PartialRegUpdateClearance("partial-reg-update-clearance",
68 UndefRegClearance("undef-reg-clearance",
75 // (stored in bits 0 - 3)
83 // Do not insert the reverse map (MemOp -> RegOp) into the table.
84 // This may be needed because there is a many -> one mapping.
87 // Do not insert the forward map (RegOp -> MemOp) into the table.
96 // Used for RegOp->MemOp conversion.
97 // (stored in bits 8 - 15)
377 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
392 // AVX 256-bit foldable instructions in X86InstrInfo()
400 // AVX-512 foldable instructions in X86InstrInfo()
413 // AVX-512 foldable instructions (256-bit versions) in X86InstrInfo()
425 // AVX-512 foldable instructions (128-bit versions) in X86InstrInfo()
563 // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 in X86InstrInfo()
588 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
663 // AVX 256-bit foldable instructions in X86InstrInfo()
770 // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions in X86InstrInfo()
819 // AVX-512 foldable instructions in X86InstrInfo()
839 // AVX-512 foldable instructions (256-bit versions) in X86InstrInfo()
855 // AVX-512 foldable instructions (128-bit versions) in X86InstrInfo()
981 // to be 128 bits, so the load size would not match. in X86InstrInfo()
1138 // FIXME: TEST*rr -> swapped operand of TEST*mr. in X86InstrInfo()
1241 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
1291 // these instructions. When folded, the load is required to be 128 bits, so in X86InstrInfo()
1438 // AVX 256-bit foldable instructions in X86InstrInfo()
1664 // BMI/BMI2 foldable instructions in X86InstrInfo()
1680 // AVX-512 foldable instructions in X86InstrInfo()
1721 // AVX-512{F,VL} foldable instructions in X86InstrInfo()
1726 // AVX-512{F,VL} foldable instructions in X86InstrInfo()
1934 // AVX-512 VPERMI instructions with 3 source operands. in X86InstrInfo()
1948 // AVX-512 arithmetic instructions in X86InstrInfo()
1961 // AVX-512{F,VL} arithmetic instructions 256-bit in X86InstrInfo()
1974 // AVX-512{F,VL} arithmetic instructions 128-bit in X86InstrInfo()
1997 // AVX-512 foldable instructions in X86InstrInfo()
2010 // AVX-512{F,VL} foldable instructions 256-bit in X86InstrInfo()
2023 // AVX-512{F,VL} foldable instructions 128-bit in X86InstrInfo()
2073 // It's not always legal to reference the low 8 bits of the larger in isCoalescableExtInstr()
2074 // register in 32-bit mode. in isCoalescableExtInstr()
2110 const MachineFunction *MF = MI.getParent()->getParent(); in getSPAdjust()
2111 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); in getSPAdjust()
2115 unsigned StackAlign = TFI->getStackAlignment(); in getSPAdjust()
2117 (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign; in getSPAdjust()
2119 SPAdj -= MI.getOperand(1).getImm(); in getSPAdjust()
2124 return -SPAdj; in getSPAdjust()
2133 for (auto E = MBB->end(); I != E; ++I) { in getSPAdjust()
2134 if (I->getOpcode() == getCallFrameDestroyOpcode() || in getSPAdjust()
2135 I->isCall()) in getSPAdjust()
2141 if (I->getOpcode() != getCallFrameDestroyOpcode()) in getSPAdjust()
2144 return -(I->getOperand(1).getImm()); in getSPAdjust()
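The expression on line 2117 is the standard round-up-to-multiple idiom for aligning the call-frame-setup immediate. A self-contained sketch of the computation (the helper name is ours):

#include <cassert>

// Round Imm up to the next multiple of StackAlign, as done above:
// (Imm + StackAlign - 1) / StackAlign * StackAlign.
static unsigned roundUpToStackAlign(unsigned Imm, unsigned StackAlign) {
  return (Imm + StackAlign - 1) / StackAlign * StackAlign;
}

int main() {
  assert(roundUpToStackAlign(20, 16) == 32);
  assert(roundUpToStackAlign(32, 16) == 32);
  assert(roundUpToStackAlign(0, 16) == 0);
}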
2265 // Check for post-frame index elimination operations in isLoadFromStackSlotPostFE()
2287 // Check for post-frame index elimination operations in isStoreToStackSlotPostFE()
2296 // Don't waste compile time scanning use-def chains of physregs. in regIsPICBase()
2303 if (DefMI->getOpcode() != X86::MOVPC32r) in regIsPICBase()
2345 // AVX-512 in isReallyTriviallyReMaterializable()
2382 // Allow re-materialization of a PIC load. in isReallyTriviallyReMaterializable()
2385 const MachineFunction &MF = *MI.getParent()->getParent(); in isReallyTriviallyReMaterializable()
2404 // Allow re-materialization of lea PICBase + x. in isReallyTriviallyReMaterializable()
2405 const MachineFunction &MF = *MI.getParent()->getParent(); in isReallyTriviallyReMaterializable()
2428 for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { in isSafeToClobberEFLAGS()
2429 MachineOperand &MO = Iter->getOperand(j); in isSafeToClobberEFLAGS()
2446 while (Iter != E && Iter->isDebugValue()) in isSafeToClobberEFLAGS()
2454 if (S->isLiveIn(X86::EFLAGS)) in isSafeToClobberEFLAGS()
2463 // EFLAGS iff EFLAGS is not live-in. in isSafeToClobberEFLAGS()
2467 --Iter; in isSafeToClobberEFLAGS()
2469 while (Iter != B && Iter->isDebugValue()) in isSafeToClobberEFLAGS()
2470 --Iter; in isSafeToClobberEFLAGS()
2473 for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { in isSafeToClobberEFLAGS()
2474 MachineOperand &MO = Iter->getOperand(j); in isSafeToClobberEFLAGS()
2509 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side in reMaterialize()
2515 case X86::MOV32r_1: Value = -1; break; in reMaterialize()
2525 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); in reMaterialize()
2545 /// Check whether the shift count for a machine operand is non-zero.
2557 // Left shift instructions can be transformed into load-effective-address in isTruncatedShiftCountForLEA()
2569 MachineFunction &MF = *MI.getParent()->getParent(); in classifyLEAReg()
2580 // type (32-bit or 64-bit) we may just need to forbid SP. in classifyLEAReg()
2593 // This is for an LEA64_32r and incoming registers are 32-bit. One way or in classifyLEAReg()
2594 // another we need to add 64-bit registers to the final MI. in classifyLEAReg()
2601 MI.getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI); in classifyLEAReg()
2619 // Virtual register of the wrong class, we have to create a temporary 64-bit in classifyLEAReg()
2635 /// Helper for convertToThreeAddress when 16-bit LEA is disabled; use 32-bit
2636 /// LEA to form 3-address code by promoting to a 32-bit superregister and then
2637 /// truncating back down to a 16-bit subregister.
2647 MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); in convertToThreeAddressWithLEA()
2659 // well be shifting and then extracting the lower 16 bits. in convertToThreeAddressWithLEA()
2662 // leal -65(%rdx), %esi in convertToThreeAddressWithLEA()
2663 // But testing has shown this *does* help performance in 64-bit mode (at in convertToThreeAddressWithLEA()
2685 addRegOffset(MIB, leaInReg, true, -1); in convertToThreeAddressWithLEA()
2709 // well be shifting and then extracting the lower 16 bits. in convertToThreeAddressWithLEA()
2717 LV->replaceKillInstruction(Src2, MI, *InsMI2); in convertToThreeAddressWithLEA()
2730 LV->getVarInfo(leaInReg).Kills.push_back(NewMI); in convertToThreeAddressWithLEA()
2731 LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); in convertToThreeAddressWithLEA()
2733 LV->replaceKillInstruction(Src, MI, *InsMI); in convertToThreeAddressWithLEA()
2735 LV->replaceKillInstruction(Dest, MI, *ExtMI); in convertToThreeAddressWithLEA()
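The promotion is sound because addition commutes with truncation: doing the arithmetic in a 32-bit superregister and then taking the low 16-bit subregister yields the same bits as a native 16-bit add. A quick demonstration in plain C++ (not the LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xFFF0, B = 0x0025;
  uint32_t Wide = uint32_t(A) + uint32_t(B); // 32-bit LEA-style add
  uint16_t Narrow = uint16_t(A + B);         // native 16-bit add
  assert(uint16_t(Wide) == Narrow);          // low 16 bits agree
}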
2743 /// may be able to convert a two-address instruction into a true
2744 /// three-address instruction on demand. This allows the X86 target (for
2746 /// would require register copies due to two-addressness.
2760 MachineFunction &MF = *MI.getParent()->getParent(); in convertToThreeAddress()
2761 // All instructions input are two-addr instructions. Get the known operands. in convertToThreeAddress()
2766 // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When in convertToThreeAddress()
2767 // we have better subtarget support, enable the 16-bit LEA generation here. in convertToThreeAddress()
2768 // 16-bit LEA is also slow on Core2. in convertToThreeAddress()
2894 NewMI = addOffset(MIB, -1); in convertToThreeAddress()
2906 -1); in convertToThreeAddress()
2944 NewMI->getOperand(1).setIsUndef(isUndef); in convertToThreeAddress()
2945 NewMI->getOperand(3).setIsUndef(isUndef2); in convertToThreeAddress()
2948 LV->replaceKillInstruction(SrcReg2, MI, *NewMI); in convertToThreeAddress()
2966 NewMI->getOperand(1).setIsUndef(isUndef); in convertToThreeAddress()
2967 NewMI->getOperand(3).setIsUndef(isUndef2); in convertToThreeAddress()
2970 LV->replaceKillInstruction(Src2, MI, *NewMI); in convertToThreeAddress()
3026 LV->replaceKillInstruction(Src.getReg(), MI, *NewMI); in convertToThreeAddress()
3028 LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI); in convertToThreeAddress()
3031 MFI->insert(MI.getIterator(), NewMI); // Insert the new inst in convertToThreeAddress()
3190 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { in commuteInstructionImpl()
3192 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); in commuteInstructionImpl()
3197 case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) in commuteInstructionImpl()
3198 case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) in commuteInstructionImpl()
3199 case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) in commuteInstructionImpl()
3200 case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) in commuteInstructionImpl()
3201 case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) in commuteInstructionImpl()
3202 case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) in commuteInstructionImpl()
3217 WorkingMI.getOperand(3).setImm(Size - Amt); in commuteInstructionImpl()
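The commute on lines 3197-3217 relies on the double-shift identity SHRD(B, C, I) == SHLD(C, B, Size-I). A self-contained check for the 16-bit case, with the funnel shifts written out in C++ (valid for 0 < I < 16):

#include <cassert>
#include <cstdint>

static uint16_t shrd16(uint16_t B, uint16_t C, unsigned I) {
  return uint16_t((B >> I) | (C << (16 - I))); // shift B right, fill from C
}
static uint16_t shld16(uint16_t B, uint16_t C, unsigned I) {
  return uint16_t((B << I) | (C >> (16 - I))); // shift B left, fill from C
}

int main() {
  uint16_t B = 0xBEEF, C = 0x1234;
  for (unsigned I = 1; I < 16; ++I)
    assert(shrd16(B, C, I) == shld16(C, B, 16 - I));
}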
3292 case 0x00: Imm = 0x02; break; // LT -> GT in commuteInstructionImpl()
3293 case 0x01: Imm = 0x03; break; // LE -> GE in commuteInstructionImpl()
3294 case 0x02: Imm = 0x00; break; // GT -> LT in commuteInstructionImpl()
3295 case 0x03: Imm = 0x01; break; // GE -> LE in commuteInstructionImpl()
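Commuting the operands of these comparisons means replacing the predicate immediate with its mirror. A sketch of the mapping used above; symmetric predicates such as EQ and NE pass through unchanged (the fall-through behavior here is our simplification):

#include <cstdint>

// Mirror an ordering predicate when its two operands are swapped.
static uint8_t swapPredicateImm(uint8_t Imm) {
  switch (Imm) {
  case 0x00: return 0x02; // LT -> GT
  case 0x01: return 0x03; // LE -> GE
  case 0x02: return 0x00; // GT -> LT
  case 0x03: return 0x01; // GE -> LE
  default:   return Imm;  // EQ, NE, ... are their own mirror
  }
}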
3443 for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) { in findFMA3CommutedOpIndices()
3943 // Look for non-EHPad successors other than TBB. If we find exactly one, it in getFallThroughMBB()
3948 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { in getFallThroughMBB()
3949 if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB)) in getFallThroughMBB()
3969 --I; in AnalyzeBranchImpl()
3970 if (I->isDebugValue()) in AnalyzeBranchImpl()
3973 // Working from the bottom, when we see a non-terminator instruction, we're in AnalyzeBranchImpl()
3980 if (!I->isBranch()) in AnalyzeBranchImpl()
3984 if (I->getOpcode() == X86::JMP_1) { in AnalyzeBranchImpl()
3988 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
3994 std::next(I)->eraseFromParent(); in AnalyzeBranchImpl()
3999 // Delete the JMP if it's equivalent to a fall-through. in AnalyzeBranchImpl()
4000 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { in AnalyzeBranchImpl()
4002 I->eraseFromParent(); in AnalyzeBranchImpl()
4009 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4014 X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); in AnalyzeBranchImpl()
4020 MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4039 // We conditionally jump to the fall-through block. in AnalyzeBranchImpl()
4045 .addMBB(UnCondBrIter->getOperand(0).getMBB()); in AnalyzeBranchImpl()
4049 OldInst->eraseFromParent(); in AnalyzeBranchImpl()
4050 UnCondBrIter->eraseFromParent(); in AnalyzeBranchImpl()
4059 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4067 // opcodes fit one of the special multi-branch idioms. in AnalyzeBranchImpl()
4073 auto NewTBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4154 if (I->modifiesRegister(X86::EFLAGS, TRI)) { in analyzeBranchPredicate()
4159 if (I->readsRegister(X86::EFLAGS, TRI)) in analyzeBranchPredicate()
4168 if (Succ->isLiveIn(X86::EFLAGS)) in analyzeBranchPredicate()
4183 if (ConditionDef->getOpcode() == TestOpcode && in analyzeBranchPredicate()
4184 ConditionDef->getNumOperands() == 3 && in analyzeBranchPredicate()
4185 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) && in analyzeBranchPredicate()
4187 MBP.LHS = ConditionDef->getOperand(0); in analyzeBranchPredicate()
4203 --I; in RemoveBranch()
4204 if (I->isDebugValue()) in RemoveBranch()
4206 if (I->getOpcode() != X86::JMP_1 && in RemoveBranch()
4207 getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) in RemoveBranch()
4210 I->eraseFromParent(); in RemoveBranch()
4235 // If FBB is null, it is implied to be a fall-through block. in InsertBranch()
4254 "body is a fall-through."); in InsertBranch()
4269 // Two-way Conditional branch. Insert the second branch. in InsertBranch()
4291 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in canInsertSelect()
4318 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in insertSelect()
4321 MRI.getRegClass(DstReg)->getSize(), in insertSelect()
4335 // SrcReg(VR128) -> DestReg(GR64) in CopyToFromAsymmetricReg()
4336 // SrcReg(VR64) -> DestReg(GR64) in CopyToFromAsymmetricReg()
4337 // SrcReg(GR64) -> DestReg(VR128) in CopyToFromAsymmetricReg()
4338 // SrcReg(GR64) -> DestReg(VR64) in CopyToFromAsymmetricReg()
4360 // SrcReg(FR32) -> DestReg(GR32) in CopyToFromAsymmetricReg()
4361 // SrcReg(GR32) -> DestReg(FR32) in CopyToFromAsymmetricReg()
4462 // Copying to or from a physical H register on x86-64 requires a NOREX in copyPhysReg()
4469 "8-bit H register can not be copied outside GR8_NOREX"); in copyPhysReg()
4506 "Not having LAHF/SAHF only happens on 64-bit."); in copyPhysReg()
4509 // first frame index. See X86FrameLowering.cpp - usesTheStack. in copyPhysReg()
4524 // - Save the overflow flag OF into AL using SETO, and restore it using a in copyPhysReg()
4525 // signed 8-bit addition of AL and INT8_MAX. in copyPhysReg()
4526 // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH in copyPhysReg()
4528 // - When RAX/EAX is live and isn't the destination register, make sure it in copyPhysReg()
4542 // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. in copyPhysReg()
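The SETO/ADD pairing on lines 4524-4525 works because SETO leaves AL equal to 0 or 1, and adding 127 to an 8-bit value overflows (setting OF again) exactly when that value is 1. The arithmetic, checked portably:

#include <cassert>
#include <cstdint>

// Signed 8-bit addition overflows iff the mathematical sum leaves [-128, 127].
static bool addOverflows8(int8_t A, int8_t B) {
  int Sum = int(A) + int(B);
  return Sum < -128 || Sum > 127;
}

int main() {
  assert(addOverflows8(1, 127));  // AL == 1 (OF was set): 128 overflows, OF restored to 1
  assert(!addOverflows8(0, 127)); // AL == 0 (OF was clear): 127 fits, OF restored to 0
}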
4558 --I; in copyPhysReg()
4595 switch (RC->getSize()) { in getLoadStoreMaskRegOpcode()
4615 if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC)) in getLoadStoreRegOpcode()
4617 if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC)) in getLoadStoreRegOpcode()
4624 switch (RC->getSize()) { in getLoadStoreRegOpcode()
4628 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); in getLoadStoreRegOpcode()
4630 // Copying to or from a physical H register on x86-64 requires a NOREX in getLoadStoreRegOpcode()
4636 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); in getLoadStoreRegOpcode()
4647 llvm_unreachable("Unknown 4-byte regclass"); in getLoadStoreRegOpcode()
4659 llvm_unreachable("Unknown 8-byte regclass"); in getLoadStoreRegOpcode()
4661 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); in getLoadStoreRegOpcode()
4665 X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass"); in getLoadStoreRegOpcode()
4683 X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass"); in getLoadStoreRegOpcode()
4697 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); in getLoadStoreRegOpcode()
4698 assert(STI.hasVLX() && "Using 512-bit register requires AVX512"); in getLoadStoreRegOpcode()
4761 assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && in storeRegToStackSlot()
4763 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in storeRegToStackSlot()
4765 (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || in storeRegToStackSlot()
4780 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in storeRegToAddr()
4782 (*MMOBegin)->getAlignment() >= Alignment; in storeRegToAddr()
4800 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in loadRegFromStackSlot()
4802 (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || in loadRegFromStackSlot()
4815 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in loadRegFromAddr()
4817 (*MMOBegin)->getAlignment() >= Alignment; in loadRegFromAddr()
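In all four of these spill/reload paths the aligned vector move is chosen only when the guaranteed alignment covers max(register size, 16) bytes. The predicate, extracted into a sketch:

#include <algorithm>
#include <cstdint>

// Whether a spill slot (or arbitrary address) with the given guaranteed
// alignment may be accessed with aligned vector moves for a register class
// of RCSize bytes.
static bool canUseAlignedMove(uint32_t RCSize, uint32_t GuaranteedAlign) {
  uint32_t Required = std::max<uint32_t>(RCSize, 16);
  return GuaranteedAlign >= Required;
}

For example, a 32-byte YMM spill on a stack that only guarantees 16-byte alignment must fall back to the unaligned move.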
4942 // The shift instructions only modify ZF if their shift count is non-zero. in isDefConvertible()
5067 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) in optimizeCompareInstr()
5098 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); in optimizeCompareInstr()
5107 if (IsCmpZero && MI->getParent() != CmpInstr.getParent()) in optimizeCompareInstr()
5121 if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && in optimizeCompareInstr()
5122 J->getOperand(1).getReg() == SrcReg) { in optimizeCompareInstr()
5123 assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); in optimizeCompareInstr()
5149 RE = CmpInstr.getParent() == MI->getParent() in optimizeCompareInstr()
5151 : CmpInstr.getParent()->rend(); in optimizeCompareInstr()
5184 bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && in optimizeCompareInstr()
5185 Sub->getOperand(2).getReg() == SrcReg); in optimizeCompareInstr()
5190 // live-out. in optimizeCompareInstr()
5193 MachineBasicBlock::iterator E = CmpInstr.getParent()->end(); in optimizeCompareInstr()
5265 NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), in optimizeCompareInstr()
5281 // If EFLAGS is not killed nor re-defined, we should check whether it is in optimizeCompareInstr()
5282 // live-out. If it is live-out, do not optimize. in optimizeCompareInstr()
5285 for (MachineBasicBlock *Successor : MBB->successors()) in optimizeCompareInstr()
5286 if (Successor->isLiveIn(X86::EFLAGS)) in optimizeCompareInstr()
5298 InsertE = Sub->getParent()->rend(); in optimizeCompareInstr()
5301 if (!Instr->readsRegister(X86::EFLAGS, TRI) && in optimizeCompareInstr()
5302 Instr->modifiesRegister(X86::EFLAGS, TRI)) { in optimizeCompareInstr()
5303 Sub->getParent()->remove(Movr0Inst); in optimizeCompareInstr()
5304 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr), in optimizeCompareInstr()
5314 unsigned i = 0, e = Sub->getNumOperands(); in optimizeCompareInstr()
5316 MachineOperand &MO = Sub->getOperand(i); in optimizeCompareInstr()
5328 Op.first->setDesc(get(Op.second)); in optimizeCompareInstr()
5335 /// instructions in between do not load or store, and have no side effects.
5349 DefMI = MRI->getVRegDef(FoldAsLoadDefReg); in optimizeLoadInstr()
5352 if (!DefMI->isSafeToMove(nullptr, SawStore)) in optimizeLoadInstr()
5384 /// Expand a single-def pseudo instruction to a two-addr
5393 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); in Expand2AddrUndef()
5394 unsigned Reg = MIB->getOperand(0).getReg(); in Expand2AddrUndef()
5395 MIB->setDesc(Desc); in Expand2AddrUndef()
5401 assert(MIB->getOperand(1).getReg() == Reg && in Expand2AddrUndef()
5402 MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); in Expand2AddrUndef()
5406 /// Expand a single-def pseudo instruction to a two-addr
5414 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); in Expand2AddrKreg()
5415 MIB->setDesc(Desc); in Expand2AddrKreg()
5422 MachineBasicBlock &MBB = *MIB->getParent(); in expandMOV32r1()
5423 DebugLoc DL = MIB->getDebugLoc(); in expandMOV32r1()
5424 unsigned Reg = MIB->getOperand(0).getReg(); in expandMOV32r1()
5432 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r)); in expandMOV32r1()
5439 MachineBasicBlock &MBB = *MIB->getParent(); in ExpandMOVImmSExti8()
5440 DebugLoc DL = MIB->getDebugLoc(); in ExpandMOVImmSExti8()
5441 int64_t Imm = MIB->getOperand(1).getImm(); in ExpandMOVImmSExti8()
5448 assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || in ExpandMOVImmSExti8()
5449 MIB->getOpcode() == X86::MOV32ImmSExti8); in ExpandMOVImmSExti8()
5453 MBB.getParent()->getInfo<X86MachineFunctionInfo>(); in ExpandMOVImmSExti8()
5454 if (X86FI->getUsesRedZone()) { in ExpandMOVImmSExti8()
5455 MIB->setDesc(get(MIB->getOpcode() == X86::MOV32ImmSExti8 ? X86::MOV32ri in ExpandMOVImmSExti8()
5460 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and in ExpandMOVImmSExti8()
5464 MIB->setDesc(get(X86::POP64r)); in ExpandMOVImmSExti8()
5465 MIB->getOperand(0) in ExpandMOVImmSExti8()
5466 .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); in ExpandMOVImmSExti8()
5468 assert(MIB->getOpcode() == X86::MOV32ImmSExti8); in ExpandMOVImmSExti8()
5471 MIB->setDesc(get(X86::POP32r)); in ExpandMOVImmSExti8()
5477 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); in ExpandMOVImmSExti8()
5480 (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); in ExpandMOVImmSExti8()
5481 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; in ExpandMOVImmSExti8()
5483 TFL->BuildCFI(MBB, I, DL, in ExpandMOVImmSExti8()
5485 TFL->BuildCFI(MBB, std::next(I), DL, in ExpandMOVImmSExti8()
5486 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); in ExpandMOVImmSExti8()
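The PUSH imm8/POP pair materializes a sign-extended 8-bit immediate in three bytes where a plain MOV would need seven or more; semantically it is just sign extension:

#include <cassert>
#include <cstdint>

// What `push $Imm8 ; pop %reg` leaves in the register: Imm8 sign-extended
// to the full width.
static int64_t sextImm8(int8_t Imm) { return static_cast<int64_t>(Imm); }

int main() {
  assert(sextImm8(-1) == -1);   // all 64 bits set
  assert(sextImm8(127) == 127);
  // Note the expansion above avoids this form when the red zone is in use
  // (see getUsesRedZone on line 5454), since the transient push would
  // clobber it.
}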
5492 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
5496 MachineBasicBlock &MBB = *MIB->getParent(); in expandLoadStackGuard()
5497 DebugLoc DL = MIB->getDebugLoc(); in expandLoadStackGuard()
5498 unsigned Reg = MIB->getOperand(0).getReg(); in expandLoadStackGuard()
5500 cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); in expandLoadStackGuard()
5502 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( in expandLoadStackGuard()
5509 MIB->setDebugLoc(DL); in expandLoadStackGuard()
5510 MIB->setDesc(TII.get(X86::MOV64rm)); in expandLoadStackGuard()
5516 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); in expandPostRAPseudo()
5553 unsigned Reg = MIB->getOperand(0).getReg(); in expandPostRAPseudo()
5554 MIB->setDesc(get(X86::VPTERNLOGDZrri)); in expandPostRAPseudo()
5568 // KNL does not recognize dependency-breaking idioms for mask registers, in expandPostRAPseudo()
5595 // FrameIndex only - add an immediate offset (whether it's zero or not). in addOperands()
5600 // General Memory Addressing - we need to add any offset to an existing in addOperands()
5627 unsigned NumOps = MI.getDesc().getNumOperands() - 2; in FuseTwoAddrInst()
5637 MachineBasicBlock *MBB = InsertPt->getParent(); in FuseTwoAddrInst()
5638 MBB->insert(InsertPt, NewMI); in FuseTwoAddrInst()
5663 MachineBasicBlock *MBB = InsertPt->getParent(); in FuseInst()
5664 MBB->insert(InsertPt, NewMI); in FuseInst()
5673 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, in MakeM0Inst()
5689 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm(); in foldMemoryOperandCustom()
5694 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandCustom()
5703 NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); in foldMemoryOperandCustom()
5710 // Move the upper 64 bits of the second operand to the lower 64 bits. in foldMemoryOperandCustom()
5712 // TODO: In most cases AVX doesn't have an 8-byte alignment requirement. in foldMemoryOperandCustom()
5714 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandCustom()
5742 if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && in foldMemoryOperandImpl()
5750 NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; in foldMemoryOperandImpl()
5765 // Folding a memory location into the two-address part of a two-address in foldMemoryOperandImpl()
5794 auto I = OpcodeTablePtr->find(MI.getOpcode()); in foldMemoryOperandImpl()
5795 if (I != OpcodeTablePtr->end()) { in foldMemoryOperandImpl()
5796 unsigned Opcode = I->second.first; in foldMemoryOperandImpl()
5797 unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; in foldMemoryOperandImpl()
5802 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandImpl()
5808 // If this is a 64-bit load, but the spill slot is 32, then we can do in foldMemoryOperandImpl()
5809 // a 32-bit load which is implicitly zero-extended. This likely is in foldMemoryOperandImpl()
5824 // If this is the special case where we use a MOV32rm to load a 32-bit in foldMemoryOperandImpl()
5825 // value and zero-extend the top bits. Change the destination register in foldMemoryOperandImpl()
5826 // to a 32-bit one. in foldMemoryOperandImpl()
5827 unsigned DstReg = NewMI->getOperand(0).getReg(); in foldMemoryOperandImpl()
5829 NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); in foldMemoryOperandImpl()
5831 NewMI->getOperand(0).setSubReg(X86::sub_32bit); in foldMemoryOperandImpl()
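The narrowing on lines 5808-5831 is safe because on x86-64 any write to a 32-bit register zeroes bits 63:32, so reloading the 32-bit spill reproduces the full 64-bit value. In C++ terms:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Slot = 0x00000000DEADBEEFull; // value known to fit in 32 bits
  uint32_t Low = uint32_t(Slot);         // MOV32rm: 32-bit load
  uint64_t Reg = Low;                    // implicit zero-extension, as on hardware
  assert(Reg == Slot);
}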
5865 CommutedMI->eraseFromParent(); in foldMemoryOperandImpl()
5875 // Folding failed again - undo the commute before returning. in foldMemoryOperandImpl()
5884 UncommutedMI->eraseFromParent(); in foldMemoryOperandImpl()
5900 /// the first 32 or 64 bits of the destination register and leave the rest
5902 /// only update part of the destination register, and the non-updated part is
6036 // AVX-512 in hasUndefRegUpdate()
6060 /// high bits that are passed through are not live.
6094 // It wants to read and write the xmm sub-register. in breakPartialRegDependency()
6095 unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); in breakPartialRegDependency()
6115 if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) in foldMemoryOperandImpl()
6119 unsigned Size = MFI->getObjectSize(FrameIndex); in foldMemoryOperandImpl()
6120 unsigned Alignment = MFI->getObjectAlignment(FrameIndex); in foldMemoryOperandImpl()
6125 std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment()); in foldMemoryOperandImpl()
6156 /// ->
6162 /// ->
6171 MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); in isNonFoldablePartialRegisterLoad()
6235 if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) in foldMemoryOperandImpl()
6241 Alignment = (*LoadMI.memoperands_begin())->getAlignment(); in foldMemoryOperandImpl()
6300 // Create a constant-pool entry and operands to load from it. in foldMemoryOperandImpl()
6307 // x86-32 PIC requires a PIC base register for constant pools. in foldMemoryOperandImpl()
6320 // Create a constant-pool entry. in foldMemoryOperandImpl()
6325 Ty = Type::getFloatTy(MF.getFunction()->getContext()); in foldMemoryOperandImpl()
6327 Ty = Type::getDoubleTy(MF.getFunction()->getContext()); in foldMemoryOperandImpl()
6329 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16); in foldMemoryOperandImpl()
6332 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); in foldMemoryOperandImpl()
6334 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); in foldMemoryOperandImpl()
6355 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, in foldMemoryOperandImpl()
6370 unsigned Opc = I->second.first; in unfoldMemoryOperand()
6371 unsigned Index = I->second.second & TB_INDEX_MASK; in unfoldMemoryOperand()
6372 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in unfoldMemoryOperand()
6373 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in unfoldMemoryOperand()
6383 // TODO: Check if 32-byte or greater accesses are slow too? in unfoldMemoryOperand()
6414 MachineOperand &MO = NewMIs[0]->getOperand(i); in unfoldMemoryOperand()
6442 switch (DataMI->getOpcode()) { in unfoldMemoryOperand()
6451 MachineOperand &MO0 = DataMI->getOperand(0); in unfoldMemoryOperand()
6452 MachineOperand &MO1 = DataMI->getOperand(1); in unfoldMemoryOperand()
6455 switch (DataMI->getOpcode()) { in unfoldMemoryOperand()
6465 DataMI->setDesc(get(NewOpc)); in unfoldMemoryOperand()
6486 if (!N->isMachineOpcode()) in unfoldMemoryOperand()
6489 auto I = MemOp2RegOpTable.find(N->getMachineOpcode()); in unfoldMemoryOperand()
6492 unsigned Opc = I->second.first; in unfoldMemoryOperand()
6493 unsigned Index = I->second.second & TB_INDEX_MASK; in unfoldMemoryOperand()
6494 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in unfoldMemoryOperand()
6495 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in unfoldMemoryOperand()
6504 unsigned NumOps = N->getNumOperands(); in unfoldMemoryOperand()
6505 for (unsigned i = 0; i != NumOps-1; ++i) { in unfoldMemoryOperand()
6506 SDValue Op = N->getOperand(i); in unfoldMemoryOperand()
6507 if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) in unfoldMemoryOperand()
6509 else if (i < Index-NumDefs) in unfoldMemoryOperand()
6511 else if (i > Index-NumDefs) in unfoldMemoryOperand()
6514 SDValue Chain = N->getOperand(NumOps-1); in unfoldMemoryOperand()
6520 EVT VT = *RC->vt_begin(); in unfoldMemoryOperand()
6523 MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), in unfoldMemoryOperand()
6524 cast<MachineSDNode>(N)->memoperands_end()); in unfoldMemoryOperand()
6530 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte in unfoldMemoryOperand()
6532 unsigned Alignment = RC->getSize() == 32 ? 32 : 16; in unfoldMemoryOperand()
6534 (*MMOs.first)->getAlignment() >= Alignment; in unfoldMemoryOperand()
6540 cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); in unfoldMemoryOperand()
6548 VTs.push_back(*DstRC->vt_begin()); in unfoldMemoryOperand()
6550 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { in unfoldMemoryOperand()
6551 EVT VT = N->getValueType(i); in unfoldMemoryOperand()
6568 MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), in unfoldMemoryOperand()
6569 cast<MachineSDNode>(N)->memoperands_end()); in unfoldMemoryOperand()
6575 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte in unfoldMemoryOperand()
6577 unsigned Alignment = RC->getSize() == 32 ? 32 : 16; in unfoldMemoryOperand()
6579 (*MMOs.first)->getAlignment() >= Alignment; in unfoldMemoryOperand()
6586 cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second); in unfoldMemoryOperand()
6598 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in getOpcodeAfterMemoryUnfold()
6599 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in getOpcodeAfterMemoryUnfold()
6605 *LoadRegIndex = I->second.second & TB_INDEX_MASK; in getOpcodeAfterMemoryUnfold()
6606 return I->second.first; in getOpcodeAfterMemoryUnfold()
6612 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) in areLoadsFromSameBasePtr()
6614 unsigned Opc1 = Load1->getMachineOpcode(); in areLoadsFromSameBasePtr()
6615 unsigned Opc2 = Load2->getMachineOpcode(); in areLoadsFromSameBasePtr()
6692 if (Load1->getOperand(0) != Load2->getOperand(0) || in areLoadsFromSameBasePtr()
6693 Load1->getOperand(5) != Load2->getOperand(5)) in areLoadsFromSameBasePtr()
6696 if (Load1->getOperand(4) != Load2->getOperand(4)) in areLoadsFromSameBasePtr()
6699 if (Load1->getOperand(1) == Load2->getOperand(1) && in areLoadsFromSameBasePtr()
6700 Load1->getOperand(2) == Load2->getOperand(2)) { in areLoadsFromSameBasePtr()
6701 if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) in areLoadsFromSameBasePtr()
6705 if (isa<ConstantSDNode>(Load1->getOperand(3)) && in areLoadsFromSameBasePtr()
6706 isa<ConstantSDNode>(Load2->getOperand(3))) { in areLoadsFromSameBasePtr()
6707 Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); in areLoadsFromSameBasePtr()
6708 Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); in areLoadsFromSameBasePtr()
6719 if ((Offset2 - Offset1) / 8 > 64) in shouldScheduleLoadsNear()
6722 unsigned Opc1 = Load1->getMachineOpcode(); in shouldScheduleLoadsNear()
6723 unsigned Opc2 = Load2->getMachineOpcode(); in shouldScheduleLoadsNear()
6737 EVT VT = Load1->getValueType(0); in shouldScheduleLoadsNear()
6740 // XMM registers. In 64-bit mode we can be a bit more aggressive since we in shouldScheduleLoadsNear()
6765 // Check if this processor supports macro-fusion. Since this is a minor in shouldScheduleAdjacent()
6946 "X86-64 PIC uses RIP relative addressing"); in getGlobalBaseReg()
6948 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); in getGlobalBaseReg()
6949 unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); in getGlobalBaseReg()
6955 MachineRegisterInfo &RegInfo = MF->getRegInfo(); in getGlobalBaseReg()
6957 X86FI->setGlobalBaseReg(GlobalBaseReg); in getGlobalBaseReg()
6981 // AVX 128-bit support
6997 // AVX 256-bit support
7035 if (Row[domain-1] == opcode) in lookup()
7042 if (Row[domain-1] == opcode) in lookupAVX2()
7066 "256-bit vector operations only available in AVX2"); in setExecutionDomain()
7070 MI.setDesc(get(table[Domain - 1])); in setExecutionDomain()
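The lookup on lines 7035-7042 scans rows of equivalent opcodes, one column per execution domain. A sketch with made-up opcode values (the real ReplaceableInstrs tables and opcode numbering live in X86InstrInfo.cpp):

#include <cstdint>

// Each row holds one instruction's encoding in the three SSE execution
// domains (PackedSingle, PackedDouble, PackedInt).  Values are illustrative.
static const uint16_t ReplaceableInstrs[][3] = {
    {/*ANDPSrr*/ 100, /*ANDPDrr*/ 101, /*PANDrr*/ 102},
    {/*ORPSrr*/  110, /*ORPDrr*/  111, /*PORrr*/  112},
};

// Domains are 1-based here, matching `Row[domain-1]` above.
static const uint16_t *lookupRow(uint16_t Opcode, unsigned Domain) {
  for (const auto &Row : ReplaceableInstrs)
    if (Row[Domain - 1] == Opcode)
      return Row;
  return nullptr;
}

// setExecutionDomain then retargets the instruction to Row[NewDomain - 1],
// as in the setDesc call above.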
7286 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; in isAssociativeAndCommutative()
7292 /// This is an architecture-specific helper function of reassociateOps.
7341 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"}, in getSerializableDirectMachineOperandTargetFlags()
7342 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"}, in getSerializableDirectMachineOperandTargetFlags()
7343 {MO_GOT, "x86-got"}, in getSerializableDirectMachineOperandTargetFlags()
7344 {MO_GOTOFF, "x86-gotoff"}, in getSerializableDirectMachineOperandTargetFlags()
7345 {MO_GOTPCREL, "x86-gotpcrel"}, in getSerializableDirectMachineOperandTargetFlags()
7346 {MO_PLT, "x86-plt"}, in getSerializableDirectMachineOperandTargetFlags()
7347 {MO_TLSGD, "x86-tlsgd"}, in getSerializableDirectMachineOperandTargetFlags()
7348 {MO_TLSLD, "x86-tlsld"}, in getSerializableDirectMachineOperandTargetFlags()
7349 {MO_TLSLDM, "x86-tlsldm"}, in getSerializableDirectMachineOperandTargetFlags()
7350 {MO_GOTTPOFF, "x86-gottpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7351 {MO_INDNTPOFF, "x86-indntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7352 {MO_TPOFF, "x86-tpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7353 {MO_DTPOFF, "x86-dtpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7354 {MO_NTPOFF, "x86-ntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7355 {MO_GOTNTPOFF, "x86-gotntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7356 {MO_DLLIMPORT, "x86-dllimport"}, in getSerializableDirectMachineOperandTargetFlags()
7357 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"}, in getSerializableDirectMachineOperandTargetFlags()
7358 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"}, in getSerializableDirectMachineOperandTargetFlags()
7359 {MO_TLVP, "x86-tlvp"}, in getSerializableDirectMachineOperandTargetFlags()
7360 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"}, in getSerializableDirectMachineOperandTargetFlags()
7361 {MO_SECREL, "x86-secrel"}}; in getSerializableDirectMachineOperandTargetFlags()
7367 /// global base register for x86-32.
7377 // Don't do anything if this is 64-bit as 64-bit PIC in runOnMachineFunction()
7383 if (!TM->isPositionIndependent()) in runOnMachineFunction()
7387 unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); in runOnMachineFunction()
7408 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); in runOnMachineFunction()
7413 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register in runOnMachineFunction()
7414 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) in runOnMachineFunction()
7447 if (MFI->getNumLocalDynamicTLSAccesses() < 2) { in runOnMachineFunction()
7453 return VisitNode(DT->getRootNode(), 0); in runOnMachineFunction()
7456 // Visit the dominator subtree rooted at Node in pre-order.
7457 // If TLSBaseAddrReg is non-null, then use that to replace any
7462 MachineBasicBlock *BB = Node->getBlock(); in VisitNode()
7466 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; in VisitNode()
7468 switch (I->getOpcode()) { in VisitNode()
7483 for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); in VisitNode()
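The cleanup pass walks the dominator tree pre-order: the first TLS base-address call seen on a path captures the result into a virtual register, and every dominated call is rewritten as a copy from it. A structural sketch with hypothetical types (the real pass operates on MachineDomTreeNode and MachineInstr):

#include <vector>

struct DomNode {                  // stand-in for MachineDomTreeNode
  bool HasTLSBaseAddrCall = false;
  std::vector<DomNode *> Children;
};

// Returns the register (0 = none) holding the TLS base address after this
// subtree.  Pre-order: a block is handled before its children, so dominated
// calls always see the dominating definition.
static unsigned visitNode(DomNode *N, unsigned TLSBaseAddrReg) {
  if (N->HasTLSBaseAddrCall) {
    if (TLSBaseAddrReg == 0)
      TLSBaseAddrReg = 1; // SetRegister: stash %rax/%eax into a fresh vreg
    // else: ReplaceTLSBaseAddrCall rewrites the call as a copy from the vreg
  }
  for (DomNode *C : N->Children)
    visitNode(C, TLSBaseAddrReg);
  return TLSBaseAddrReg;
}

As line 7447 shows, the walk is only worthwhile when a function makes at least two local-dynamic TLS accesses.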
7495 MachineFunction *MF = I.getParent()->getParent(); in ReplaceTLSBaseAddrCall()
7496 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); in ReplaceTLSBaseAddrCall()
7503 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX) in ReplaceTLSBaseAddrCall()
7515 MachineFunction *MF = I.getParent()->getParent(); in SetRegister()
7516 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); in SetRegister()
7521 MachineRegisterInfo &RegInfo = MF->getRegInfo(); in SetRegister()
7530 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) in SetRegister()
7537 return "Local Dynamic TLS Access Clean-up"; in getPassName()