
Lines Matching +full:dynamic +full:- +full:bmi2

1 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
44 #define DEBUG_TYPE "x86-instr-info"
50 NoFusing("disable-spill-fusing",
53 PrintFailedFusing("print-failed-fuse-candidates",
58 ReMatPICStubLoad("remat-pic-stub-load",
59 cl::desc("Re-materialize load from stub in PIC mode"),
62 PartialRegUpdateClearance("partial-reg-update-clearance",
68 UndefRegClearance("undef-reg-clearance",
75 // (stored in bits 0 - 3)
83 // Do not insert the reverse map (MemOp -> RegOp) into the table.
84 // This may be needed because there is a many -> one mapping.
87 // Do not insert the forward map (RegOp -> MemOp) into the table.
96 // Used for RegOp->MemOp conversion.
97 // (stored in bits 8 - 15)
377 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
392 // AVX 256-bit foldable instructions in X86InstrInfo()
400 // AVX-512 foldable instructions in X86InstrInfo()
413 // AVX-512 foldable instructions (256-bit versions) in X86InstrInfo()
425 // AVX-512 foldable instructions (128-bit versions) in X86InstrInfo()
563 // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 in X86InstrInfo()
588 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
663 // AVX 256-bit foldable instructions in X86InstrInfo()
770 // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions in X86InstrInfo()
819 // AVX-512 foldable instructions in X86InstrInfo()
839 // AVX-512 foldable instructions (256-bit versions) in X86InstrInfo()
855 // AVX-512 foldable instructions (128-bit versions) in X86InstrInfo()
981 // to be 128 bits, so the load size would not match. in X86InstrInfo()
1138 // FIXME: TEST*rr -> swapped operand of TEST*mr. in X86InstrInfo()
1241 // AVX 128-bit versions of foldable instructions in X86InstrInfo()
1291 // these instructions. When folded, the load is required to be 128 bits, so in X86InstrInfo()
1438 // AVX 256-bit foldable instructions in X86InstrInfo()
1664 // BMI/BMI2 foldable instructions in X86InstrInfo()
1680 // AVX-512 foldable instructions in X86InstrInfo()
1721 // AVX-512{F,VL} foldable instructions in X86InstrInfo()
1726 // AVX-512{F,VL} foldable instructions in X86InstrInfo()
1934 // AVX-512 VPERMI instructions with 3 source operands. in X86InstrInfo()
1948 // AVX-512 arithmetic instructions in X86InstrInfo()
1961 // AVX-512{F,VL} arithmetic instructions 256-bit in X86InstrInfo()
1974 // AVX-512{F,VL} arithmetic instructions 128-bit in X86InstrInfo()
1997 // AVX-512 foldable instructions in X86InstrInfo()
2010 // AVX-512{F,VL} foldable instructions 256-bit in X86InstrInfo()
2023 // AVX-512{F,VL} foldable instructions 128-bit in X86InstrInfo()
2073 // It's not always legal to reference the low 8 bits of the larger in isCoalescableExtInstr()
2074 // register in 32-bit mode. in isCoalescableExtInstr()
2110 const MachineFunction *MF = MI.getParent()->getParent(); in getSPAdjust()
2111 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); in getSPAdjust()
2115 unsigned StackAlign = TFI->getStackAlignment(); in getSPAdjust()
2117 (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign; in getSPAdjust()
2119 SPAdj -= MI.getOperand(1).getImm(); in getSPAdjust()
2124 return -SPAdj; in getSPAdjust()
2133 for (auto E = MBB->end(); I != E; ++I) { in getSPAdjust()
2134 if (I->getOpcode() == getCallFrameDestroyOpcode() || in getSPAdjust()
2135 I->isCall()) in getSPAdjust()
2141 if (I->getOpcode() != getCallFrameDestroyOpcode()) in getSPAdjust()
2144 return -(I->getOperand(1).getImm()); in getSPAdjust()
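The expression on line 2117 is the standard round-up-to-multiple idiom for aligning the call-frame-setup immediate. A self-contained sketch of the computation (the helper name is ours):

#include <cassert>

// Round Imm up to the next multiple of StackAlign, as done above:
// (Imm + StackAlign - 1) / StackAlign * StackAlign.
static unsigned roundUpToStackAlign(unsigned Imm, unsigned StackAlign) {
  return (Imm + StackAlign - 1) / StackAlign * StackAlign;
}

int main() {
  assert(roundUpToStackAlign(20, 16) == 32);
  assert(roundUpToStackAlign(32, 16) == 32);
  assert(roundUpToStackAlign(0, 16) == 0);
}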
2265 // Check for post-frame index elimination operations in isLoadFromStackSlotPostFE()
2287 // Check for post-frame index elimination operations in isStoreToStackSlotPostFE()
2296 // Don't waste compile time scanning use-def chains of physregs. in regIsPICBase()
2303 if (DefMI->getOpcode() != X86::MOVPC32r) in regIsPICBase()
2345 // AVX-512 in isReallyTriviallyReMaterializable()
2382 // Allow re-materialization of a PIC load. in isReallyTriviallyReMaterializable()
2385 const MachineFunction &MF = *MI.getParent()->getParent(); in isReallyTriviallyReMaterializable()
2404 // Allow re-materialization of lea PICBase + x. in isReallyTriviallyReMaterializable()
2405 const MachineFunction &MF = *MI.getParent()->getParent(); in isReallyTriviallyReMaterializable()
2428 for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { in isSafeToClobberEFLAGS()
2429 MachineOperand &MO = Iter->getOperand(j); in isSafeToClobberEFLAGS()
2446 while (Iter != E && Iter->isDebugValue()) in isSafeToClobberEFLAGS()
2454 if (S->isLiveIn(X86::EFLAGS)) in isSafeToClobberEFLAGS()
2463 // EFLAGS iff EFLAGS is not live-in. in isSafeToClobberEFLAGS()
2467 --Iter; in isSafeToClobberEFLAGS()
2469 while (Iter != B && Iter->isDebugValue()) in isSafeToClobberEFLAGS()
2470 --Iter; in isSafeToClobberEFLAGS()
2473 for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { in isSafeToClobberEFLAGS()
2474 MachineOperand &MO = Iter->getOperand(j); in isSafeToClobberEFLAGS()
2509 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side in reMaterialize()
2515 case X86::MOV32r_1: Value = -1; break; in reMaterialize()
2525 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); in reMaterialize()
2545 /// Check whether the shift count for a machine operand is non-zero.
2557 // Left shift instructions can be transformed into load-effective-address in isTruncatedShiftCountForLEA()
2569 MachineFunction &MF = *MI.getParent()->getParent(); in classifyLEAReg()
2580 // type (32-bit or 64-bit) we may just need to forbid SP. in classifyLEAReg()
2593 // This is for an LEA64_32r and incoming registers are 32-bit. One way or in classifyLEAReg()
2594 // another we need to add 64-bit registers to the final MI. in classifyLEAReg()
2601 MI.getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI); in classifyLEAReg()
2619 // Virtual register of the wrong class, we have to create a temporary 64-bit in classifyLEAReg()
2635 /// Helper for convertToThreeAddress when 16-bit LEA is disabled; use 32-bit
2636 /// LEA to form 3-address code by promoting to a 32-bit superregister and then
2637 /// truncating back down to a 16-bit subregister.
2647 MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); in convertToThreeAddressWithLEA()
2659 // well be shifting and then extracting the lower 16 bits. in convertToThreeAddressWithLEA()
2662 // leal -65(%rdx), %esi in convertToThreeAddressWithLEA()
2663 // But testing has shown this *does* help performance in 64-bit mode (at in convertToThreeAddressWithLEA()
2685 addRegOffset(MIB, leaInReg, true, -1); in convertToThreeAddressWithLEA()
2709 // well be shifting and then extracting the lower 16 bits. in convertToThreeAddressWithLEA()
2717 LV->replaceKillInstruction(Src2, MI, *InsMI2); in convertToThreeAddressWithLEA()
2730 LV->getVarInfo(leaInReg).Kills.push_back(NewMI); in convertToThreeAddressWithLEA()
2731 LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); in convertToThreeAddressWithLEA()
2733 LV->replaceKillInstruction(Src, MI, *InsMI); in convertToThreeAddressWithLEA()
2735 LV->replaceKillInstruction(Dest, MI, *ExtMI); in convertToThreeAddressWithLEA()
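The promotion is sound because addition commutes with truncation: doing the arithmetic in a 32-bit superregister and then taking the low 16-bit subregister yields the same bits as a native 16-bit add. A quick demonstration in plain C++ (not the LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xFFF0, B = 0x0025;
  uint32_t Wide = uint32_t(A) + uint32_t(B); // 32-bit LEA-style add
  uint16_t Narrow = uint16_t(A + B);         // native 16-bit add
  assert(uint16_t(Wide) == Narrow);          // low 16 bits agree
}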
2743 /// may be able to convert a two-address instruction into a true
2744 /// three-address instruction on demand. This allows the X86 target (for
2746 /// would require register copies due to two-addressness.
2760 MachineFunction &MF = *MI.getParent()->getParent(); in convertToThreeAddress()
2761 // All instructions input are two-addr instructions. Get the known operands. in convertToThreeAddress()
2766 // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When in convertToThreeAddress()
2767 // we have better subtarget support, enable the 16-bit LEA generation here. in convertToThreeAddress()
2768 // 16-bit LEA is also slow on Core2. in convertToThreeAddress()
2894 NewMI = addOffset(MIB, -1); in convertToThreeAddress()
2906 -1); in convertToThreeAddress()
2944 NewMI->getOperand(1).setIsUndef(isUndef); in convertToThreeAddress()
2945 NewMI->getOperand(3).setIsUndef(isUndef2); in convertToThreeAddress()
2948 LV->replaceKillInstruction(SrcReg2, MI, *NewMI); in convertToThreeAddress()
2966 NewMI->getOperand(1).setIsUndef(isUndef); in convertToThreeAddress()
2967 NewMI->getOperand(3).setIsUndef(isUndef2); in convertToThreeAddress()
2970 LV->replaceKillInstruction(Src2, MI, *NewMI); in convertToThreeAddress()
3026 LV->replaceKillInstruction(Src.getReg(), MI, *NewMI); in convertToThreeAddress()
3028 LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI); in convertToThreeAddress()
3031 MFI->insert(MI.getIterator(), NewMI); // Insert the new inst in convertToThreeAddress()
3190 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { in commuteInstructionImpl()
3192 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); in commuteInstructionImpl()
3197 case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) in commuteInstructionImpl()
3198 case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) in commuteInstructionImpl()
3199 case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) in commuteInstructionImpl()
3200 case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) in commuteInstructionImpl()
3201 case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) in commuteInstructionImpl()
3202 case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) in commuteInstructionImpl()
3217 WorkingMI.getOperand(3).setImm(Size - Amt); in commuteInstructionImpl()
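The commute on lines 3197-3217 relies on the double-shift identity SHRD(B, C, I) == SHLD(C, B, Size-I). A self-contained check for the 16-bit case, with the funnel shifts written out in C++ (valid for 0 < I < 16):

#include <cassert>
#include <cstdint>

static uint16_t shrd16(uint16_t B, uint16_t C, unsigned I) {
  return uint16_t((B >> I) | (C << (16 - I))); // shift B right, fill from C
}
static uint16_t shld16(uint16_t B, uint16_t C, unsigned I) {
  return uint16_t((B << I) | (C >> (16 - I))); // shift B left, fill from C
}

int main() {
  uint16_t B = 0xBEEF, C = 0x1234;
  for (unsigned I = 1; I < 16; ++I)
    assert(shrd16(B, C, I) == shld16(C, B, 16 - I));
}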
3292 case 0x00: Imm = 0x02; break; // LT -> GT in commuteInstructionImpl()
3293 case 0x01: Imm = 0x03; break; // LE -> GE in commuteInstructionImpl()
3294 case 0x02: Imm = 0x00; break; // GT -> LT in commuteInstructionImpl()
3295 case 0x03: Imm = 0x01; break; // GE -> LE in commuteInstructionImpl()
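Commuting the operands of these comparisons means replacing the predicate immediate with its mirror. A sketch of the mapping used above; symmetric predicates such as EQ and NE pass through unchanged (the fall-through behavior here is our simplification):

#include <cstdint>

// Mirror an ordering predicate when its two operands are swapped.
static uint8_t swapPredicateImm(uint8_t Imm) {
  switch (Imm) {
  case 0x00: return 0x02; // LT -> GT
  case 0x01: return 0x03; // LE -> GE
  case 0x02: return 0x00; // GT -> LT
  case 0x03: return 0x01; // GE -> LE
  default:   return Imm;  // EQ, NE, ... are their own mirror
  }
}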
3443 for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) { in findFMA3CommutedOpIndices()
3943 // Look for non-EHPad successors other than TBB. If we find exactly one, it in getFallThroughMBB()
3948 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { in getFallThroughMBB()
3949 if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB)) in getFallThroughMBB()
3969 --I; in AnalyzeBranchImpl()
3970 if (I->isDebugValue()) in AnalyzeBranchImpl()
3973 // Working from the bottom, when we see a non-terminator instruction, we're in AnalyzeBranchImpl()
3980 if (!I->isBranch()) in AnalyzeBranchImpl()
3984 if (I->getOpcode() == X86::JMP_1) { in AnalyzeBranchImpl()
3988 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
3994 std::next(I)->eraseFromParent(); in AnalyzeBranchImpl()
3999 // Delete the JMP if it's equivalent to a fall-through. in AnalyzeBranchImpl()
4000 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { in AnalyzeBranchImpl()
4002 I->eraseFromParent(); in AnalyzeBranchImpl()
4009 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4014 X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); in AnalyzeBranchImpl()
4020 MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4039 // We conditionally jump to the fall-through block. in AnalyzeBranchImpl()
4045 .addMBB(UnCondBrIter->getOperand(0).getMBB()); in AnalyzeBranchImpl()
4049 OldInst->eraseFromParent(); in AnalyzeBranchImpl()
4050 UnCondBrIter->eraseFromParent(); in AnalyzeBranchImpl()
4059 TBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4067 // opcodes fit one of the special multi-branch idioms. in AnalyzeBranchImpl()
4073 auto NewTBB = I->getOperand(0).getMBB(); in AnalyzeBranchImpl()
4154 if (I->modifiesRegister(X86::EFLAGS, TRI)) { in analyzeBranchPredicate()
4159 if (I->readsRegister(X86::EFLAGS, TRI)) in analyzeBranchPredicate()
4168 if (Succ->isLiveIn(X86::EFLAGS)) in analyzeBranchPredicate()
4183 if (ConditionDef->getOpcode() == TestOpcode && in analyzeBranchPredicate()
4184 ConditionDef->getNumOperands() == 3 && in analyzeBranchPredicate()
4185 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) && in analyzeBranchPredicate()
4187 MBP.LHS = ConditionDef->getOperand(0); in analyzeBranchPredicate()
4203 --I; in RemoveBranch()
4204 if (I->isDebugValue()) in RemoveBranch()
4206 if (I->getOpcode() != X86::JMP_1 && in RemoveBranch()
4207 getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) in RemoveBranch()
4210 I->eraseFromParent(); in RemoveBranch()
4235 // If FBB is null, it is implied to be a fall-through block. in InsertBranch()
4254 "body is a fall-through."); in InsertBranch()
4269 // Two-way Conditional branch. Insert the second branch. in InsertBranch()
4291 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in canInsertSelect()
4318 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); in insertSelect()
4321 MRI.getRegClass(DstReg)->getSize(), in insertSelect()
4335 // SrcReg(VR128) -> DestReg(GR64) in CopyToFromAsymmetricReg()
4336 // SrcReg(VR64) -> DestReg(GR64) in CopyToFromAsymmetricReg()
4337 // SrcReg(GR64) -> DestReg(VR128) in CopyToFromAsymmetricReg()
4338 // SrcReg(GR64) -> DestReg(VR64) in CopyToFromAsymmetricReg()
4360 // SrcReg(FR32) -> DestReg(GR32) in CopyToFromAsymmetricReg()
4361 // SrcReg(GR32) -> DestReg(FR32) in CopyToFromAsymmetricReg()
4462 // Copying to or from a physical H register on x86-64 requires a NOREX in copyPhysReg()
4469 "8-bit H register can not be copied outside GR8_NOREX"); in copyPhysReg()
4506 "Not having LAHF/SAHF only happens on 64-bit."); in copyPhysReg()
4509 // first frame index. See X86FrameLowering.cpp - usesTheStack. in copyPhysReg()
4524 // - Save the overflow flag OF into AL using SETO, and restore it using a in copyPhysReg()
4525 // signed 8-bit addition of AL and INT8_MAX. in copyPhysReg()
4526 // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH in copyPhysReg()
4528 // - When RAX/EAX is live and isn't the destination register, make sure it in copyPhysReg()
4542 // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. in copyPhysReg()
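The SETO/ADD pairing on lines 4524-4525 works because SETO leaves AL equal to 0 or 1, and adding 127 to an 8-bit value overflows (setting OF again) exactly when that value is 1. The arithmetic, checked portably:

#include <cassert>
#include <cstdint>

// Signed 8-bit addition overflows iff the mathematical sum leaves [-128, 127].
static bool addOverflows8(int8_t A, int8_t B) {
  int Sum = int(A) + int(B);
  return Sum < -128 || Sum > 127;
}

int main() {
  assert(addOverflows8(1, 127));  // AL == 1 (OF was set): 128 overflows, OF restored to 1
  assert(!addOverflows8(0, 127)); // AL == 0 (OF was clear): 127 fits, OF restored to 0
}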
4558 --I; in copyPhysReg()
4595 switch (RC->getSize()) { in getLoadStoreMaskRegOpcode()
4615 if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC)) in getLoadStoreRegOpcode()
4617 if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC)) in getLoadStoreRegOpcode()
4624 switch (RC->getSize()) { in getLoadStoreRegOpcode()
4628 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); in getLoadStoreRegOpcode()
4630 // Copying to or from a physical H register on x86-64 requires a NOREX in getLoadStoreRegOpcode()
4636 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); in getLoadStoreRegOpcode()
4647 llvm_unreachable("Unknown 4-byte regclass"); in getLoadStoreRegOpcode()
4659 llvm_unreachable("Unknown 8-byte regclass"); in getLoadStoreRegOpcode()
4661 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); in getLoadStoreRegOpcode()
4665 X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass"); in getLoadStoreRegOpcode()
4683 X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass"); in getLoadStoreRegOpcode()
4697 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); in getLoadStoreRegOpcode()
4698 assert(STI.hasVLX() && "Using 512-bit register requires AVX512"); in getLoadStoreRegOpcode()
4761 assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && in storeRegToStackSlot()
4763 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in storeRegToStackSlot()
4765 (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || in storeRegToStackSlot()
4780 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in storeRegToAddr()
4782 (*MMOBegin)->getAlignment() >= Alignment; in storeRegToAddr()
4800 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in loadRegFromStackSlot()
4802 (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || in loadRegFromStackSlot()
4815 unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); in loadRegFromAddr()
4817 (*MMOBegin)->getAlignment() >= Alignment; in loadRegFromAddr()
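In all four of these spill/reload paths the aligned vector move is chosen only when the guaranteed alignment covers max(register size, 16) bytes. The predicate, extracted into a sketch:

#include <algorithm>
#include <cstdint>

// Whether a spill slot (or arbitrary address) with the given guaranteed
// alignment may be accessed with aligned vector moves for a register class
// of RCSize bytes.
static bool canUseAlignedMove(uint32_t RCSize, uint32_t GuaranteedAlign) {
  uint32_t Required = std::max<uint32_t>(RCSize, 16);
  return GuaranteedAlign >= Required;
}

For example, a 32-byte YMM spill on a stack that only guarantees 16-byte alignment must fall back to the unaligned move.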
4942 // The shift instructions only modify ZF if their shift count is non-zero. in isDefConvertible()
5067 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) in optimizeCompareInstr()
5098 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); in optimizeCompareInstr()
5107 if (IsCmpZero && MI->getParent() != CmpInstr.getParent()) in optimizeCompareInstr()
5121 if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && in optimizeCompareInstr()
5122 J->getOperand(1).getReg() == SrcReg) { in optimizeCompareInstr()
5123 assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); in optimizeCompareInstr()
5149 RE = CmpInstr.getParent() == MI->getParent() in optimizeCompareInstr()
5151 : CmpInstr.getParent()->rend(); in optimizeCompareInstr()
5184 bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && in optimizeCompareInstr()
5185 Sub->getOperand(2).getReg() == SrcReg); in optimizeCompareInstr()
5190 // live-out. in optimizeCompareInstr()
5193 MachineBasicBlock::iterator E = CmpInstr.getParent()->end(); in optimizeCompareInstr()
5265 NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), in optimizeCompareInstr()
5281 // If EFLAGS is not killed nor re-defined, we should check whether it is in optimizeCompareInstr()
5282 // live-out. If it is live-out, do not optimize. in optimizeCompareInstr()
5285 for (MachineBasicBlock *Successor : MBB->successors()) in optimizeCompareInstr()
5286 if (Successor->isLiveIn(X86::EFLAGS)) in optimizeCompareInstr()
5298 InsertE = Sub->getParent()->rend(); in optimizeCompareInstr()
5301 if (!Instr->readsRegister(X86::EFLAGS, TRI) && in optimizeCompareInstr()
5302 Instr->modifiesRegister(X86::EFLAGS, TRI)) { in optimizeCompareInstr()
5303 Sub->getParent()->remove(Movr0Inst); in optimizeCompareInstr()
5304 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr), in optimizeCompareInstr()
5314 unsigned i = 0, e = Sub->getNumOperands(); in optimizeCompareInstr()
5316 MachineOperand &MO = Sub->getOperand(i); in optimizeCompareInstr()
5328 Op.first->setDesc(get(Op.second)); in optimizeCompareInstr()
5335 /// instructions in between do not load or store, and have no side effects.
5349 DefMI = MRI->getVRegDef(FoldAsLoadDefReg); in optimizeLoadInstr()
5352 if (!DefMI->isSafeToMove(nullptr, SawStore)) in optimizeLoadInstr()
5384 /// Expand a single-def pseudo instruction to a two-addr
5393 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); in Expand2AddrUndef()
5394 unsigned Reg = MIB->getOperand(0).getReg(); in Expand2AddrUndef()
5395 MIB->setDesc(Desc); in Expand2AddrUndef()
5401 assert(MIB->getOperand(1).getReg() == Reg && in Expand2AddrUndef()
5402 MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); in Expand2AddrUndef()
5406 /// Expand a single-def pseudo instruction to a two-addr
5414 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); in Expand2AddrKreg()
5415 MIB->setDesc(Desc); in Expand2AddrKreg()
5422 MachineBasicBlock &MBB = *MIB->getParent(); in expandMOV32r1()
5423 DebugLoc DL = MIB->getDebugLoc(); in expandMOV32r1()
5424 unsigned Reg = MIB->getOperand(0).getReg(); in expandMOV32r1()
5432 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r)); in expandMOV32r1()
5439 MachineBasicBlock &MBB = *MIB->getParent(); in ExpandMOVImmSExti8()
5440 DebugLoc DL = MIB->getDebugLoc(); in ExpandMOVImmSExti8()
5441 int64_t Imm = MIB->getOperand(1).getImm(); in ExpandMOVImmSExti8()
5448 assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || in ExpandMOVImmSExti8()
5449 MIB->getOpcode() == X86::MOV32ImmSExti8); in ExpandMOVImmSExti8()
5453 MBB.getParent()->getInfo<X86MachineFunctionInfo>(); in ExpandMOVImmSExti8()
5454 if (X86FI->getUsesRedZone()) { in ExpandMOVImmSExti8()
5455 MIB->setDesc(get(MIB->getOpcode() == X86::MOV32ImmSExti8 ? X86::MOV32ri in ExpandMOVImmSExti8()
5460 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and in ExpandMOVImmSExti8()
5464 MIB->setDesc(get(X86::POP64r)); in ExpandMOVImmSExti8()
5465 MIB->getOperand(0) in ExpandMOVImmSExti8()
5466 .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); in ExpandMOVImmSExti8()
5468 assert(MIB->getOpcode() == X86::MOV32ImmSExti8); in ExpandMOVImmSExti8()
5471 MIB->setDesc(get(X86::POP32r)); in ExpandMOVImmSExti8()
5477 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); in ExpandMOVImmSExti8()
5480 (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); in ExpandMOVImmSExti8()
5481 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; in ExpandMOVImmSExti8()
5483 TFL->BuildCFI(MBB, I, DL, in ExpandMOVImmSExti8()
5485 TFL->BuildCFI(MBB, std::next(I), DL, in ExpandMOVImmSExti8()
5486 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); in ExpandMOVImmSExti8()
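The PUSH imm8/POP pair materializes a sign-extended 8-bit immediate in three bytes where a plain MOV would need seven or more; semantically it is just sign extension:

#include <cassert>
#include <cstdint>

// What `push $Imm8 ; pop %reg` leaves in the register: Imm8 sign-extended
// to the full width.
static int64_t sextImm8(int8_t Imm) { return static_cast<int64_t>(Imm); }

int main() {
  assert(sextImm8(-1) == -1);   // all 64 bits set
  assert(sextImm8(127) == 127);
  // Note the expansion above avoids this form when the red zone is in use
  // (see getUsesRedZone on line 5454), since the transient push would
  // clobber it.
}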
5492 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
5496 MachineBasicBlock &MBB = *MIB->getParent(); in expandLoadStackGuard()
5497 DebugLoc DL = MIB->getDebugLoc(); in expandLoadStackGuard()
5498 unsigned Reg = MIB->getOperand(0).getReg(); in expandLoadStackGuard()
5500 cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); in expandLoadStackGuard()
5502 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( in expandLoadStackGuard()
5509 MIB->setDebugLoc(DL); in expandLoadStackGuard()
5510 MIB->setDesc(TII.get(X86::MOV64rm)); in expandLoadStackGuard()
5516 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); in expandPostRAPseudo()
5553 unsigned Reg = MIB->getOperand(0).getReg(); in expandPostRAPseudo()
5554 MIB->setDesc(get(X86::VPTERNLOGDZrri)); in expandPostRAPseudo()
5568 // KNL does not recognize dependency-breaking idioms for mask registers, in expandPostRAPseudo()
5595 // FrameIndex only - add an immediate offset (whether it's zero or not). in addOperands()
5600 // General Memory Addressing - we need to add any offset to an existing in addOperands()
5627 unsigned NumOps = MI.getDesc().getNumOperands() - 2; in FuseTwoAddrInst()
5637 MachineBasicBlock *MBB = InsertPt->getParent(); in FuseTwoAddrInst()
5638 MBB->insert(InsertPt, NewMI); in FuseTwoAddrInst()
5663 MachineBasicBlock *MBB = InsertPt->getParent(); in FuseInst()
5664 MBB->insert(InsertPt, NewMI); in FuseInst()
5673 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, in MakeM0Inst()
5689 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm(); in foldMemoryOperandCustom()
5694 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandCustom()
5703 NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); in foldMemoryOperandCustom()
5710 // Move the upper 64 bits of the second operand to the lower 64 bits. in foldMemoryOperandCustom()
5712 // TODO: In most cases AVX doesn't have an 8-byte alignment requirement. in foldMemoryOperandCustom()
5714 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandCustom()
5742 if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && in foldMemoryOperandImpl()
5750 NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; in foldMemoryOperandImpl()
5765 // Folding a memory location into the two-address part of a two-address in foldMemoryOperandImpl()
5794 auto I = OpcodeTablePtr->find(MI.getOpcode()); in foldMemoryOperandImpl()
5795 if (I != OpcodeTablePtr->end()) { in foldMemoryOperandImpl()
5796 unsigned Opcode = I->second.first; in foldMemoryOperandImpl()
5797 unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; in foldMemoryOperandImpl()
5802 unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); in foldMemoryOperandImpl()
5808 // If this is a 64-bit load, but the spill slot is 32, then we can do in foldMemoryOperandImpl()
5809 // a 32-bit load which is implicitly zero-extended. This likely is in foldMemoryOperandImpl()
5824 // If this is the special case where we use a MOV32rm to load a 32-bit in foldMemoryOperandImpl()
5825 // value and zero-extend the top bits. Change the destination register in foldMemoryOperandImpl()
5826 // to a 32-bit one. in foldMemoryOperandImpl()
5827 unsigned DstReg = NewMI->getOperand(0).getReg(); in foldMemoryOperandImpl()
5829 NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); in foldMemoryOperandImpl()
5831 NewMI->getOperand(0).setSubReg(X86::sub_32bit); in foldMemoryOperandImpl()
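The narrowing on lines 5808-5831 is safe because on x86-64 any write to a 32-bit register zeroes bits 63:32, so reloading the 32-bit spill reproduces the full 64-bit value. In C++ terms:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Slot = 0x00000000DEADBEEFull; // value known to fit in 32 bits
  uint32_t Low = uint32_t(Slot);         // MOV32rm: 32-bit load
  uint64_t Reg = Low;                    // implicit zero-extension, as on hardware
  assert(Reg == Slot);
}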
5865 CommutedMI->eraseFromParent(); in foldMemoryOperandImpl()
5875 // Folding failed again - undo the commute before returning. in foldMemoryOperandImpl()
5884 UncommutedMI->eraseFromParent(); in foldMemoryOperandImpl()
5900 /// the first 32 or 64 bits of the destination register and leave the rest
5902 /// only update part of the destination register, and the non-updated part is
6036 // AVX-512 in hasUndefRegUpdate()
6060 /// high bits that are passed through are not live.
6094 // It wants to read and write the xmm sub-register. in breakPartialRegDependency()
6095 unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); in breakPartialRegDependency()
6115 if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) in foldMemoryOperandImpl()
6119 unsigned Size = MFI->getObjectSize(FrameIndex); in foldMemoryOperandImpl()
6120 unsigned Alignment = MFI->getObjectAlignment(FrameIndex); in foldMemoryOperandImpl()
6125 std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment()); in foldMemoryOperandImpl()
6156 /// ->
6162 /// ->
6171 MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); in isNonFoldablePartialRegisterLoad()
6235 if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) in foldMemoryOperandImpl()
6241 Alignment = (*LoadMI.memoperands_begin())->getAlignment(); in foldMemoryOperandImpl()
6300 // Create a constant-pool entry and operands to load from it. in foldMemoryOperandImpl()
6307 // x86-32 PIC requires a PIC base register for constant pools. in foldMemoryOperandImpl()
6320 // Create a constant-pool entry. in foldMemoryOperandImpl()
6325 Ty = Type::getFloatTy(MF.getFunction()->getContext()); in foldMemoryOperandImpl()
6327 Ty = Type::getDoubleTy(MF.getFunction()->getContext()); in foldMemoryOperandImpl()
6329 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16); in foldMemoryOperandImpl()
6332 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); in foldMemoryOperandImpl()
6334 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); in foldMemoryOperandImpl()
6355 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, in foldMemoryOperandImpl()
6370 unsigned Opc = I->second.first; in unfoldMemoryOperand()
6371 unsigned Index = I->second.second & TB_INDEX_MASK; in unfoldMemoryOperand()
6372 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in unfoldMemoryOperand()
6373 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in unfoldMemoryOperand()
6383 // TODO: Check if 32-byte or greater accesses are slow too? in unfoldMemoryOperand()
6414 MachineOperand &MO = NewMIs[0]->getOperand(i); in unfoldMemoryOperand()
6442 switch (DataMI->getOpcode()) { in unfoldMemoryOperand()
6451 MachineOperand &MO0 = DataMI->getOperand(0); in unfoldMemoryOperand()
6452 MachineOperand &MO1 = DataMI->getOperand(1); in unfoldMemoryOperand()
6455 switch (DataMI->getOpcode()) { in unfoldMemoryOperand()
6465 DataMI->setDesc(get(NewOpc)); in unfoldMemoryOperand()
6486 if (!N->isMachineOpcode()) in unfoldMemoryOperand()
6489 auto I = MemOp2RegOpTable.find(N->getMachineOpcode()); in unfoldMemoryOperand()
6492 unsigned Opc = I->second.first; in unfoldMemoryOperand()
6493 unsigned Index = I->second.second & TB_INDEX_MASK; in unfoldMemoryOperand()
6494 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in unfoldMemoryOperand()
6495 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in unfoldMemoryOperand()
6504 unsigned NumOps = N->getNumOperands(); in unfoldMemoryOperand()
6505 for (unsigned i = 0; i != NumOps-1; ++i) { in unfoldMemoryOperand()
6506 SDValue Op = N->getOperand(i); in unfoldMemoryOperand()
6507 if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) in unfoldMemoryOperand()
6509 else if (i < Index-NumDefs) in unfoldMemoryOperand()
6511 else if (i > Index-NumDefs) in unfoldMemoryOperand()
6514 SDValue Chain = N->getOperand(NumOps-1); in unfoldMemoryOperand()
6520 EVT VT = *RC->vt_begin(); in unfoldMemoryOperand()
6523 MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), in unfoldMemoryOperand()
6524 cast<MachineSDNode>(N)->memoperands_end()); in unfoldMemoryOperand()
6530 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte in unfoldMemoryOperand()
6532 unsigned Alignment = RC->getSize() == 32 ? 32 : 16; in unfoldMemoryOperand()
6534 (*MMOs.first)->getAlignment() >= Alignment; in unfoldMemoryOperand()
6540 cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); in unfoldMemoryOperand()
6548 VTs.push_back(*DstRC->vt_begin()); in unfoldMemoryOperand()
6550 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { in unfoldMemoryOperand()
6551 EVT VT = N->getValueType(i); in unfoldMemoryOperand()
6568 MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), in unfoldMemoryOperand()
6569 cast<MachineSDNode>(N)->memoperands_end()); in unfoldMemoryOperand()
6575 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte in unfoldMemoryOperand()
6577 unsigned Alignment = RC->getSize() == 32 ? 32 : 16; in unfoldMemoryOperand()
6579 (*MMOs.first)->getAlignment() >= Alignment; in unfoldMemoryOperand()
6586 cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second); in unfoldMemoryOperand()
6598 bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; in getOpcodeAfterMemoryUnfold()
6599 bool FoldedStore = I->second.second & TB_FOLDED_STORE; in getOpcodeAfterMemoryUnfold()
6605 *LoadRegIndex = I->second.second & TB_INDEX_MASK; in getOpcodeAfterMemoryUnfold()
6606 return I->second.first; in getOpcodeAfterMemoryUnfold()
6612 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) in areLoadsFromSameBasePtr()
6614 unsigned Opc1 = Load1->getMachineOpcode(); in areLoadsFromSameBasePtr()
6615 unsigned Opc2 = Load2->getMachineOpcode(); in areLoadsFromSameBasePtr()
6692 if (Load1->getOperand(0) != Load2->getOperand(0) || in areLoadsFromSameBasePtr()
6693 Load1->getOperand(5) != Load2->getOperand(5)) in areLoadsFromSameBasePtr()
6696 if (Load1->getOperand(4) != Load2->getOperand(4)) in areLoadsFromSameBasePtr()
6699 if (Load1->getOperand(1) == Load2->getOperand(1) && in areLoadsFromSameBasePtr()
6700 Load1->getOperand(2) == Load2->getOperand(2)) { in areLoadsFromSameBasePtr()
6701 if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1) in areLoadsFromSameBasePtr()
6705 if (isa<ConstantSDNode>(Load1->getOperand(3)) && in areLoadsFromSameBasePtr()
6706 isa<ConstantSDNode>(Load2->getOperand(3))) { in areLoadsFromSameBasePtr()
6707 Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue(); in areLoadsFromSameBasePtr()
6708 Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue(); in areLoadsFromSameBasePtr()
6719 if ((Offset2 - Offset1) / 8 > 64) in shouldScheduleLoadsNear()
6722 unsigned Opc1 = Load1->getMachineOpcode(); in shouldScheduleLoadsNear()
6723 unsigned Opc2 = Load2->getMachineOpcode(); in shouldScheduleLoadsNear()
6737 EVT VT = Load1->getValueType(0); in shouldScheduleLoadsNear()
6740 // XMM registers. In 64-bit mode we can be a bit more aggressive since we in shouldScheduleLoadsNear()
6765 // Check if this processor supports macro-fusion. Since this is a minor in shouldScheduleAdjacent()
6946 "X86-64 PIC uses RIP relative addressing"); in getGlobalBaseReg()
6948 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); in getGlobalBaseReg()
6949 unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); in getGlobalBaseReg()
6955 MachineRegisterInfo &RegInfo = MF->getRegInfo(); in getGlobalBaseReg()
6957 X86FI->setGlobalBaseReg(GlobalBaseReg); in getGlobalBaseReg()
6981 // AVX 128-bit support
6997 // AVX 256-bit support
7035 if (Row[domain-1] == opcode) in lookup()
7042 if (Row[domain-1] == opcode) in lookupAVX2()
7066 "256-bit vector operations only available in AVX2"); in setExecutionDomain()
7070 MI.setDesc(get(table[Domain - 1])); in setExecutionDomain()
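The lookup on lines 7035-7042 scans rows of equivalent opcodes, one column per execution domain. A sketch with made-up opcode values (the real ReplaceableInstrs tables and opcode numbering live in X86InstrInfo.cpp):

#include <cstdint>

// Each row holds one instruction's encoding in the three SSE execution
// domains (PackedSingle, PackedDouble, PackedInt).  Values are illustrative.
static const uint16_t ReplaceableInstrs[][3] = {
    {/*ANDPSrr*/ 100, /*ANDPDrr*/ 101, /*PANDrr*/ 102},
    {/*ORPSrr*/  110, /*ORPDrr*/  111, /*PORrr*/  112},
};

// Domains are 1-based here, matching `Row[domain-1]` above.
static const uint16_t *lookupRow(uint16_t Opcode, unsigned Domain) {
  for (const auto &Row : ReplaceableInstrs)
    if (Row[Domain - 1] == Opcode)
      return Row;
  return nullptr;
}

// setExecutionDomain then retargets the instruction to Row[NewDomain - 1],
// as in the setDesc call above.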
7286 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; in isAssociativeAndCommutative()
7292 /// This is an architecture-specific helper function of reassociateOps.
7341 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"}, in getSerializableDirectMachineOperandTargetFlags()
7342 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"}, in getSerializableDirectMachineOperandTargetFlags()
7343 {MO_GOT, "x86-got"}, in getSerializableDirectMachineOperandTargetFlags()
7344 {MO_GOTOFF, "x86-gotoff"}, in getSerializableDirectMachineOperandTargetFlags()
7345 {MO_GOTPCREL, "x86-gotpcrel"}, in getSerializableDirectMachineOperandTargetFlags()
7346 {MO_PLT, "x86-plt"}, in getSerializableDirectMachineOperandTargetFlags()
7347 {MO_TLSGD, "x86-tlsgd"}, in getSerializableDirectMachineOperandTargetFlags()
7348 {MO_TLSLD, "x86-tlsld"}, in getSerializableDirectMachineOperandTargetFlags()
7349 {MO_TLSLDM, "x86-tlsldm"}, in getSerializableDirectMachineOperandTargetFlags()
7350 {MO_GOTTPOFF, "x86-gottpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7351 {MO_INDNTPOFF, "x86-indntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7352 {MO_TPOFF, "x86-tpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7353 {MO_DTPOFF, "x86-dtpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7354 {MO_NTPOFF, "x86-ntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7355 {MO_GOTNTPOFF, "x86-gotntpoff"}, in getSerializableDirectMachineOperandTargetFlags()
7356 {MO_DLLIMPORT, "x86-dllimport"}, in getSerializableDirectMachineOperandTargetFlags()
7357 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"}, in getSerializableDirectMachineOperandTargetFlags()
7358 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"}, in getSerializableDirectMachineOperandTargetFlags()
7359 {MO_TLVP, "x86-tlvp"}, in getSerializableDirectMachineOperandTargetFlags()
7360 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"}, in getSerializableDirectMachineOperandTargetFlags()
7361 {MO_SECREL, "x86-secrel"}}; in getSerializableDirectMachineOperandTargetFlags()
7367 /// global base register for x86-32.
7377 // Don't do anything if this is 64-bit as 64-bit PIC in runOnMachineFunction()
7383 if (!TM->isPositionIndependent()) in runOnMachineFunction()
7387 unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); in runOnMachineFunction()
7408 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); in runOnMachineFunction()
7413 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register in runOnMachineFunction()
7414 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) in runOnMachineFunction()
7447 if (MFI->getNumLocalDynamicTLSAccesses() < 2) { in runOnMachineFunction()
7453 return VisitNode(DT->getRootNode(), 0); in runOnMachineFunction()
7456 // Visit the dominator subtree rooted at Node in pre-order.
7457 // If TLSBaseAddrReg is non-null, then use that to replace any
7462 MachineBasicBlock *BB = Node->getBlock(); in VisitNode()
7466 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; in VisitNode()
7468 switch (I->getOpcode()) { in VisitNode()
7483 for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); in VisitNode()
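The cleanup pass walks the dominator tree pre-order: the first TLS base-address call seen on a path captures the result into a virtual register, and every dominated call is rewritten as a copy from it. A structural sketch with hypothetical types (the real pass operates on MachineDomTreeNode and MachineInstr):

#include <vector>

struct DomNode {                  // stand-in for MachineDomTreeNode
  bool HasTLSBaseAddrCall = false;
  std::vector<DomNode *> Children;
};

// Returns the register (0 = none) holding the TLS base address after this
// subtree.  Pre-order: a block is handled before its children, so dominated
// calls always see the dominating definition.
static unsigned visitNode(DomNode *N, unsigned TLSBaseAddrReg) {
  if (N->HasTLSBaseAddrCall) {
    if (TLSBaseAddrReg == 0)
      TLSBaseAddrReg = 1; // SetRegister: stash %rax/%eax into a fresh vreg
    // else: ReplaceTLSBaseAddrCall rewrites the call as a copy from the vreg
  }
  for (DomNode *C : N->Children)
    visitNode(C, TLSBaseAddrReg);
  return TLSBaseAddrReg;
}

As line 7447 shows, the walk is only worthwhile when a function makes at least two local-dynamic TLS accesses.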
7495 MachineFunction *MF = I.getParent()->getParent(); in ReplaceTLSBaseAddrCall()
7496 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); in ReplaceTLSBaseAddrCall()
7503 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX) in ReplaceTLSBaseAddrCall()
7515 MachineFunction *MF = I.getParent()->getParent(); in SetRegister()
7516 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>(); in SetRegister()
7521 MachineRegisterInfo &RegInfo = MF->getRegInfo(); in SetRegister()
7530 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) in SetRegister()
7537 return "Local Dynamic TLS Access Clean-up"; in getPassName()