//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

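/// Check whether the parameters to a call that are passed in callee-saved
/// registers are the same values the caller received in those registers,
/// i.e. each outgoing value is a CopyFromReg of the corresponding live-in.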
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    Register Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamAlignment(ArgIdx);
  ByValType = nullptr;
  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
    ByValType = Call->getParamByValType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
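/// For example, with the default libcall names, a 128-bit float multiply
/// (RTLIB::MUL_F128) is emitted as a call to __multf3.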
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
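  // For example, a 15-byte memcpy on a target with legal i64 would normally
  // produce MemOps = { i64, i32, i16, i8 }; with AllowOverlap and fast
  // misaligned accesses it can instead produce two overlapping i64 operations.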
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
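  // With the default libcall names this means, e.g., that (setoeq f32 x, y)
  // becomes __eqsf2(x, y) == 0, while an unordered predicate such as SETUEQ
  // needs two calls: (__unordsf2(x, y) != 0) | (__eqsf2(x, y) == 0).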
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target-specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO, we will have to load it
  // from the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent, we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
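/// For example, if Op is (and X, 0xFF00) but only bits 11:8 are demanded, the
/// constant is shrunk to 0x0F00.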
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C)
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(Demanded)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ANY_EXTEND for the widening cast (the extended
/// bits are not demanded), but it could be generalized for targets with other
/// types of implicit widening casts.
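/// For example, an i64 add whose result is only demanded in its low 16 bits
/// may become (any_extend (add (trunc x), (trunc y))) over i16 when both
/// casts are free.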
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();

    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
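      // e.g. for a little-endian v2i32 -> i64 bitcast, demanded bits [0,32)
      // of the i64 result come solely from element 0 of the source vector.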
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
      return Op.getOperand(0);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
    Known.Zero = ~Known.One;
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnesValue(BitWidth);
    DemandedElts = APInt::getAllOnesValue(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2, KnownOut;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;
    Known = SrcKnown.zextOrTrunc(BitWidth, false);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.zextOrTrunc(BitWidth, false);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts) {
      Known.One &= KnownVec.One;
      Known.Zero &= KnownVec.Zero;
    }

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();

    // If index isn't constant, assume we need the original demanded base
    // elements and ALL the inserted subvector elements.
    APInt BaseElts = DemandedElts;
    APInt SubElts = APInt::getAllOnesValue(NumSubElts);
    if (isa<ConstantSDNode>(Op.getOperand(2))) {
      const APInt &Idx = Op.getConstantOperandAPInt(2);
      if (Idx.ule(NumElts - NumSubElts)) {
        unsigned SubIdx = Idx.getZExtValue();
        SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
        BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
      }
    }

    KnownBits KnownSub, KnownBase;
    if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!SubElts) {
      Known.One &= KnownSub.One;
      Known.Zero &= KnownSub.Zero;
    }
    if (!!BaseElts) {
      Known.One &= KnownBase.One;
      Known.Zero &= KnownBase.Zero;
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // If index isn't constant, assume we need all the source vector elements.
    SDValue Src = Op.getOperand(0);
    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
      // Offset the demanded elts by the subvector index.
      uint64_t Idx = SubIdx->getZExtValue();
      SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
    }
    if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
      return true;
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts) {
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);

    if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
      // If one side is a constant, and all of the known set bits on the other
      // side are also set in the constant, turn this into an AND, as we know
      // the bits will be cleared.
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue()) {
        if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
          // We're flipping all demanded bits. Flip the undemanded bits too.
          SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
          return TLO.CombineTo(Op, New);
        }
        // If we can't turn this into a 'not', try to shrink the constant.
        if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
          return true;
      }
    }

    Known = std::move(KnownOut);
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
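      // e.g. ((X >>u 2) << 5) --> (X << 3) when the low 5 bits of the
      // result are not demanded.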
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (ConstantSDNode *SA2 =
                  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SHL;
              int Diff = ShAmt - C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SRL;
              }

              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
            }
          }
        }
      }

      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
                               Known, TLO, Depth + 1))
        return true;

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }
        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (ConstantSDNode *SA2 =
                  isConstOrConstSplat(InnerOp.getOperand(1))) {
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
                                                  Op1.getValueType());
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      EVT ShiftVT = Op1.getValueType();
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
1442 if (Op0.getOpcode() == ISD::SHL) {
1443 if (ConstantSDNode *SA2 =
1444 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1445 if (!DemandedBits.intersects(
1446 APInt::getHighBitsSet(BitWidth, ShAmt))) {
1447 if (SA2->getAPIntValue().ult(BitWidth)) {
1448 unsigned C1 = SA2->getZExtValue();
1449 unsigned Opc = ISD::SRL;
1450 int Diff = ShAmt - C1;
1451 if (Diff < 0) {
1452 Diff = -Diff;
1453 Opc = ISD::SHL;
1454 }
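// e.g. (srl (shl x, 2), 5) -> (srl x, 3) when the high 5 bits are not
// demanded, and (srl (shl x, 5), 2) -> (shl x, 3) when the high 2 bits
// are not demanded.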
1455
1456 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1457 return TLO.CombineTo(
1458 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1459 }
1460 }
1461 }
1462 }
1463
1464 // Compute the new bits that are at the top now.
1465 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1466 Depth + 1))
1467 return true;
1468 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1469 Known.Zero.lshrInPlace(ShAmt);
1470 Known.One.lshrInPlace(ShAmt);
1471
1472 Known.Zero.setHighBits(ShAmt); // High bits known zero.
1473 }
1474 break;
1475 }
1476 case ISD::SRA: {
1477 SDValue Op0 = Op.getOperand(0);
1478 SDValue Op1 = Op.getOperand(1);
1479
1480 // If this is an arithmetic shift right and only the low-bit is set, we can
1481 // always convert this into a logical shr, even if the shift amount is
1482 // variable. The low bit of the shift cannot be an input sign bit unless
1483 // the shift amount is >= the size of the datatype, which is undefined.
1484 if (DemandedBits.isOneValue())
1485 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1486
1487 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1488 // If the shift count is an invalid immediate, don't do anything.
1489 if (SA->getAPIntValue().uge(BitWidth))
1490 break;
1491
1492 unsigned ShAmt = SA->getZExtValue();
1493 if (ShAmt == 0)
1494 return TLO.CombineTo(Op, Op0);
1495
1496 APInt InDemandedMask = (DemandedBits << ShAmt);
1497
1498 // If the shift is exact, then it does demand the low bits (and knows that
1499 // they are zero).
1500 if (Op->getFlags().hasExact())
1501 InDemandedMask.setLowBits(ShAmt);
1502
1503 // If any of the demanded bits are produced by the sign extension, we also
1504 // demand the input sign bit.
1505 if (DemandedBits.countLeadingZeros() < ShAmt)
1506 InDemandedMask.setSignBit();
1507
1508 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1509 Depth + 1))
1510 return true;
1511 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1512 Known.Zero.lshrInPlace(ShAmt);
1513 Known.One.lshrInPlace(ShAmt);
1514
1515 // If the input sign bit is known to be zero, or if none of the top bits
1516 // are demanded, turn this into an unsigned shift right.
1517 if (Known.Zero[BitWidth - ShAmt - 1] ||
1518 DemandedBits.countLeadingZeros() >= ShAmt) {
1519 SDNodeFlags Flags;
1520 Flags.setExact(Op->getFlags().hasExact());
1521 return TLO.CombineTo(
1522 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1523 }
1524
1525 int Log2 = DemandedBits.exactLogBase2();
1526 if (Log2 >= 0) {
1527 // The bit must come from the sign.
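// e.g. for i32 with DemandedBits == (1 << 30), the SRL conversion above
// ensures ShAmt >= 2 here, so bit 30 of the result is a copy of the sign
// bit, and (srl Op0, 31 - 30) reproduces it.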
1528 SDValue NewSA =
1529 TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1530 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1531 }
1532
1533 if (Known.One[BitWidth - ShAmt - 1])
1534 // New bits are known one.
1535 Known.One.setHighBits(ShAmt);
1536 }
1537 break;
1538 }
1539 case ISD::FSHL:
1540 case ISD::FSHR: {
1541 SDValue Op0 = Op.getOperand(0);
1542 SDValue Op1 = Op.getOperand(1);
1543 SDValue Op2 = Op.getOperand(2);
1544 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1545
1546 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1547 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1548
1549 // For fshl, 0-shift returns the 1st arg.
1550 // For fshr, 0-shift returns the 2nd arg.
1551 if (Amt == 0) {
1552 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1553 Known, TLO, Depth + 1))
1554 return true;
1555 break;
1556 }
1557
1558 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1559 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
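// e.g. for i8 fshl with Amt == 3 the result is (Op0 << 3) | (Op1 >> 5),
// so result bit 6 is Op0 bit 3 and result bit 2 is Op1 bit 7.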
1560 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1561 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1562 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1563 Depth + 1))
1564 return true;
1565 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1566 Depth + 1))
1567 return true;
1568
1569 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1570 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1571 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1572 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1573 Known.One |= Known2.One;
1574 Known.Zero |= Known2.Zero;
1575 }
1576 break;
1577 }
1578 case ISD::BITREVERSE: {
1579 SDValue Src = Op.getOperand(0);
1580 APInt DemandedSrcBits = DemandedBits.reverseBits();
1581 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1582 Depth + 1))
1583 return true;
1584 Known.One = Known2.One.reverseBits();
1585 Known.Zero = Known2.Zero.reverseBits();
1586 break;
1587 }
1588 case ISD::BSWAP: {
1589 SDValue Src = Op.getOperand(0);
1590 APInt DemandedSrcBits = DemandedBits.byteSwap();
1591 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1592 Depth + 1))
1593 return true;
1594 Known.One = Known2.One.byteSwap();
1595 Known.Zero = Known2.Zero.byteSwap();
1596 break;
1597 }
1598 case ISD::SIGN_EXTEND_INREG: {
1599 SDValue Op0 = Op.getOperand(0);
1600 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1601 unsigned ExVTBits = ExVT.getScalarSizeInBits();
1602
1603 // If we only care about the highest bit, don't bother shifting right.
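// e.g. for i32 (sext_inreg x, i8) with only bit 31 demanded, (shl x, 24)
// moves the i8 sign bit (bit 7) straight into bit 31.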
1604 if (DemandedBits.isSignMask()) {
1605 unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1606 bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1607 // However, if the input is already sign extended, we expect the sign
1608 // extension to be dropped altogether later, so do not simplify.
1609 if (!AlreadySignExtended) {
1610 // Compute the correct shift amount type, which must be getShiftAmountTy
1611 // for scalar types after legalization.
1612 EVT ShiftAmtTy = VT;
1613 if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1614 ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1615
1616 SDValue ShiftAmt =
1617 TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1618 return TLO.CombineTo(Op,
1619 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1620 }
1621 }
1622
1623 // If none of the extended bits are demanded, eliminate the sextinreg.
1624 if (DemandedBits.getActiveBits() <= ExVTBits)
1625 return TLO.CombineTo(Op, Op0);
1626
1627 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1628
1629 // Since the sign extended bits are demanded, we know that the sign
1630 // bit is demanded.
1631 InputDemandedBits.setBit(ExVTBits - 1);
1632
1633 if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1634 return true;
1635 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1636
1637 // If the sign bit of the input is known set or clear, then we know the
1638 // top bits of the result.
1639
1640 // If the input sign bit is known zero, convert this into a zero extension.
1641 if (Known.Zero[ExVTBits - 1])
1642 return TLO.CombineTo(
1643 Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1644
1645 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1646 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1647 Known.One.setBitsFrom(ExVTBits);
1648 Known.Zero &= Mask;
1649 } else { // Input sign bit unknown
1650 Known.Zero &= Mask;
1651 Known.One &= Mask;
1652 }
1653 break;
1654 }
1655 case ISD::BUILD_PAIR: {
1656 EVT HalfVT = Op.getOperand(0).getValueType();
1657 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
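// Operand 0 of a BUILD_PAIR supplies the low half of the result and
// operand 1 the high half, so split the demanded mask accordingly.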
1658
1659 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1660 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1661
1662 KnownBits KnownLo, KnownHi;
1663
1664 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1665 return true;
1666
1667 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1668 return true;
1669
1670 Known.Zero = KnownLo.Zero.zext(BitWidth) |
1671 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1672
1673 Known.One = KnownLo.One.zext(BitWidth) |
1674 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1675 break;
1676 }
1677 case ISD::ZERO_EXTEND:
1678 case ISD::ZERO_EXTEND_VECTOR_INREG: {
1679 SDValue Src = Op.getOperand(0);
1680 EVT SrcVT = Src.getValueType();
1681 unsigned InBits = SrcVT.getScalarSizeInBits();
1682 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1683 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1684
1685 // If none of the top bits are demanded, convert this into an any_extend.
1686 if (DemandedBits.getActiveBits() <= InBits) {
1687 // If we only need the non-extended bits of the bottom element
1688 // then we can just bitcast to the result.
1689 if (IsVecInReg && DemandedElts == 1 &&
1690 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1691 TLO.DAG.getDataLayout().isLittleEndian())
1692 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1693
1694 unsigned Opc =
1695 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1696 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1697 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1698 }
1699
1700 APInt InDemandedBits = DemandedBits.trunc(InBits);
1701 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1702 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1703 Depth + 1))
1704 return true;
1705 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1706 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1707 Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1708 break;
1709 }
1710 case ISD::SIGN_EXTEND:
1711 case ISD::SIGN_EXTEND_VECTOR_INREG: {
1712 SDValue Src = Op.getOperand(0);
1713 EVT SrcVT = Src.getValueType();
1714 unsigned InBits = SrcVT.getScalarSizeInBits();
1715 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1716 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1717
1718 // If none of the top bits are demanded, convert this into an any_extend.
1719 if (DemandedBits.getActiveBits() <= InBits) {
1720 // If we only need the non-extended bits of the bottom element
1721 // then we can just bitcast to the result.
1722 if (IsVecInReg && DemandedElts == 1 &&
1723 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1724 TLO.DAG.getDataLayout().isLittleEndian())
1725 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1726
1727 unsigned Opc =
1728 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1729 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1730 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1731 }
1732
1733 APInt InDemandedBits = DemandedBits.trunc(InBits);
1734 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1735
1736 // Since some of the sign extended bits are demanded, we know that the sign
1737 // bit is demanded.
1738 InDemandedBits.setBit(InBits - 1);
1739
1740 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1741 Depth + 1))
1742 return true;
1743 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1744 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1745
1746 // If the sign bit is known one, the top bits match.
1747 Known = Known.sext(BitWidth);
1748
1749 // If the sign bit is known zero, convert this to a zero extend.
1750 if (Known.isNonNegative()) {
1751 unsigned Opc =
1752 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1753 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1754 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1755 }
1756 break;
1757 }
1758 case ISD::ANY_EXTEND:
1759 case ISD::ANY_EXTEND_VECTOR_INREG: {
1760 SDValue Src = Op.getOperand(0);
1761 EVT SrcVT = Src.getValueType();
1762 unsigned InBits = SrcVT.getScalarSizeInBits();
1763 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1764 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1765
1766 // If we only need the bottom element then we can just bitcast.
1767 // TODO: Handle ANY_EXTEND?
1768 if (IsVecInReg && DemandedElts == 1 &&
1769 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1770 TLO.DAG.getDataLayout().isLittleEndian())
1771 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1772
1773 APInt InDemandedBits = DemandedBits.trunc(InBits);
1774 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1775 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1776 Depth + 1))
1777 return true;
1778 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1779 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1780 Known = Known.zext(BitWidth, false /* => any extend */);
1781 break;
1782 }
1783 case ISD::TRUNCATE: {
1784 SDValue Src = Op.getOperand(0);
1785
1786 // Simplify the input, using demanded bit information, and compute the known
1787 // zero/one bits live out.
1788 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1789 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1790 if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1791 return true;
1792 Known = Known.trunc(BitWidth);
1793
1794 // Attempt to avoid multi-use ops if we don't need anything from them.
1795 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1796 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1797 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1798
1799 // If the input is only used by this truncate, see if we can shrink it based
1800 // on the known demanded bits.
1801 if (Src.getNode()->hasOneUse()) {
1802 switch (Src.getOpcode()) {
1803 default:
1804 break;
1805 case ISD::SRL:
1806 // Shrink SRL by a constant if none of the high bits shifted in are
1807 // demanded.
1808 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1809 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1810 // undesirable.
1811 break;
1812
1813 SDValue ShAmt = Src.getOperand(1);
1814 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1815 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
1816 break;
1817 uint64_t ShVal = ShAmtC->getZExtValue();
1818
1819 APInt HighBits =
1820 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1821 HighBits.lshrInPlace(ShVal);
1822 HighBits = HighBits.trunc(BitWidth);
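// e.g. truncating (srl i64 %x, 8) to i32: the 32 truncated-away high
// bits of %x, shifted right by 8, land in bits [24, 32) of the result,
// so if those bits are not demanded, (srl (trunc %x), 8) is equivalent.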
1823
1824 if (!(HighBits & DemandedBits)) {
1825 // None of the shifted in bits are needed. Add a truncate of the
1826 // shift input, then shift it.
1827 if (TLO.LegalTypes())
1828 ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1829 SDValue NewTrunc =
1830 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1831 return TLO.CombineTo(
1832 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
1833 }
1834 break;
1835 }
1836 }
1837
1838 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1839 break;
1840 }
1841 case ISD::AssertZext: {
1842 // AssertZext demands all of the high bits, plus any of the low bits
1843 // demanded by its users.
1844 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1845 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1846 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1847 TLO, Depth + 1))
1848 return true;
1849 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1850
1851 Known.Zero |= ~InMask;
1852 break;
1853 }
1854 case ISD::EXTRACT_VECTOR_ELT: {
1855 SDValue Src = Op.getOperand(0);
1856 SDValue Idx = Op.getOperand(1);
1857 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1858 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1859
1860 // Demand the bits from every vector element without a constant index.
1861 APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1862 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1863 if (CIdx->getAPIntValue().ult(NumSrcElts))
1864 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
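// e.g. with a constant index of 2 and a v4i32 source, only source
// element 2 is demanded: DemandedSrcElts == 0b0100.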
1865
1866 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
1867 // anything about the extended bits.
1868 APInt DemandedSrcBits = DemandedBits;
1869 if (BitWidth > EltBitWidth)
1870 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1871
1872 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1873 Depth + 1))
1874 return true;
1875
1876 // Attempt to avoid multi-use ops if we don't need anything from them.
1877 if (!DemandedSrcBits.isAllOnesValue() ||
1878 !DemandedSrcElts.isAllOnesValue()) {
1879 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1880 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
1881 SDValue NewOp =
1882 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
1883 return TLO.CombineTo(Op, NewOp);
1884 }
1885 }
1886
1887 Known = Known2;
1888 if (BitWidth > EltBitWidth)
1889 Known = Known.zext(BitWidth, false /* => any extend */);
1890 break;
1891 }
1892 case ISD::BITCAST: {
1893 SDValue Src = Op.getOperand(0);
1894 EVT SrcVT = Src.getValueType();
1895 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1896
1897 // If this is an FP->Int bitcast and if the sign bit is the only
1898 // thing demanded, turn this into a FGETSIGN.
1899 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1900 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1901 SrcVT.isFloatingPoint()) {
1902 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1903 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1904 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1905 SrcVT != MVT::f128) {
1906 // Cannot eliminate/lower SHL for f128 yet.
1907 EVT Ty = OpVTLegal ? VT : MVT::i32;
1908 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1909 // place. We expect the SHL to be eliminated by other optimizations.
1910 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1911 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1912 if (!OpVTLegal && OpVTSizeInBits > 32)
1913 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1914 unsigned ShVal = Op.getValueSizeInBits() - 1;
1915 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1916 return TLO.CombineTo(Op,
1917 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1918 }
1919 }
1920
1921 // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
1922 // Demand the elt/bit if any of the original elts/bits are demanded.
1923 // TODO - bigendian once we have test coverage.
1924 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
1925 TLO.DAG.getDataLayout().isLittleEndian()) {
1926 unsigned Scale = BitWidth / NumSrcEltBits;
1927 unsigned NumSrcElts = SrcVT.getVectorNumElements();
1928 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1929 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
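// e.g. for (v2i64 (bitcast v4i32 x)), demanding bit 40 of i64 element j
// demands bit 8 of i32 source element 2 * j + 1 (little-endian).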
1930 for (unsigned i = 0; i != Scale; ++i) {
1931 unsigned Offset = i * NumSrcEltBits;
1932 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1933 if (!Sub.isNullValue()) {
1934 DemandedSrcBits |= Sub;
1935 for (unsigned j = 0; j != NumElts; ++j)
1936 if (DemandedElts[j])
1937 DemandedSrcElts.setBit((j * Scale) + i);
1938 }
1939 }
1940
1941 APInt KnownSrcUndef, KnownSrcZero;
1942 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1943 KnownSrcZero, TLO, Depth + 1))
1944 return true;
1945
1946 KnownBits KnownSrcBits;
1947 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1948 KnownSrcBits, TLO, Depth + 1))
1949 return true;
1950 } else if ((NumSrcEltBits % BitWidth) == 0 &&
1951 TLO.DAG.getDataLayout().isLittleEndian()) {
1952 unsigned Scale = NumSrcEltBits / BitWidth;
1953 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1954 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1955 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1956 for (unsigned i = 0; i != NumElts; ++i)
1957 if (DemandedElts[i]) {
1958 unsigned Offset = (i % Scale) * BitWidth;
1959 DemandedSrcBits.insertBits(DemandedBits, Offset);
1960 DemandedSrcElts.setBit(i / Scale);
1961 }
1962
1963 if (SrcVT.isVector()) {
1964 APInt KnownSrcUndef, KnownSrcZero;
1965 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1966 KnownSrcZero, TLO, Depth + 1))
1967 return true;
1968 }
1969
1970 KnownBits KnownSrcBits;
1971 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1972 KnownSrcBits, TLO, Depth + 1))
1973 return true;
1974 }
1975
1976 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
1977 // recursive call where Known may be useful to the caller.
1978 if (Depth > 0) {
1979 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1980 return false;
1981 }
1982 break;
1983 }
1984 case ISD::ADD:
1985 case ISD::MUL:
1986 case ISD::SUB: {
1987 // Add, Sub, and Mul don't demand any bits in positions beyond that
1988 // of the highest bit demanded of them.
1989 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1990 SDNodeFlags Flags = Op.getNode()->getFlags();
1991 unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1992 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
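// e.g. if only the low 8 bits of an i32 add are demanded, LoMask == 0xFF:
// carries only propagate upwards, so operand bits above bit 7 cannot
// influence any demanded bit of the result.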
1993 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1994 Depth + 1) ||
1995 SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1996 Depth + 1) ||
1997 // See if the operation should be performed at a smaller bit width.
1998 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1999 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2000 // Disable the nsw and nuw flags. We can no longer guarantee that we
2001 // won't wrap after simplification.
2002 Flags.setNoSignedWrap(false);
2003 Flags.setNoUnsignedWrap(false);
2004 SDValue NewOp =
2005 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2006 return TLO.CombineTo(Op, NewOp);
2007 }
2008 return true;
2009 }
2010
2011 // Attempt to avoid multi-use ops if we don't need anything from them.
2012 if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
2013 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2014 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2015 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2016 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2017 if (DemandedOp0 || DemandedOp1) {
2018 Flags.setNoSignedWrap(false);
2019 Flags.setNoUnsignedWrap(false);
2020 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2021 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2022 SDValue NewOp =
2023 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2024 return TLO.CombineTo(Op, NewOp);
2025 }
2026 }
2027
2028 // If we have a constant operand, we may be able to turn it into -1 if we
2029 // do not demand the high bits. This can make the constant smaller to
2030 // encode, allow more general folding, or match specialized instruction
2031 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2032 // is probably not useful (and could be detrimental).
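// e.g. with only the low 8 bits of an i32 demanded, (add x, 0xFF) can
// become (add x, -1): the two constants agree on all of the low bits, and
// the differing high bits cannot carry down into the demanded bits.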
2033 ConstantSDNode *C = isConstOrConstSplat(Op1);
2034 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2035 if (C && !C->isAllOnesValue() && !C->isOne() &&
2036 (C->getAPIntValue() | HighMask).isAllOnesValue()) {
2037 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2038 // Disable the nsw and nuw flags. We can no longer guarantee that we
2039 // won't wrap after simplification.
2040 Flags.setNoSignedWrap(false);
2041 Flags.setNoUnsignedWrap(false);
2042 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2043 return TLO.CombineTo(Op, NewOp);
2044 }
2045
2046 LLVM_FALLTHROUGH;
2047 }
2048 default:
2049 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2050 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2051 Known, TLO, Depth))
2052 return true;
2053 break;
2054 }
2055
2056 // Just use computeKnownBits to compute output bits.
2057 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2058 break;
2059 }
2060
2061 // If we know the value of all of the demanded bits, return this as a
2062 // constant.
2063 if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2064 // Avoid folding to a constant if any OpaqueConstant is involved.
2065 const SDNode *N = Op.getNode();
2066 for (SDNodeIterator I = SDNodeIterator::begin(N),
2067 E = SDNodeIterator::end(N);
2068 I != E; ++I) {
2069 SDNode *Op = *I;
2070 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2071 if (C->isOpaque())
2072 return false;
2073 }
2074 // TODO: Handle float bits as well.
2075 if (VT.isInteger())
2076 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2077 }
2078
2079 return false;
2080 }
2081
2082 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2083 const APInt &DemandedElts,
2084 APInt &KnownUndef,
2085 APInt &KnownZero,
2086 DAGCombinerInfo &DCI) const {
2087 SelectionDAG &DAG = DCI.DAG;
2088 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2089 !DCI.isBeforeLegalizeOps());
2090
2091 bool Simplified =
2092 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2093 if (Simplified) {
2094 DCI.AddToWorklist(Op.getNode());
2095 DCI.CommitTargetLoweringOpt(TLO);
2096 }
2097
2098 return Simplified;
2099 }
2100
2101 /// Given a vector binary operation and known undefined elements for each input
2102 /// operand, compute whether each element of the output is undefined.
2103 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2104 const APInt &UndefOp0,
2105 const APInt &UndefOp1) {
2106 EVT VT = BO.getValueType();
2107 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2108 "Vector binop only");
2109
2110 EVT EltVT = VT.getVectorElementType();
2111 unsigned NumElts = VT.getVectorNumElements();
2112 assert(UndefOp0.getBitWidth() == NumElts &&
2113 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2114
2115 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2116 const APInt &UndefVals) {
2117 if (UndefVals[Index])
2118 return DAG.getUNDEF(EltVT);
2119
2120 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2121 // Try hard to make sure that the getNode() call is not creating temporary
2122 // nodes. Ignore opaque integers because they do not constant fold.
2123 SDValue Elt = BV->getOperand(Index);
2124 auto *C = dyn_cast<ConstantSDNode>(Elt);
2125 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2126 return Elt;
2127 }
2128
2129 return SDValue();
2130 };
2131
2132 APInt KnownUndef = APInt::getNullValue(NumElts);
2133 for (unsigned i = 0; i != NumElts; ++i) {
2134 // If both inputs for this element are either constant or undef and match
2135 // the element type, compute the constant/undef result for this element of
2136 // the vector.
2137 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2138 // not handle FP constants. The code within getNode() should be refactored
2139 // to avoid the danger of creating a bogus temporary node here.
2140 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2141 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2142 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2143 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2144 KnownUndef.setBit(i);
2145 }
2146 return KnownUndef;
2147 }
2148
2149 bool TargetLowering::SimplifyDemandedVectorElts(
2150 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2151 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2152 bool AssumeSingleUse) const {
2153 EVT VT = Op.getValueType();
2154 APInt DemandedElts = OriginalDemandedElts;
2155 unsigned NumElts = DemandedElts.getBitWidth();
2156 assert(VT.isVector() && "Expected vector op");
2157 assert(VT.getVectorNumElements() == NumElts &&
2158 "Mask size mismatches value type element count!");
2159
2160 KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2161
2162 // Undef operand.
2163 if (Op.isUndef()) {
2164 KnownUndef.setAllBits();
2165 return false;
2166 }
2167
2168 // If Op has other users, assume that all elements are needed.
2169 if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2170 DemandedElts.setAllBits();
2171
2172 // Not demanding any elements from Op.
2173 if (DemandedElts == 0) {
2174 KnownUndef.setAllBits();
2175 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2176 }
2177
2178 // Limit search depth.
2179 if (Depth >= SelectionDAG::MaxRecursionDepth)
2180 return false;
2181
2182 SDLoc DL(Op);
2183 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2184
2185 switch (Op.getOpcode()) {
2186 case ISD::SCALAR_TO_VECTOR: {
2187 if (!DemandedElts[0]) {
2188 KnownUndef.setAllBits();
2189 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2190 }
2191 KnownUndef.setHighBits(NumElts - 1);
2192 break;
2193 }
2194 case ISD::BITCAST: {
2195 SDValue Src = Op.getOperand(0);
2196 EVT SrcVT = Src.getValueType();
2197
2198 // We only handle vectors here.
2199 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2200 if (!SrcVT.isVector())
2201 break;
2202
2203 // Fast handling of 'identity' bitcasts.
2204 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2205 if (NumSrcElts == NumElts)
2206 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2207 KnownZero, TLO, Depth + 1);
2208
2209 APInt SrcZero, SrcUndef;
2210 APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2211
2212 // Bitcast from 'large element' src vector to 'small element' vector: we
2213 // must demand a source element if any DemandedElt maps to it.
2214 if ((NumElts % NumSrcElts) == 0) {
2215 unsigned Scale = NumElts / NumSrcElts;
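// e.g. for (v4i32 (bitcast v2i64 x)), Scale == 2 and demanding v4i32
// element 3 demands v2i64 source element 1.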
2216 for (unsigned i = 0; i != NumElts; ++i)
2217 if (DemandedElts[i])
2218 SrcDemandedElts.setBit(i / Scale);
2219
2220 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2221 TLO, Depth + 1))
2222 return true;
2223
2224 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2225 // of the large element.
2226 // TODO - bigendian once we have test coverage.
2227 if (TLO.DAG.getDataLayout().isLittleEndian()) {
2228 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2229 APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2230 for (unsigned i = 0; i != NumElts; ++i)
2231 if (DemandedElts[i]) {
2232 unsigned Ofs = (i % Scale) * EltSizeInBits;
2233 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2234 }
2235
2236 KnownBits Known;
2237 if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2238 return true;
2239 }
2240
2241 // If a src element is zero/undef then all the output elements mapping to it
2242 // will be too - only the demanded elements are guaranteed to be correct.
2243 for (unsigned i = 0; i != NumSrcElts; ++i) {
2244 if (SrcDemandedElts[i]) {
2245 if (SrcZero[i])
2246 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2247 if (SrcUndef[i])
2248 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2249 }
2250 }
2251 }
2252
2253 // Bitcast from 'small element' src vector to 'large element' vector: we
2254 // demand all the smaller source elements covered by the larger demanded
2255 // element of this vector.
2256 if ((NumSrcElts % NumElts) == 0) {
2257 unsigned Scale = NumSrcElts / NumElts;
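// e.g. for (v2i64 (bitcast v4i32 x)), Scale == 2 and demanding i64
// element 1 demands i32 source elements 2 and 3.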
2258 for (unsigned i = 0; i != NumElts; ++i)
2259 if (DemandedElts[i])
2260 SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2261
2262 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2263 TLO, Depth + 1))
2264 return true;
2265
2266 // If all the src elements covering an output element are zero/undef, then
2267 // the output element will be as well, assuming it was demanded.
2268 for (unsigned i = 0; i != NumElts; ++i) {
2269 if (DemandedElts[i]) {
2270 if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2271 KnownZero.setBit(i);
2272 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2273 KnownUndef.setBit(i);
2274 }
2275 }
2276 }
2277 break;
2278 }
2279 case ISD::BUILD_VECTOR: {
2280 // Check all elements and simplify any unused elements with UNDEF.
2281 if (!DemandedElts.isAllOnesValue()) {
2282 // Don't simplify BROADCASTS.
2283 if (llvm::any_of(Op->op_values(),
2284 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2285 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2286 bool Updated = false;
2287 for (unsigned i = 0; i != NumElts; ++i) {
2288 if (!DemandedElts[i] && !Ops[i].isUndef()) {
2289 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2290 KnownUndef.setBit(i);
2291 Updated = true;
2292 }
2293 }
2294 if (Updated)
2295 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2296 }
2297 }
2298 for (unsigned i = 0; i != NumElts; ++i) {
2299 SDValue SrcOp = Op.getOperand(i);
2300 if (SrcOp.isUndef()) {
2301 KnownUndef.setBit(i);
2302 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2303 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2304 KnownZero.setBit(i);
2305 }
2306 }
2307 break;
2308 }
2309 case ISD::CONCAT_VECTORS: {
2310 EVT SubVT = Op.getOperand(0).getValueType();
2311 unsigned NumSubVecs = Op.getNumOperands();
2312 unsigned NumSubElts = SubVT.getVectorNumElements();
2313 for (unsigned i = 0; i != NumSubVecs; ++i) {
2314 SDValue SubOp = Op.getOperand(i);
2315 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2316 APInt SubUndef, SubZero;
2317 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2318 Depth + 1))
2319 return true;
2320 KnownUndef.insertBits(SubUndef, i * NumSubElts);
2321 KnownZero.insertBits(SubZero, i * NumSubElts);
2322 }
2323 break;
2324 }
2325 case ISD::INSERT_SUBVECTOR: {
2326 if (!isa<ConstantSDNode>(Op.getOperand(2)))
2327 break;
2328 SDValue Base = Op.getOperand(0);
2329 SDValue Sub = Op.getOperand(1);
2330 EVT SubVT = Sub.getValueType();
2331 unsigned NumSubElts = SubVT.getVectorNumElements();
2332 const APInt &Idx = Op.getConstantOperandAPInt(2);
2333 if (Idx.ugt(NumElts - NumSubElts))
2334 break;
2335 unsigned SubIdx = Idx.getZExtValue();
2336 APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2337 APInt SubUndef, SubZero;
2338 if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2339 Depth + 1))
2340 return true;
2341 APInt BaseElts = DemandedElts;
2342 BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2343
2344 // If none of the base operand elements are demanded, replace it with undef.
2345 if (!BaseElts && !Base.isUndef())
2346 return TLO.CombineTo(Op,
2347 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2348 TLO.DAG.getUNDEF(VT),
2349 Op.getOperand(1),
2350 Op.getOperand(2)));
2351
2352 if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2353 Depth + 1))
2354 return true;
2355 KnownUndef.insertBits(SubUndef, SubIdx);
2356 KnownZero.insertBits(SubZero, SubIdx);
2357 break;
2358 }
2359 case ISD::EXTRACT_SUBVECTOR: {
2360 SDValue Src = Op.getOperand(0);
2361 ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2362 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2363 if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2364 // Offset the demanded elts by the subvector index.
2365 uint64_t Idx = SubIdx->getZExtValue();
2366 APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2367 APInt SrcUndef, SrcZero;
2368 if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2369 Depth + 1))
2370 return true;
2371 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2372 KnownZero = SrcZero.extractBits(NumElts, Idx);
2373 }
2374 break;
2375 }
2376 case ISD::INSERT_VECTOR_ELT: {
2377 SDValue Vec = Op.getOperand(0);
2378 SDValue Scl = Op.getOperand(1);
2379 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2380
2381 // For a legal, constant insertion index, if we don't need this insertion
2382 // then strip it, else clear the inserted index from the demanded elts.
2383 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2384 unsigned Idx = CIdx->getZExtValue();
2385 if (!DemandedElts[Idx])
2386 return TLO.CombineTo(Op, Vec);
2387
2388 APInt DemandedVecElts(DemandedElts);
2389 DemandedVecElts.clearBit(Idx);
2390 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2391 KnownZero, TLO, Depth + 1))
2392 return true;
2393
2394 KnownUndef.clearBit(Idx);
2395 if (Scl.isUndef())
2396 KnownUndef.setBit(Idx);
2397
2398 KnownZero.clearBit(Idx);
2399 if (isNullConstant(Scl) || isNullFPConstant(Scl))
2400 KnownZero.setBit(Idx);
2401 break;
2402 }
2403
2404 APInt VecUndef, VecZero;
2405 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2406 Depth + 1))
2407 return true;
2408 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2409 break;
2410 }
2411 case ISD::VSELECT: {
2412 // Try to transform the select condition based on the current demanded
2413 // elements.
2414 // TODO: If a condition element is undef, we can choose from one arm of the
2415 // select (and if one arm is undef, then we can propagate that to the
2416 // result).
2417 // TODO - add support for constant vselect masks (see IR version of this).
2418 APInt UnusedUndef, UnusedZero;
2419 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2420 UnusedZero, TLO, Depth + 1))
2421 return true;
2422
2423 // See if we can simplify either vselect operand.
2424 APInt DemandedLHS(DemandedElts);
2425 APInt DemandedRHS(DemandedElts);
2426 APInt UndefLHS, ZeroLHS;
2427 APInt UndefRHS, ZeroRHS;
2428 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2429 ZeroLHS, TLO, Depth + 1))
2430 return true;
2431 if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2432 ZeroRHS, TLO, Depth + 1))
2433 return true;
2434
2435 KnownUndef = UndefLHS & UndefRHS;
2436 KnownZero = ZeroLHS & ZeroRHS;
2437 break;
2438 }
2439 case ISD::VECTOR_SHUFFLE: {
2440 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2441
2442 // Collect demanded elements from shuffle operands.
2443 APInt DemandedLHS(NumElts, 0);
2444 APInt DemandedRHS(NumElts, 0);
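// e.g. with mask <0, 5, 2, 7> on two v4i32 inputs, demanding only result
// elements {1, 3} demands RHS elements {1, 3} and nothing from the LHS.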
2445 for (unsigned i = 0; i != NumElts; ++i) {
2446 int M = ShuffleMask[i];
2447 if (M < 0 || !DemandedElts[i])
2448 continue;
2449 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2450 if (M < (int)NumElts)
2451 DemandedLHS.setBit(M);
2452 else
2453 DemandedRHS.setBit(M - NumElts);
2454 }
2455
2456 // See if we can simplify either shuffle operand.
2457 APInt UndefLHS, ZeroLHS;
2458 APInt UndefRHS, ZeroRHS;
2459 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2460 ZeroLHS, TLO, Depth + 1))
2461 return true;
2462 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2463 ZeroRHS, TLO, Depth + 1))
2464 return true;
2465
2466 // Simplify mask using undef elements from LHS/RHS.
2467 bool Updated = false;
2468 bool IdentityLHS = true, IdentityRHS = true;
2469 SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2470 for (unsigned i = 0; i != NumElts; ++i) {
2471 int &M = NewMask[i];
2472 if (M < 0)
2473 continue;
2474 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2475 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2476 Updated = true;
2477 M = -1;
2478 }
2479 IdentityLHS &= (M < 0) || (M == (int)i);
2480 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2481 }
2482
2483 // Update legal shuffle masks based on demanded elements, unless the mask
2484 // would reduce to an identity, which can cause its premature removal.
2485 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2486 SDValue LegalShuffle =
2487 buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2488 NewMask, TLO.DAG);
2489 if (LegalShuffle)
2490 return TLO.CombineTo(Op, LegalShuffle);
2491 }
2492
2493 // Propagate undef/zero elements from LHS/RHS.
2494 for (unsigned i = 0; i != NumElts; ++i) {
2495 int M = ShuffleMask[i];
2496 if (M < 0) {
2497 KnownUndef.setBit(i);
2498 } else if (M < (int)NumElts) {
2499 if (UndefLHS[M])
2500 KnownUndef.setBit(i);
2501 if (ZeroLHS[M])
2502 KnownZero.setBit(i);
2503 } else {
2504 if (UndefRHS[M - NumElts])
2505 KnownUndef.setBit(i);
2506 if (ZeroRHS[M - NumElts])
2507 KnownZero.setBit(i);
2508 }
2509 }
2510 break;
2511 }
2512 case ISD::ANY_EXTEND_VECTOR_INREG:
2513 case ISD::SIGN_EXTEND_VECTOR_INREG:
2514 case ISD::ZERO_EXTEND_VECTOR_INREG: {
2515 APInt SrcUndef, SrcZero;
2516 SDValue Src = Op.getOperand(0);
2517 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2518 APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2519 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2520 Depth + 1))
2521 return true;
2522 KnownZero = SrcZero.zextOrTrunc(NumElts);
2523 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2524
2525 if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2526 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2527 DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2528 // aext - if we just need the bottom element then we can bitcast.
2529 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2530 }
2531
2532 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2533 // zext(undef) upper bits are guaranteed to be zero.
2534 if (DemandedElts.isSubsetOf(KnownUndef))
2535 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2536 KnownUndef.clearAllBits();
2537 }
2538 break;
2539 }
2540
2541 // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2542 // MAX, saturated math, etc.
2543 case ISD::OR:
2544 case ISD::XOR:
2545 case ISD::ADD:
2546 case ISD::SUB:
2547 case ISD::FADD:
2548 case ISD::FSUB:
2549 case ISD::FMUL:
2550 case ISD::FDIV:
2551 case ISD::FREM: {
2552 APInt UndefRHS, ZeroRHS;
2553 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2554 ZeroRHS, TLO, Depth + 1))
2555 return true;
2556 APInt UndefLHS, ZeroLHS;
2557 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2558 ZeroLHS, TLO, Depth + 1))
2559 return true;
2560
2561 KnownZero = ZeroLHS & ZeroRHS;
2562 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2563 break;
2564 }
2565 case ISD::SHL:
2566 case ISD::SRL:
2567 case ISD::SRA:
2568 case ISD::ROTL:
2569 case ISD::ROTR: {
2570 APInt UndefRHS, ZeroRHS;
2571 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2572 ZeroRHS, TLO, Depth + 1))
2573 return true;
2574 APInt UndefLHS, ZeroLHS;
2575 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2576 ZeroLHS, TLO, Depth + 1))
2577 return true;
2578
2579 KnownZero = ZeroLHS;
2580 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2581 break;
2582 }
2583 case ISD::MUL:
2584 case ISD::AND: {
2585 APInt SrcUndef, SrcZero;
2586 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2587 SrcZero, TLO, Depth + 1))
2588 return true;
2589 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2590 KnownZero, TLO, Depth + 1))
2591 return true;
2592
2593 // If either side has a zero element, then the result element is zero, even
2594 // if the other is an UNDEF.
2595 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2596 // and then handle 'and' nodes with the rest of the binop opcodes.
2597 KnownZero |= SrcZero;
2598 KnownUndef &= SrcUndef;
2599 KnownUndef &= ~KnownZero;
2600 break;
2601 }
2602 case ISD::TRUNCATE:
2603 case ISD::SIGN_EXTEND:
2604 case ISD::ZERO_EXTEND:
2605 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2606 KnownZero, TLO, Depth + 1))
2607 return true;
2608
2609 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2610 // zext(undef) upper bits are guaranteed to be zero.
2611 if (DemandedElts.isSubsetOf(KnownUndef))
2612 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2613 KnownUndef.clearAllBits();
2614 }
2615 break;
2616 default: {
2617 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2618 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2619 KnownZero, TLO, Depth))
2620 return true;
2621 } else {
2622 KnownBits Known;
2623 APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2624 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2625 TLO, Depth, AssumeSingleUse))
2626 return true;
2627 }
2628 break;
2629 }
2630 }
2631 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2632
2633 // Constant fold all undef cases.
2634 // TODO: Handle zero cases as well.
2635 if (DemandedElts.isSubsetOf(KnownUndef))
2636 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2637
2638 return false;
2639 }
2640
2641 /// Determine which of the bits specified in Mask are known to be either zero
2642 /// or one and return them in Known.
2643 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2644 KnownBits &Known,
2645 const APInt &DemandedElts,
2646 const SelectionDAG &DAG,
2647 unsigned Depth) const {
2648 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2649 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2650 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2651 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2652 "Should use MaskedValueIsZero if you don't know whether Op"
2653 " is a target node!");
2654 Known.resetAll();
2655 }
2656
2657 void TargetLowering::computeKnownBitsForTargetInstr(
2658 GISelKnownBits &Analysis, Register R, KnownBits &Known,
2659 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
2660 unsigned Depth) const {
2661 Known.resetAll();
2662 }
2663
2664 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2665 KnownBits &Known,
2666 const APInt &DemandedElts,
2667 const SelectionDAG &DAG,
2668 unsigned Depth) const {
2669 assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2670
2671 if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2672 // The low bits are known zero if the pointer is aligned.
2673 Known.Zero.setLowBits(Log2_32(Align));
2674 }
2675 }
2676
2677 /// This method can be implemented by targets that want to expose additional
2678 /// information about sign bits to the DAG Combiner.
2679 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2680 const APInt &,
2681 const SelectionDAG &,
2682 unsigned Depth) const {
2683 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2684 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2685 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2686 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2687 "Should use ComputeNumSignBits if you don't know whether Op"
2688 " is a target node!");
2689 return 1;
2690 }
2691
2692 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2693 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2694 TargetLoweringOpt &TLO, unsigned Depth) const {
2695 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2696 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2697 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2698 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2699 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2700 " is a target node!");
2701 return false;
2702 }
2703
2704 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2705 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2706 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2707 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2708 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2709 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2710 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2711 "Should use SimplifyDemandedBits if you don't know whether Op"
2712 " is a target node!");
2713 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2714 return false;
2715 }
2716
2717 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2718 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2719 SelectionDAG &DAG, unsigned Depth) const {
2720 assert(
2721 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2722 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2723 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2724 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2725 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2726 " is a target node!");
2727 return SDValue();
2728 }
2729
2730 SDValue
2731 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2732 SDValue N1, MutableArrayRef<int> Mask,
2733 SelectionDAG &DAG) const {
2734 bool LegalMask = isShuffleMaskLegal(Mask, VT);
2735 if (!LegalMask) {
2736 std::swap(N0, N1);
2737 ShuffleVectorSDNode::commuteMask(Mask);
2738 LegalMask = isShuffleMaskLegal(Mask, VT);
2739 }
2740
2741 if (!LegalMask)
2742 return SDValue();
2743
2744 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2745 }
2746
2747 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
2748 return nullptr;
2749 }
2750
2751 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2752 const SelectionDAG &DAG,
2753 bool SNaN,
2754 unsigned Depth) const {
2755 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2756 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2758 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2759 "Should use isKnownNeverNaN if you don't know whether Op"
2760 " is a target node!");
2761 return false;
2762 }
2763
2764 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that would
2765 // need to handle truncating build vectors and vectors with elements of less
2766 // than 8 bits.
2767 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2768 if (!N)
2769 return false;
2770
2771 APInt CVal;
2772 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2773 CVal = CN->getAPIntValue();
2774 } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2775 auto *CN = BV->getConstantSplatNode();
2776 if (!CN)
2777 return false;
2778
2779 // If this is a truncating build vector, truncate the splat value.
2780 // Otherwise, we may fail to match the expected values below.
2781 unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2782 CVal = CN->getAPIntValue();
2783 if (BVEltWidth < CVal.getBitWidth())
2784 CVal = CVal.trunc(BVEltWidth);
2785 } else {
2786 return false;
2787 }
2788
2789 switch (getBooleanContents(N->getValueType(0))) {
2790 case UndefinedBooleanContent:
2791 return CVal[0];
2792 case ZeroOrOneBooleanContent:
2793 return CVal.isOneValue();
2794 case ZeroOrNegativeOneBooleanContent:
2795 return CVal.isAllOnesValue();
2796 }
2797
2798 llvm_unreachable("Invalid boolean contents");
2799 }
2800
2801 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2802 if (!N)
2803 return false;
2804
2805 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2806 if (!CN) {
2807 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2808 if (!BV)
2809 return false;
2810
2811 // Only interested in constant splats; we don't care about undef elements
2812 // when identifying boolean constants, and getConstantSplatNode returns
2813 // null if all ops are undef.
2814 CN = BV->getConstantSplatNode();
2815 if (!CN)
2816 return false;
2817 }
2818
2819 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2820 return !CN->getAPIntValue()[0];
2821
2822 return CN->isNullValue();
2823 }
2824
2825 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2826 bool SExt) const {
2827 if (VT == MVT::i1)
2828 return N->isOne();
2829
2830 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2831 switch (Cnt) {
2832 case TargetLowering::ZeroOrOneBooleanContent:
2833 // An extended value of 1 is always true, unless its original type is i1,
2834 // in which case it will be sign extended to -1.
2835 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
2836 case TargetLowering::UndefinedBooleanContent:
2837 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2838 return N->isAllOnesValue() && SExt;
2839 }
2840 llvm_unreachable("Unexpected enumeration.");
2841 }
2842
2843 /// This helper function of SimplifySetCC tries to optimize the comparison when
2844 /// either operand of the SetCC node is a bitwise-and instruction.
2845 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
2846 ISD::CondCode Cond, const SDLoc &DL,
2847 DAGCombinerInfo &DCI) const {
2848 // Match these patterns in any of their permutations:
2849 // (X & Y) == Y
2850 // (X & Y) != Y
2851 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
2852 std::swap(N0, N1);
2853
2854 EVT OpVT = N0.getValueType();
2855 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
2856 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
2857 return SDValue();
2858
2859 SDValue X, Y;
2860 if (N0.getOperand(0) == N1) {
2861 X = N0.getOperand(1);
2862 Y = N0.getOperand(0);
2863 } else if (N0.getOperand(1) == N1) {
2864 X = N0.getOperand(0);
2865 Y = N0.getOperand(1);
2866 } else {
2867 return SDValue();
2868 }
2869
2870 SelectionDAG &DAG = DCI.DAG;
2871 SDValue Zero = DAG.getConstant(0, DL, OpVT);
2872 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
2873 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
2874 // Note that where Y is variable and is known to have at most one bit set
2875 // (for example, if it is Z & 1) we cannot do this; the expressions are not
2876 // equivalent when Y == 0.
2877 assert(OpVT.isInteger());
2878 Cond = ISD::getSetCCInverse(Cond, OpVT);
2879 if (DCI.isBeforeLegalizeOps() ||
2880 isCondCodeLegal(Cond, N0.getSimpleValueType()))
2881 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
2882 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
2883 // If the target supports an 'and-not' or 'and-complement' logic operation,
2884 // try to use that to make a comparison operation more efficient.
2885 // But don't do this transform if the mask is a single bit because there are
2886 // more efficient ways to deal with that case (for example, 'bt' on x86 or
2887 // 'rlwinm' on PPC).
2888
2889 // Bail out if the compare operand that we want to turn into a zero is
2890 // already a zero (otherwise, infinite loop).
2891 auto *YConst = dyn_cast<ConstantSDNode>(Y);
2892 if (YConst && YConst->isNullValue())
2893 return SDValue();
2894
2895 // Transform this into: ~X & Y == 0.
2896 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
2897 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
2898 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
2899 }
2900
2901 return SDValue();
2902 }
2903
2904 /// There are multiple IR patterns that could be checking whether a certain
2905 /// truncation of a signed number would be lossy or not. The pattern that is
2906 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
2907 /// We are looking for the following pattern: (KeptBits is a constant)
2908 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
2909 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
2910 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
2911 /// We will unfold it into the natural trunc+sext pattern:
2912 /// ((%x << C) a>> C) dstcond %x
2913 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
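/// For example (an illustrative instance, with i16 %x and KeptBits == 8):
///   (add %x, 128) ult 256  -->  ((%x << 8) a>> 8) eq %x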
2914 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
2915 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
2916 const SDLoc &DL) const {
2917 // We must be comparing with a constant.
2918 ConstantSDNode *C1;
2919 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
2920 return SDValue();
2921
2922 // N0 should be: add %x, (1 << (KeptBits-1))
2923 if (N0->getOpcode() != ISD::ADD)
2924 return SDValue();
2925
2926 // And we must be 'add'ing a constant.
2927 ConstantSDNode *C01;
2928 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
2929 return SDValue();
2930
2931 SDValue X = N0->getOperand(0);
2932 EVT XVT = X.getValueType();
2933
2934 // Validate constants ...
2935
2936 APInt I1 = C1->getAPIntValue();
2937
2938 ISD::CondCode NewCond;
2939 if (Cond == ISD::CondCode::SETULT) {
2940 NewCond = ISD::CondCode::SETEQ;
2941 } else if (Cond == ISD::CondCode::SETULE) {
2942 NewCond = ISD::CondCode::SETEQ;
2943 // But need to 'canonicalize' the constant.
2944 I1 += 1;
2945 } else if (Cond == ISD::CondCode::SETUGT) {
2946 NewCond = ISD::CondCode::SETNE;
2947 // But need to 'canonicalize' the constant.
2948 I1 += 1;
2949 } else if (Cond == ISD::CondCode::SETUGE) {
2950 NewCond = ISD::CondCode::SETNE;
2951 } else
2952 return SDValue();
2953
2954 APInt I01 = C01->getAPIntValue();
2955
2956 auto checkConstants = [&I1, &I01]() -> bool {
2957 // Both must be powers of two, and the constant from the setcc must be bigger.
2958 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
2959 };
2960
2961 if (checkConstants()) {
2962 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
2963 } else {
2964 // What if we invert constants? (and the target predicate)
2965 I1.negate();
2966 I01.negate();
2967 assert(XVT.isInteger());
2968 NewCond = getSetCCInverse(NewCond, XVT);
2969 if (!checkConstants())
2970 return SDValue();
2971 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
2972 }
2973
2974 // They are power-of-two, so which bit is set?
2975 const unsigned KeptBits = I1.logBase2();
2976 const unsigned KeptBitsMinusOne = I01.logBase2();
2977
2978 // The constants must be adjacent powers of two (I01 == I1 / 2).
2979 if (KeptBits != (KeptBitsMinusOne + 1))
2980 return SDValue();
2981 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
2982
2983 // We don't want to do this in every single case.
2984 SelectionDAG &DAG = DCI.DAG;
2985 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
2986 XVT, KeptBits))
2987 return SDValue();
2988
2989 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
2990 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
2991
2992 // Unfold into: ((%x << C) a>> C) cond %x
2993 // Where 'cond' will be either 'eq' or 'ne'.
2994 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2995 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2996 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2997 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2998
2999 return T2;
3000 }
3001
3002 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
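// For example (illustrative): (X & (1 << Y)) != 0 --> ((X >> Y) & 1) != 0.
// Hoisting the constant out of the variable shift lets it participate in the
// 'and' (and possibly the compare) as a plain immediate.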
3003 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3004 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3005 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3006 assert(isConstOrConstSplat(N1C) &&
3007 isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
3008 "Should be a comparison with 0.");
3009 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3010 "Valid only for [in]equality comparisons.");
3011
3012 unsigned NewShiftOpcode;
3013 SDValue X, C, Y;
3014
3015 SelectionDAG &DAG = DCI.DAG;
3016 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3017
3018 // Look for '(C l>>/<< Y)'.
3019 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3020 // The shift should be one-use.
3021 if (!V.hasOneUse())
3022 return false;
3023 unsigned OldShiftOpcode = V.getOpcode();
3024 switch (OldShiftOpcode) {
3025 case ISD::SHL:
3026 NewShiftOpcode = ISD::SRL;
3027 break;
3028 case ISD::SRL:
3029 NewShiftOpcode = ISD::SHL;
3030 break;
3031 default:
3032 return false; // must be a logical shift.
3033 }
3034 // We should be shifting a constant.
3035 // FIXME: best to use isConstantOrConstantVector().
3036 C = V.getOperand(0);
3037 ConstantSDNode *CC =
3038 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3039 if (!CC)
3040 return false;
3041 Y = V.getOperand(1);
3042
3043 ConstantSDNode *XC =
3044 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3045 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3046 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3047 };
3048
3049 // LHS of the comparison should be a one-use 'and'.
3050 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3051 return SDValue();
3052
3053 X = N0.getOperand(0);
3054 SDValue Mask = N0.getOperand(1);
3055
3056 // 'and' is commutative!
3057 if (!Match(Mask)) {
3058 std::swap(X, Mask);
3059 if (!Match(Mask))
3060 return SDValue();
3061 }
3062
3063 EVT VT = X.getValueType();
3064
3065 // Produce:
3066 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3067 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3068 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3069 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3070 return T2;
3071 }
3072
3073 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3074 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3075 /// handle the commuted versions of these patterns.
3076 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3077 ISD::CondCode Cond, const SDLoc &DL,
3078 DAGCombinerInfo &DCI) const {
3079 unsigned BOpcode = N0.getOpcode();
3080 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3081 "Unexpected binop");
3082 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3083
3084 // (X + Y) == X --> Y == 0
3085 // (X - Y) == X --> Y == 0
3086 // (X ^ Y) == X --> Y == 0
3087 SelectionDAG &DAG = DCI.DAG;
3088 EVT OpVT = N0.getValueType();
3089 SDValue X = N0.getOperand(0);
3090 SDValue Y = N0.getOperand(1);
3091 if (X == N1)
3092 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3093
3094 if (Y != N1)
3095 return SDValue();
3096
3097 // (X + Y) == Y --> X == 0
3098 // (X ^ Y) == Y --> X == 0
3099 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3100 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3101
3102 // The shift would not be valid if the operands are boolean (i1).
3103 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3104 return SDValue();
3105
3106 // (X - Y) == Y --> X == Y << 1
3107 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3108 !DCI.isBeforeLegalize());
3109 SDValue One = DAG.getConstant(1, DL, ShiftVT);
3110 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3111 if (!DCI.isCalledByLegalizer())
3112 DCI.AddToWorklist(YShl1.getNode());
3113 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3114 }
3115
3116 /// Try to simplify a setcc built with the specified operands and cc. If it is
3117 /// unable to simplify it, return a null SDValue.
3118 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3119 ISD::CondCode Cond, bool foldBooleans,
3120 DAGCombinerInfo &DCI,
3121 const SDLoc &dl) const {
3122 SelectionDAG &DAG = DCI.DAG;
3123 const DataLayout &Layout = DAG.getDataLayout();
3124 EVT OpVT = N0.getValueType();
3125
3126 // Constant fold or commute setcc.
3127 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3128 return Fold;
3129
3130 // Ensure that the constant occurs on the RHS and fold constant comparisons.
3131 // TODO: Handle non-splat vector constants. All undef causes trouble.
3132 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3133 if (isConstOrConstSplat(N0) &&
3134 (DCI.isBeforeLegalizeOps() ||
3135 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3136 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3137
3138 // If we have a subtract with the same 2 non-constant operands as this setcc
3139 // -- but in reverse order -- then try to commute the operands of this setcc
3140 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3141 // instruction on some targets.
3142 if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3143 (DCI.isBeforeLegalizeOps() ||
3144 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3145 DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3146 !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3147 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3148
3149 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3150 const APInt &C1 = N1C->getAPIntValue();
3151
3152 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3153 // equality comparison, then we're just comparing whether X itself is
3154 // zero.
3155 if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3156 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3157 N0.getOperand(1).getOpcode() == ISD::Constant) {
3158 const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3159 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3160 ShAmt == Log2_32(N0.getValueSizeInBits())) {
3161 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3162 // (srl (ctlz x), 5) == 0 -> X != 0
3163 // (srl (ctlz x), 5) != 1 -> X != 0
3164 Cond = ISD::SETNE;
3165 } else {
3166 // (srl (ctlz x), 5) != 0 -> X == 0
3167 // (srl (ctlz x), 5) == 1 -> X == 0
3168 Cond = ISD::SETEQ;
3169 }
3170 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3171 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3172 Zero, Cond);
3173 }
3174 }
3175
3176 SDValue CTPOP = N0;
3177 // Look through truncs that don't change the value of a ctpop.
3178 if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3179 CTPOP = N0.getOperand(0);
3180
3181 if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3182 (N0 == CTPOP ||
3183 N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3184 EVT CTVT = CTPOP.getValueType();
3185 SDValue CTOp = CTPOP.getOperand(0);
3186
3187 // (ctpop x) u< 2 -> (x & x-1) == 0
3188 // (ctpop x) u> 1 -> (x & x-1) != 0
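// This works because x & (x - 1) clears the lowest set bit of x, so the
// result is zero iff x has at most one bit set.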
3189 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3190 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3191 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3192 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3193 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3194 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3195 }
3196
3197 // If ctpop is not supported, expand a power-of-2 comparison based on it.
3198 if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3199 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3200 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3201 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3202 SDValue Zero = DAG.getConstant(0, dl, CTVT);
3203 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3204 assert(CTVT.isInteger());
3205 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3206 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3207 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3208 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3209 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3210 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3211 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3212 }
3213 }
3214
3215 // (zext x) == C --> x == (trunc C)
3216 // (sext x) == C --> x == (trunc C)
3217 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3218 DCI.isBeforeLegalize() && N0->hasOneUse()) {
3219 unsigned MinBits = N0.getValueSizeInBits();
3220 SDValue PreExt;
3221 bool Signed = false;
3222 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3223 // ZExt
3224 MinBits = N0->getOperand(0).getValueSizeInBits();
3225 PreExt = N0->getOperand(0);
3226 } else if (N0->getOpcode() == ISD::AND) {
3227 // DAGCombine turns costly ZExts into ANDs
3228 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3229 if ((C->getAPIntValue()+1).isPowerOf2()) {
3230 MinBits = C->getAPIntValue().countTrailingOnes();
3231 PreExt = N0->getOperand(0);
3232 }
3233 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3234 // SExt
3235 MinBits = N0->getOperand(0).getValueSizeInBits();
3236 PreExt = N0->getOperand(0);
3237 Signed = true;
3238 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3239 // ZEXTLOAD / SEXTLOAD
3240 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3241 MinBits = LN0->getMemoryVT().getSizeInBits();
3242 PreExt = N0;
3243 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3244 Signed = true;
3245 MinBits = LN0->getMemoryVT().getSizeInBits();
3246 PreExt = N0;
3247 }
3248 }
3249
3250 // Figure out how many bits are needed to preserve this constant.
3251 unsigned ReqdBits = Signed ?
3252 C1.getBitWidth() - C1.getNumSignBits() + 1 :
3253 C1.getActiveBits();
3254
3255 // Make sure we're not losing bits from the constant.
3256 if (MinBits > 0 &&
3257 MinBits < C1.getBitWidth() &&
3258 MinBits >= ReqdBits) {
3259 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3260 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3261 // Will get folded away.
3262 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3263 if (MinBits == 1 && C1 == 1)
3264 // Invert the condition.
3265 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3266 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3267 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3268 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3269 }
3270
3271 // If truncating the setcc operands is not desirable, we can still
3272 // simplify the expression in some cases:
3273 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3274 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3275 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3276 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3277 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3278 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3279 SDValue TopSetCC = N0->getOperand(0);
3280 unsigned N0Opc = N0->getOpcode();
3281 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3282 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3283 TopSetCC.getOpcode() == ISD::SETCC &&
3284 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3285 (isConstFalseVal(N1C) ||
3286 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3287
3288 bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3289 (!N1C->isNullValue() && Cond == ISD::SETNE);
3290
3291 if (!Inverse)
3292 return TopSetCC;
3293
3294 ISD::CondCode InvCond = ISD::getSetCCInverse(
3295 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3296 TopSetCC.getOperand(0).getValueType());
3297 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3298 TopSetCC.getOperand(1),
3299 InvCond);
3300 }
3301 }
3302 }
3303
3304 // If the LHS is '(and load, const)', the RHS is 0, the test is for
3305 // equality or unsigned, and all 1 bits of the const are in the same
3306 // partial word, see if we can shorten the load.
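// For example (illustrative): '(and (load i32 %p), 0xFF00) == 0' on a
// little-endian target only inspects the byte at offset 1, so it can be
// rewritten as an i8 load from %p+1 masked with 0xFF and compared to 0.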
3307 if (DCI.isBeforeLegalize() &&
3308 !ISD::isSignedIntSetCC(Cond) &&
3309 N0.getOpcode() == ISD::AND && C1 == 0 &&
3310 N0.getNode()->hasOneUse() &&
3311 isa<LoadSDNode>(N0.getOperand(0)) &&
3312 N0.getOperand(0).getNode()->hasOneUse() &&
3313 isa<ConstantSDNode>(N0.getOperand(1))) {
3314 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3315 APInt bestMask;
3316 unsigned bestWidth = 0, bestOffset = 0;
3317 if (Lod->isSimple() && Lod->isUnindexed()) {
3318 unsigned origWidth = N0.getValueSizeInBits();
3319 unsigned maskWidth = origWidth;
3320 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
3321 // 8 bits, but have to be careful...
3322 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3323 origWidth = Lod->getMemoryVT().getSizeInBits();
3324 const APInt &Mask = N0.getConstantOperandAPInt(1);
3325 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3326 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3327 for (unsigned offset=0; offset<origWidth/width; offset++) {
3328 if (Mask.isSubsetOf(newMask)) {
3329 if (Layout.isLittleEndian())
3330 bestOffset = (uint64_t)offset * (width/8);
3331 else
3332 bestOffset = (origWidth/width - offset - 1) * (width/8);
3333 bestMask = Mask.lshr(offset * (width/8) * 8);
3334 bestWidth = width;
3335 break;
3336 }
3337 newMask <<= width;
3338 }
3339 }
3340 }
3341 if (bestWidth) {
3342 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3343 if (newVT.isRound() &&
3344 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3345 SDValue Ptr = Lod->getBasePtr();
3346 if (bestOffset != 0)
3347 Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
3348 unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3349 SDValue NewLoad = DAG.getLoad(
3350 newVT, dl, Lod->getChain(), Ptr,
3351 Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3352 return DAG.getSetCC(dl, VT,
3353 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3354 DAG.getConstant(bestMask.trunc(bestWidth),
3355 dl, newVT)),
3356 DAG.getConstant(0LL, dl, newVT), Cond);
3357 }
3358 }
3359 }
3360
3361 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3362 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3363 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3364
3365 // If the comparison constant has bits in the upper part, the
3366 // zero-extended value could never match.
3367 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3368 C1.getBitWidth() - InSize))) {
3369 switch (Cond) {
3370 case ISD::SETUGT:
3371 case ISD::SETUGE:
3372 case ISD::SETEQ:
3373 return DAG.getConstant(0, dl, VT);
3374 case ISD::SETULT:
3375 case ISD::SETULE:
3376 case ISD::SETNE:
3377 return DAG.getConstant(1, dl, VT);
3378 case ISD::SETGT:
3379 case ISD::SETGE:
3380 // True if the sign bit of C1 is set.
3381 return DAG.getConstant(C1.isNegative(), dl, VT);
3382 case ISD::SETLT:
3383 case ISD::SETLE:
3384 // True if the sign bit of C1 isn't set.
3385 return DAG.getConstant(C1.isNonNegative(), dl, VT);
3386 default:
3387 break;
3388 }
3389 }
3390
3391 // Otherwise, we can perform the comparison with the low bits.
3392 switch (Cond) {
3393 case ISD::SETEQ:
3394 case ISD::SETNE:
3395 case ISD::SETUGT:
3396 case ISD::SETUGE:
3397 case ISD::SETULT:
3398 case ISD::SETULE: {
3399 EVT newVT = N0.getOperand(0).getValueType();
3400 if (DCI.isBeforeLegalizeOps() ||
3401 (isOperationLegal(ISD::SETCC, newVT) &&
3402 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3403 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3404 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3405
3406 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3407 NewConst, Cond);
3408 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3409 }
3410 break;
3411 }
3412 default:
3413 break; // TODO: be more careful with signed comparisons.
3414 }
3415 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3416 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3417 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3418 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3419 EVT ExtDstTy = N0.getValueType();
3420 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3421
3422 // If the constant doesn't fit into the number of bits for the source of
3423 // the sign extension, it is impossible for both sides to be equal.
3424 if (C1.getMinSignedBits() > ExtSrcTyBits)
3425 return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3426
3427 SDValue ZextOp;
3428 EVT Op0Ty = N0.getOperand(0).getValueType();
3429 if (Op0Ty == ExtSrcTy) {
3430 ZextOp = N0.getOperand(0);
3431 } else {
3432 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3433 ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3434 DAG.getConstant(Imm, dl, Op0Ty));
3435 }
3436 if (!DCI.isCalledByLegalizer())
3437 DCI.AddToWorklist(ZextOp.getNode());
3438 // Otherwise, make this a use of a zext.
3439 return DAG.getSetCC(dl, VT, ZextOp,
3440 DAG.getConstant(C1 & APInt::getLowBitsSet(
3441 ExtDstTyBits,
3442 ExtSrcTyBits),
3443 dl, ExtDstTy),
3444 Cond);
3445 } else if ((N1C->isNullValue() || N1C->isOne()) &&
3446 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3447 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
3448 if (N0.getOpcode() == ISD::SETCC &&
3449 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3450 (N0.getValueType() == MVT::i1 ||
3451 getBooleanContents(N0.getOperand(0).getValueType()) ==
3452 ZeroOrOneBooleanContent)) {
3453 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3454 if (TrueWhenTrue)
3455 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3456 // Invert the condition.
3457 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3458 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3459 if (DCI.isBeforeLegalizeOps() ||
3460 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3461 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3462 }
3463
3464 if ((N0.getOpcode() == ISD::XOR ||
3465 (N0.getOpcode() == ISD::AND &&
3466 N0.getOperand(0).getOpcode() == ISD::XOR &&
3467 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3468 isa<ConstantSDNode>(N0.getOperand(1)) &&
3469 cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3470 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
3471 // can only do this if the top bits are known zero.
3472 unsigned BitWidth = N0.getValueSizeInBits();
3473 if (DAG.MaskedValueIsZero(N0,
3474 APInt::getHighBitsSet(BitWidth,
3475 BitWidth-1))) {
3476 // Okay, get the un-inverted input value.
3477 SDValue Val;
3478 if (N0.getOpcode() == ISD::XOR) {
3479 Val = N0.getOperand(0);
3480 } else {
3481 assert(N0.getOpcode() == ISD::AND &&
3482 N0.getOperand(0).getOpcode() == ISD::XOR);
3483 // ((X^1)&1)^1 -> X & 1
3484 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3485 N0.getOperand(0).getOperand(0),
3486 N0.getOperand(1));
3487 }
3488
3489 return DAG.getSetCC(dl, VT, Val, N1,
3490 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3491 }
3492 } else if (N1C->isOne()) {
3493 SDValue Op0 = N0;
3494 if (Op0.getOpcode() == ISD::TRUNCATE)
3495 Op0 = Op0.getOperand(0);
3496
3497 if ((Op0.getOpcode() == ISD::XOR) &&
3498 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3499 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3500 SDValue XorLHS = Op0.getOperand(0);
3501 SDValue XorRHS = Op0.getOperand(1);
3502 // Ensure that the input setccs return an i1 type or 0/1 value.
3503 if (Op0.getValueType() == MVT::i1 ||
3504 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3505 ZeroOrOneBooleanContent &&
3506 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3507 ZeroOrOneBooleanContent)) {
3508 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3509 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3510 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3511 }
3512 }
3513 if (Op0.getOpcode() == ISD::AND &&
3514 isa<ConstantSDNode>(Op0.getOperand(1)) &&
3515 cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3516 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3517 if (Op0.getValueType().bitsGT(VT))
3518 Op0 = DAG.getNode(ISD::AND, dl, VT,
3519 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3520 DAG.getConstant(1, dl, VT));
3521 else if (Op0.getValueType().bitsLT(VT))
3522 Op0 = DAG.getNode(ISD::AND, dl, VT,
3523 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3524 DAG.getConstant(1, dl, VT));
3525
3526 return DAG.getSetCC(dl, VT, Op0,
3527 DAG.getConstant(0, dl, Op0.getValueType()),
3528 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3529 }
3530 if (Op0.getOpcode() == ISD::AssertZext &&
3531 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3532 return DAG.getSetCC(dl, VT, Op0,
3533 DAG.getConstant(0, dl, Op0.getValueType()),
3534 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3535 }
3536 }
3537
3538 // Given:
3539 // icmp eq/ne (urem %x, %y), 0
3540 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3541 // icmp eq/ne %x, 0
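// If %x has at most one bit set it is either 0 or a power of two, and the
// only divisors of a power of two are powers of two. A %y with two or more
// bits set is not a power of two, so the remainder is zero iff %x is zero.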
3542 if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3543 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3544 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3545 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3546 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3547 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3548 }
3549
3550 if (SDValue V =
3551 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3552 return V;
3553 }
3554
3555 // These simplifications apply to splat vectors as well.
3556 // TODO: Handle more splat vector cases.
3557 if (auto *N1C = isConstOrConstSplat(N1)) {
3558 const APInt &C1 = N1C->getAPIntValue();
3559
3560 APInt MinVal, MaxVal;
3561 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3562 if (ISD::isSignedIntSetCC(Cond)) {
3563 MinVal = APInt::getSignedMinValue(OperandBitSize);
3564 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3565 } else {
3566 MinVal = APInt::getMinValue(OperandBitSize);
3567 MaxVal = APInt::getMaxValue(OperandBitSize);
3568 }
3569
3570 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3571 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3572 // X >= MIN --> true
3573 if (C1 == MinVal)
3574 return DAG.getBoolConstant(true, dl, VT, OpVT);
3575
3576 if (!VT.isVector()) { // TODO: Support this for vectors.
3577 // X >= C0 --> X > (C0 - 1)
3578 APInt C = C1 - 1;
3579 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3580 if ((DCI.isBeforeLegalizeOps() ||
3581 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3582 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3583 isLegalICmpImmediate(C.getSExtValue())))) {
3584 return DAG.getSetCC(dl, VT, N0,
3585 DAG.getConstant(C, dl, N1.getValueType()),
3586 NewCC);
3587 }
3588 }
3589 }
3590
3591 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3592 // X <= MAX --> true
3593 if (C1 == MaxVal)
3594 return DAG.getBoolConstant(true, dl, VT, OpVT);
3595
3596 // X <= C0 --> X < (C0 + 1)
3597 if (!VT.isVector()) { // TODO: Support this for vectors.
3598 APInt C = C1 + 1;
3599 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3600 if ((DCI.isBeforeLegalizeOps() ||
3601 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3602 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3603 isLegalICmpImmediate(C.getSExtValue())))) {
3604 return DAG.getSetCC(dl, VT, N0,
3605 DAG.getConstant(C, dl, N1.getValueType()),
3606 NewCC);
3607 }
3608 }
3609 }
3610
3611 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3612 if (C1 == MinVal)
3613 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3614
3615 // TODO: Support this for vectors after legalize ops.
3616 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3617 // Canonicalize setlt X, Max --> setne X, Max
3618 if (C1 == MaxVal)
3619 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3620
3621 // If we have setult X, 1, turn it into seteq X, 0
3622 if (C1 == MinVal+1)
3623 return DAG.getSetCC(dl, VT, N0,
3624 DAG.getConstant(MinVal, dl, N0.getValueType()),
3625 ISD::SETEQ);
3626 }
3627 }
3628
3629 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3630 if (C1 == MaxVal)
3631 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3632
3633 // TODO: Support this for vectors after legalize ops.
3634 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3635 // Canonicalize setgt X, Min --> setne X, Min
3636 if (C1 == MinVal)
3637 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3638
3639 // If we have setugt X, Max-1, turn it into seteq X, Max
3640 if (C1 == MaxVal-1)
3641 return DAG.getSetCC(dl, VT, N0,
3642 DAG.getConstant(MaxVal, dl, N0.getValueType()),
3643 ISD::SETEQ);
3644 }
3645 }
3646
3647 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3648 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
3649 if (C1.isNullValue())
3650 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3651 VT, N0, N1, Cond, DCI, dl))
3652 return CC;
3653 }
3654
3655 // If we have "setcc X, C0", check to see if we can shrink the immediate
3656 // by changing cc.
3657 // TODO: Support this for vectors after legalize ops.
3658 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3659 // SETUGT X, SINTMAX -> SETLT X, 0
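// X u> SINTMAX holds exactly when the sign bit of X is set, i.e. X s< 0.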
3660 if (Cond == ISD::SETUGT &&
3661 C1 == APInt::getSignedMaxValue(OperandBitSize))
3662 return DAG.getSetCC(dl, VT, N0,
3663 DAG.getConstant(0, dl, N1.getValueType()),
3664 ISD::SETLT);
3665
3666 // SETULT X, SINTMIN -> SETGT X, -1
3667 if (Cond == ISD::SETULT &&
3668 C1 == APInt::getSignedMinValue(OperandBitSize)) {
3669 SDValue ConstMinusOne =
3670 DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3671 N1.getValueType());
3672 return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3673 }
3674 }
3675 }
3676
3677 // Back to non-vector simplifications.
3678 // TODO: Can we do these for vector splats?
3679 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3680 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3681 const APInt &C1 = N1C->getAPIntValue();
3682 EVT ShValTy = N0.getValueType();
3683
3684 // Fold bit comparisons when we can.
3685 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3686 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
3687 N0.getOpcode() == ISD::AND) {
3688 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3689 EVT ShiftTy =
3690 getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3691 if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
3692 // Perform the xform if the AND RHS is a single bit.
3693 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
3694 if (AndRHS->getAPIntValue().isPowerOf2() &&
3695 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3696 return DAG.getNode(ISD::TRUNCATE, dl, VT,
3697 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3698 DAG.getConstant(ShCt, dl, ShiftTy)));
3699 }
3700 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3701 // (X & 8) == 8 --> (X & 8) >> 3
3702 // Perform the xform if C1 is a single bit.
3703 unsigned ShCt = C1.logBase2();
3704 if (C1.isPowerOf2() &&
3705 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
3706 return DAG.getNode(ISD::TRUNCATE, dl, VT,
3707 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3708 DAG.getConstant(ShCt, dl, ShiftTy)));
3709 }
3710 }
3711 }
3712 }
3713
3714 if (C1.getMinSignedBits() <= 64 &&
3715 !isLegalICmpImmediate(C1.getSExtValue())) {
3716 EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
3717 // (X & -256) == 256 -> (X >> 8) == 1
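// When the AND mask is a negated power of two that covers C1, shifting both
// sides right by the mask's trailing-zero count can turn an out-of-range
// compare immediate into a smaller one the target can encode.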
3718 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3719 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3720 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3721 const APInt &AndRHSC = AndRHS->getAPIntValue();
3722 if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3723 unsigned ShiftBits = AndRHSC.countTrailingZeros();
3724 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3725 SDValue Shift =
3726 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
3727 DAG.getConstant(ShiftBits, dl, ShiftTy));
3728 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
3729 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3730 }
3731 }
3732 }
3733 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3734 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3735 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3736 // X < 0x100000000 -> (X >> 32) < 1
3737 // X >= 0x100000000 -> (X >> 32) >= 1
3738 // X <= 0x0ffffffff -> (X >> 32) < 1
3739 // X > 0x0ffffffff -> (X >> 32) >= 1
3740 unsigned ShiftBits;
3741 APInt NewC = C1;
3742 ISD::CondCode NewCond = Cond;
3743 if (AdjOne) {
3744 ShiftBits = C1.countTrailingOnes();
3745 NewC = NewC + 1;
3746 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3747 } else {
3748 ShiftBits = C1.countTrailingZeros();
3749 }
3750 NewC.lshrInPlace(ShiftBits);
3751 if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3752 isLegalICmpImmediate(NewC.getSExtValue()) &&
3753 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
3754 SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
3755 DAG.getConstant(ShiftBits, dl, ShiftTy));
3756 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
3757 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3758 }
3759 }
3760 }
3761 }
3762
3763 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3764 auto *CFP = cast<ConstantFPSDNode>(N1);
3765 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3766
3767 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
3768 // constant if knowing that the operand is non-nan is enough. We prefer to
3769 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3770 // materialize 0.0.
3771 if (Cond == ISD::SETO || Cond == ISD::SETUO)
3772 return DAG.getSetCC(dl, VT, N0, N0, Cond);
3773
3774 // setcc (fneg x), C -> setcc swap(pred) x, -C
3775 if (N0.getOpcode() == ISD::FNEG) {
3776 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3777 if (DCI.isBeforeLegalizeOps() ||
3778 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3779 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3780 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3781 }
3782 }
3783
3784 // If the condition is not legal, see if we can find an equivalent one
3785 // which is legal.
3786 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3787 // If the comparison was an awkward floating-point == or != and one of
3788 // the comparison operands is infinity or negative infinity, convert the
3789 // condition to a less-awkward <= or >=.
3790 if (CFP->getValueAPF().isInfinity()) {
3791 if (CFP->getValueAPF().isNegative()) {
3792 if (Cond == ISD::SETOEQ &&
3793 isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3794 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3795 if (Cond == ISD::SETUEQ &&
3796 isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
3797 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3798 if (Cond == ISD::SETUNE &&
3799 isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3800 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3801 if (Cond == ISD::SETONE &&
3802 isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
3803 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3804 } else {
3805 if (Cond == ISD::SETOEQ &&
3806 isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3807 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3808 if (Cond == ISD::SETUEQ &&
3809 isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
3810 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3811 if (Cond == ISD::SETUNE &&
3812 isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3813 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3814 if (Cond == ISD::SETONE &&
3815 isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
3816 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3817 }
3818 }
3819 }
3820 }
3821
3822 if (N0 == N1) {
3823 // The sext(setcc()) => setcc() optimization relies on the appropriate
3824 // constant being emitted.
3825 assert(!N0.getValueType().isInteger() &&
3826 "Integer types should be handled by FoldSetCC");
3827
3828 bool EqTrue = ISD::isTrueWhenEqual(Cond);
3829 unsigned UOF = ISD::getUnorderedFlavor(Cond);
3830 if (UOF == 2) // FP operators that are undefined on NaNs.
3831 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3832 if (UOF == unsigned(EqTrue))
3833 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3834 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
3835 // if it is not already.
3836 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
3837 if (NewCond != Cond &&
3838 (DCI.isBeforeLegalizeOps() ||
3839 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
3840 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3841 }
3842
3843 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3844 N0.getValueType().isInteger()) {
3845 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
3846 N0.getOpcode() == ISD::XOR) {
3847 // Simplify (X+Y) == (X+Z) --> Y == Z
3848 if (N0.getOpcode() == N1.getOpcode()) {
3849 if (N0.getOperand(0) == N1.getOperand(0))
3850 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3851 if (N0.getOperand(1) == N1.getOperand(1))
3852 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3853 if (isCommutativeBinOp(N0.getOpcode())) {
3854 // If X op Y == Y op X, try other combinations.
3855 if (N0.getOperand(0) == N1.getOperand(1))
3856 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3857 Cond);
3858 if (N0.getOperand(1) == N1.getOperand(0))
3859 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3860 Cond);
3861 }
3862 }
3863
3864 // If RHS is a legal immediate value for a compare instruction, we need
3865 // to be careful about increasing register pressure needlessly.
3866 bool LegalRHSImm = false;
3867
3868 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3869 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3870 // Turn (X+C1) == C2 --> X == C2-C1
3871 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
3872 return DAG.getSetCC(dl, VT, N0.getOperand(0),
3873 DAG.getConstant(RHSC->getAPIntValue()-
3874 LHSR->getAPIntValue(),
3875 dl, N0.getValueType()), Cond);
3876 }
3877
3878 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3879 if (N0.getOpcode() == ISD::XOR)
3880 // If we know that all of the inverted bits are zero, don't bother
3881 // performing the inversion.
3882 if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3883 return
3884 DAG.getSetCC(dl, VT, N0.getOperand(0),
3885 DAG.getConstant(LHSR->getAPIntValue() ^
3886 RHSC->getAPIntValue(),
3887 dl, N0.getValueType()),
3888 Cond);
3889 }
3890
3891 // Turn (C1-X) == C2 --> X == C1-C2
3892 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3893 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3894 return
3895 DAG.getSetCC(dl, VT, N0.getOperand(1),
3896 DAG.getConstant(SUBC->getAPIntValue() -
3897 RHSC->getAPIntValue(),
3898 dl, N0.getValueType()),
3899 Cond);
3900 }
3901 }
3902
3903 // Could RHSC fold directly into a compare?
3904 if (RHSC->getValueType(0).getSizeInBits() <= 64)
3905 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3906 }
3907
3908 // (X+Y) == X --> Y == 0 and similar folds.
3909 // Don't do this if X is an immediate that can fold into a cmp
3910 // instruction and X+Y has other uses. It could be an induction variable
3911 // chain, and the transform would increase register pressure.
3912 if (!LegalRHSImm || N0.hasOneUse())
3913 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3914 return V;
3915 }
3916
3917 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
3918 N1.getOpcode() == ISD::XOR)
3919 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3920 return V;
3921
3922 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3923 return V;
3924 }
3925
3926 // Fold remainder of division by a constant.
3927 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
3928 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3929 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3930
3931 // When division is cheap or optimizing for minimum size,
3932 // fall through to DIVREM creation by skipping this fold.
3933 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
3934 if (N0.getOpcode() == ISD::UREM) {
3935 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3936 return Folded;
3937 } else if (N0.getOpcode() == ISD::SREM) {
3938 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
3939 return Folded;
3940 }
3941 }
3942 }
3943
3944 // Fold away ALL boolean setcc's.
3945 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
3946 SDValue Temp;
3947 switch (Cond) {
3948 default: llvm_unreachable("Unknown integer setcc!");
3949 case ISD::SETEQ: // X == Y -> ~(X^Y)
3950 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3951 N0 = DAG.getNOT(dl, Temp, OpVT);
3952 if (!DCI.isCalledByLegalizer())
3953 DCI.AddToWorklist(Temp.getNode());
3954 break;
3955 case ISD::SETNE: // X != Y --> (X^Y)
3956 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3957 break;
3958 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
3959 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
3960 Temp = DAG.getNOT(dl, N0, OpVT);
3961 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3962 if (!DCI.isCalledByLegalizer())
3963 DCI.AddToWorklist(Temp.getNode());
3964 break;
3965 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
3966 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
3967 Temp = DAG.getNOT(dl, N1, OpVT);
3968 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3969 if (!DCI.isCalledByLegalizer())
3970 DCI.AddToWorklist(Temp.getNode());
3971 break;
3972 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
3973 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
3974 Temp = DAG.getNOT(dl, N0, OpVT);
3975 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3976 if (!DCI.isCalledByLegalizer())
3977 DCI.AddToWorklist(Temp.getNode());
3978 break;
3979 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
3980 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
3981 Temp = DAG.getNOT(dl, N1, OpVT);
3982 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3983 break;
3984 }
3985 if (VT.getScalarType() != MVT::i1) {
3986 if (!DCI.isCalledByLegalizer())
3987 DCI.AddToWorklist(N0.getNode());
3988 // FIXME: If running after legalize, we probably can't do this.
3989 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3990 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3991 }
3992 return N0;
3993 }
3994
3995 // Could not fold it.
3996 return SDValue();
3997 }
3998
3999 /// Returns true (and the GlobalValue and the offset) if the node is a
4000 /// GlobalAddress + offset.
4001 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4002 int64_t &Offset) const {
4003
4004 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4005
4006 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4007 GA = GASD->getGlobal();
4008 Offset += GASD->getOffset();
4009 return true;
4010 }
4011
4012 if (N->getOpcode() == ISD::ADD) {
4013 SDValue N1 = N->getOperand(0);
4014 SDValue N2 = N->getOperand(1);
4015 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4016 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4017 Offset += V->getSExtValue();
4018 return true;
4019 }
4020 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4021 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4022 Offset += V->getSExtValue();
4023 return true;
4024 }
4025 }
4026 }
4027
4028 return false;
4029 }
4030
4031 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4032 DAGCombinerInfo &DCI) const {
4033 // Default implementation: no optimization.
4034 return SDValue();
4035 }
4036
4037 //===----------------------------------------------------------------------===//
4038 // Inline Assembler Implementation Methods
4039 //===----------------------------------------------------------------------===//
4040
4041 TargetLowering::ConstraintType
4042 TargetLowering::getConstraintType(StringRef Constraint) const {
4043 unsigned S = Constraint.size();
4044
4045 if (S == 1) {
4046 switch (Constraint[0]) {
4047 default: break;
4048 case 'r':
4049 return C_RegisterClass;
4050 case 'm': // memory
4051 case 'o': // offsetable
4052 case 'V': // not offsetable
4053 return C_Memory;
4054 case 'n': // Simple Integer
4055 case 'E': // Floating Point Constant
4056 case 'F': // Floating Point Constant
4057 return C_Immediate;
4058 case 'i': // Simple Integer or Relocatable Constant
4059 case 's': // Relocatable Constant
4060 case 'p': // Address.
4061 case 'X': // Allow ANY value.
4062 case 'I': // Target registers.
4063 case 'J':
4064 case 'K':
4065 case 'L':
4066 case 'M':
4067 case 'N':
4068 case 'O':
4069 case 'P':
4070 case '<':
4071 case '>':
4072 return C_Other;
4073 }
4074 }
4075
4076 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4077 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4078 return C_Memory;
4079 return C_Register;
4080 }
4081 return C_Unknown;
4082 }
4083
4084 /// Try to replace an X constraint, which matches anything, with another that
4085 /// has more specific requirements based on the type of the corresponding
4086 /// operand.
4087 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4088 if (ConstraintVT.isInteger())
4089 return "r";
4090 if (ConstraintVT.isFloatingPoint())
4091 return "f"; // works for many targets
4092 return nullptr;
4093 }
4094
4095 SDValue TargetLowering::LowerAsmOutputForConstraint(
4096 SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
4097 SelectionDAG &DAG) const {
4098 return SDValue();
4099 }
4100
4101 /// Lower the specified operand into the Ops vector.
4102 /// If it is invalid, don't add anything to Ops.
4103 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4104 std::string &Constraint,
4105 std::vector<SDValue> &Ops,
4106 SelectionDAG &DAG) const {
4107
4108 if (Constraint.length() > 1) return;
4109
4110 char ConstraintLetter = Constraint[0];
4111 switch (ConstraintLetter) {
4112 default: break;
4113 case 'X': // Allows any operand; labels (basic block) use this.
4114 if (Op.getOpcode() == ISD::BasicBlock ||
4115 Op.getOpcode() == ISD::TargetBlockAddress) {
4116 Ops.push_back(Op);
4117 return;
4118 }
4119 LLVM_FALLTHROUGH;
4120 case 'i': // Simple Integer or Relocatable Constant
4121 case 'n': // Simple Integer
4122 case 's': { // Relocatable Constant
4123
4124 GlobalAddressSDNode *GA;
4125 ConstantSDNode *C;
4126 BlockAddressSDNode *BA;
4127 uint64_t Offset = 0;
4128
4129 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
4130 // etc., since getelementptr is variadic. We can't use
4131 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
4132 // while in this case the GA may be furthest from the root node, which is
4133 // likely an ISD::ADD.
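// For example (illustrative): given ((GA + 4) + 8), the loop below starts at
// the outer ISD::ADD, accumulates Offset = 12, and finally reaches the GA.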
4134 while (1) {
4135 if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
4136 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
4137 GA->getValueType(0),
4138 Offset + GA->getOffset()));
4139 return;
4140 } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
4141 ConstraintLetter != 's') {
4142 // gcc prints these as sign extended. Sign extend value to 64 bits
4143 // now; without this it would get ZExt'd later in
4144 // ScheduleDAGSDNodes::EmitNode, which is very generic.
4145 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
4146 BooleanContent BCont = getBooleanContents(MVT::i64);
4147 ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
4148 : ISD::SIGN_EXTEND;
4149 int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
4150 : C->getSExtValue();
4151 Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
4152 SDLoc(C), MVT::i64));
4153 return;
4154 } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
4155 ConstraintLetter != 'n') {
4156 Ops.push_back(DAG.getTargetBlockAddress(
4157 BA->getBlockAddress(), BA->getValueType(0),
4158 Offset + BA->getOffset(), BA->getTargetFlags()));
4159 return;
4160 } else {
4161 const unsigned OpCode = Op.getOpcode();
4162 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
4163 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
4164 Op = Op.getOperand(1);
4165 // Subtraction is not commutative.
4166 else if (OpCode == ISD::ADD &&
4167 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
4168 Op = Op.getOperand(0);
4169 else
4170 return;
4171 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
4172 continue;
4173 }
4174 }
4175 return;
4176 }
4177 break;
4178 }
4179 }
4180 }
4181
4182 std::pair<unsigned, const TargetRegisterClass *>
4183 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4184 StringRef Constraint,
4185 MVT VT) const {
4186 if (Constraint.empty() || Constraint[0] != '{')
4187 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4188 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4189
4190 // Remove the braces from around the name.
4191 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4192
4193 std::pair<unsigned, const TargetRegisterClass *> R =
4194 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4195
4196 // Figure out which register class contains this reg.
4197 for (const TargetRegisterClass *RC : RI->regclasses()) {
4198 // If none of the value types for this register class are valid, we
4199 // can't use it. For example, 64-bit reg classes on 32-bit targets.
4200 if (!isLegalRC(*RI, *RC))
4201 continue;
4202
4203 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4204 I != E; ++I) {
4205 if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4206 std::pair<unsigned, const TargetRegisterClass *> S =
4207 std::make_pair(*I, RC);
4208
4209 // If this register class has the requested value type, return it,
4210 // otherwise keep searching and return the first class found
4211 // if no other is found which explicitly has the requested type.
4212 if (RI->isTypeLegalForClass(*RC, VT))
4213 return S;
4214 if (!R.second)
4215 R = S;
4216 }
4217 }
4218 }
4219
4220 return R;
4221 }
4222
4223 //===----------------------------------------------------------------------===//
4224 // Constraint Selection.
4225
4226 /// Return true if this is an input operand that is a matching constraint like
4227 /// "4".
4228 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4229 assert(!ConstraintCode.empty() && "No known constraint!");
4230 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4231 }
4232
4233 /// If this is an input matching constraint, this method returns the output
4234 /// operand it matches.
4235 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4236 assert(!ConstraintCode.empty() && "No known constraint!");
4237 return atoi(ConstraintCode.c_str());
4238 }
4239
4240 /// Split up the constraint string from the inline assembly value into the
4241 /// specific constraints and their prefixes, and also tie in the associated
4242 /// operand values.
4243 /// If this returns an empty vector, and if the constraint string itself
4244 /// isn't empty, there was an error parsing.
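/// For example (illustrative): the constraint string "=r,0,m" yields three
/// AsmOperandInfos: a register output, an input tied to operand 0, and a
/// memory input.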
4245 TargetLowering::AsmOperandInfoVector
4246 TargetLowering::ParseConstraints(const DataLayout &DL,
4247 const TargetRegisterInfo *TRI,
4248 ImmutableCallSite CS) const {
4249 /// Information about all of the constraints.
4250 AsmOperandInfoVector ConstraintOperands;
4251 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4252 unsigned maCount = 0; // Largest number of multiple alternative constraints.
4253
4254 // Do a prepass over the constraints, canonicalizing them, and building up the
4255 // ConstraintOperands list.
4256 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
4257 unsigned ResNo = 0; // ResNo - The result number of the next output.
4258
4259 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
4260 ConstraintOperands.emplace_back(std::move(CI));
4261 AsmOperandInfo &OpInfo = ConstraintOperands.back();
4262
4263 // Update multiple alternative constraint count.
4264 if (OpInfo.multipleAlternatives.size() > maCount)
4265 maCount = OpInfo.multipleAlternatives.size();
4266
4267 OpInfo.ConstraintVT = MVT::Other;
4268
4269 // Compute the value type for each operand.
4270 switch (OpInfo.Type) {
4271 case InlineAsm::isOutput:
4272 // Indirect outputs just consume an argument.
4273 if (OpInfo.isIndirect) {
4274 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4275 break;
4276 }
4277
4278 // The return value of the call is this value. As such, there is no
4279 // corresponding argument.
4280 assert(!CS.getType()->isVoidTy() &&
4281 "Bad inline asm!");
4282 if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
4283 OpInfo.ConstraintVT =
4284 getSimpleValueType(DL, STy->getElementType(ResNo));
4285 } else {
4286 assert(ResNo == 0 && "Asm only has one result!");
4287 OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
4288 }
4289 ++ResNo;
4290 break;
4291 case InlineAsm::isInput:
4292 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4293 break;
4294 case InlineAsm::isClobber:
4295 // Nothing to do.
4296 break;
4297 }
4298
4299 if (OpInfo.CallOperandVal) {
4300 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
4301 if (OpInfo.isIndirect) {
4302 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4303 if (!PtrTy)
4304 report_fatal_error("Indirect operand for inline asm not a pointer!");
4305 OpTy = PtrTy->getElementType();
4306 }
4307
4308 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
4309 if (StructType *STy = dyn_cast<StructType>(OpTy))
4310 if (STy->getNumElements() == 1)
4311 OpTy = STy->getElementType(0);
4312
4313 // If OpTy is not a single value, it may be a struct/union that we
4314 // can tile with integers.
4315 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4316 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
4317 switch (BitSize) {
4318 default: break;
4319 case 1:
4320 case 8:
4321 case 16:
4322 case 32:
4323 case 64:
4324 case 128:
4325 OpInfo.ConstraintVT =
4326 MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
4327 break;
4328 }
4329 } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4330 unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4331 OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4332 } else {
4333 OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4334 }
4335 }
4336 }
4337
4338 // If we have multiple alternative constraints, select the best alternative.
4339 if (!ConstraintOperands.empty()) {
4340 if (maCount) {
4341 unsigned bestMAIndex = 0;
4342 int bestWeight = -1;
4343 // weight: -1 = invalid match; 0 = so-so match up to 5 = good match.
4344 int weight = -1;
4345 unsigned maIndex;
4346 // Compute the sums of the weights for each alternative, keeping track
4347 // of the best (highest weight) one so far.
4348 for (maIndex = 0; maIndex < maCount; ++maIndex) {
4349 int weightSum = 0;
4350 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4351 cIndex != eIndex; ++cIndex) {
4352 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4353 if (OpInfo.Type == InlineAsm::isClobber)
4354 continue;
4355
4356 // If this is an output operand with a matching input operand,
4357 // look up the matching input. If their types mismatch, e.g. one
4358 // is an integer, the other is floating point, or their sizes are
4359           // different, mark this whole alternative as unmatchable.
4360 if (OpInfo.hasMatchingInput()) {
4361 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4362 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4363 if ((OpInfo.ConstraintVT.isInteger() !=
4364 Input.ConstraintVT.isInteger()) ||
4365 (OpInfo.ConstraintVT.getSizeInBits() !=
4366 Input.ConstraintVT.getSizeInBits())) {
4367 weightSum = -1; // Can't match.
4368 break;
4369 }
4370 }
4371 }
4372 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4373 if (weight == -1) {
4374 weightSum = -1;
4375 break;
4376 }
4377 weightSum += weight;
4378 }
4379 // Update best.
4380 if (weightSum > bestWeight) {
4381 bestWeight = weightSum;
4382 bestMAIndex = maIndex;
4383 }
4384 }
4385
4386 // Now select chosen alternative in each constraint.
4387 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4388 cIndex != eIndex; ++cIndex) {
4389 AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4390 if (cInfo.Type == InlineAsm::isClobber)
4391 continue;
4392 cInfo.selectAlternative(bestMAIndex);
4393 }
4394 }
4395 }
4396
4397 // Check and hook up tied operands, choose constraint code to use.
4398 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4399 cIndex != eIndex; ++cIndex) {
4400 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4401
4402 // If this is an output operand with a matching input operand, look up the
4403 // matching input. If their types mismatch, e.g. one is an integer, the
4404 // other is floating point, or their sizes are different, flag it as an
4405 // error.
4406 if (OpInfo.hasMatchingInput()) {
4407 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4408
4409 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4410 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4411 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4412 OpInfo.ConstraintVT);
4413 std::pair<unsigned, const TargetRegisterClass *> InputRC =
4414 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4415 Input.ConstraintVT);
4416 if ((OpInfo.ConstraintVT.isInteger() !=
4417 Input.ConstraintVT.isInteger()) ||
4418 (MatchRC.second != InputRC.second)) {
4419 report_fatal_error("Unsupported asm: input constraint"
4420 " with a matching output constraint of"
4421 " incompatible type!");
4422 }
4423 }
4424 }
4425 }
4426
4427 return ConstraintOperands;
4428 }
4429
4430 /// Return an integer indicating how general CT is.
4431 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4432 switch (CT) {
4433 case TargetLowering::C_Immediate:
4434 case TargetLowering::C_Other:
4435 case TargetLowering::C_Unknown:
4436 return 0;
4437 case TargetLowering::C_Register:
4438 return 1;
4439 case TargetLowering::C_RegisterClass:
4440 return 2;
4441 case TargetLowering::C_Memory:
4442 return 3;
4443 }
4444 llvm_unreachable("Invalid constraint type");
4445 }
4446
4447 /// Examine constraint type and operand type and determine a weight value.
4448 /// This object must already have been set up with the operand type
4449 /// and the current alternative constraint selected.
4450 TargetLowering::ConstraintWeight
4451 TargetLowering::getMultipleConstraintMatchWeight(
4452 AsmOperandInfo &info, int maIndex) const {
4453 InlineAsm::ConstraintCodeVector *rCodes;
4454 if (maIndex >= (int)info.multipleAlternatives.size())
4455 rCodes = &info.Codes;
4456 else
4457 rCodes = &info.multipleAlternatives[maIndex].Codes;
4458 ConstraintWeight BestWeight = CW_Invalid;
4459
4460 // Loop over the options, keeping track of the most general one.
4461 for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4462 ConstraintWeight weight =
4463 getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4464 if (weight > BestWeight)
4465 BestWeight = weight;
4466 }
4467
4468 return BestWeight;
4469 }
4470
4471 /// Examine constraint type and operand type and determine a weight value.
4472 /// This object must already have been set up with the operand type
4473 /// and the current alternative constraint selected.
4474 TargetLowering::ConstraintWeight
4475 TargetLowering::getSingleConstraintMatchWeight(
4476 AsmOperandInfo &info, const char *constraint) const {
4477 ConstraintWeight weight = CW_Invalid;
4478 Value *CallOperandVal = info.CallOperandVal;
4479 // If we don't have a value, we can't do a match,
4480 // but allow it at the lowest weight.
4481 if (!CallOperandVal)
4482 return CW_Default;
4483 // Look at the constraint type.
4484 switch (*constraint) {
4485 case 'i': // immediate integer.
4486 case 'n': // immediate integer with a known value.
4487 if (isa<ConstantInt>(CallOperandVal))
4488 weight = CW_Constant;
4489 break;
4490     case 's': // non-explicit integral immediate.
4491 if (isa<GlobalValue>(CallOperandVal))
4492 weight = CW_Constant;
4493 break;
4494 case 'E': // immediate float if host format.
4495 case 'F': // immediate float.
4496 if (isa<ConstantFP>(CallOperandVal))
4497 weight = CW_Constant;
4498 break;
4499 case '<': // memory operand with autodecrement.
4500 case '>': // memory operand with autoincrement.
4501 case 'm': // memory operand.
4502 case 'o': // offsettable memory operand
4503 case 'V': // non-offsettable memory operand
4504 weight = CW_Memory;
4505 break;
4506 case 'r': // general register.
4507 case 'g': // general register, memory operand or immediate integer.
4508 // note: Clang converts "g" to "imr".
4509 if (CallOperandVal->getType()->isIntegerTy())
4510 weight = CW_Register;
4511 break;
4512 case 'X': // any operand.
4513 default:
4514 weight = CW_Default;
4515 break;
4516 }
4517 return weight;
4518 }
4519
4520 /// If there are multiple different constraints that we could pick for this
4521 /// operand (e.g. "imr") try to pick the 'best' one.
4522 /// This is somewhat tricky: constraints fall into four classes:
4523 /// Other -> immediates and magic values
4524 /// Register -> one specific register
4525 /// RegisterClass -> a group of regs
4526 /// Memory -> memory
4527 /// Ideally, we would pick the most specific constraint possible: if we have
4528 /// something that fits into a register, we would pick it. The problem here
4529 /// is that if we have something that could either be in a register or in
4530 /// memory that use of the register could cause selection of *other*
4531 /// operands to fail: they might only succeed if we pick memory. Because of
4532 /// this the heuristic we use is:
4533 ///
4534 /// 1) If there is an 'other' constraint, and if the operand is valid for
4535 /// that constraint, use it. This makes us take advantage of 'i'
4536 /// constraints when available.
4537 /// 2) Otherwise, pick the most general constraint present. This prefers
4538 /// 'm' over 'r', for example.
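/// For example (an illustrative sketch, not from the code below): given the
/// code "imr" and an operand that folds to a constant, rule 1 selects 'i';
/// for a non-constant operand, rule 2 selects 'm' over 'r', since C_Memory
/// ranks as more general than C_RegisterClass in getConstraintGenerality().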
4539 ///
4540 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4541 const TargetLowering &TLI,
4542 SDValue Op, SelectionDAG *DAG) {
4543 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4544 unsigned BestIdx = 0;
4545 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4546 int BestGenerality = -1;
4547
4548 // Loop over the options, keeping track of the most general one.
4549 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4550 TargetLowering::ConstraintType CType =
4551 TLI.getConstraintType(OpInfo.Codes[i]);
4552
4553 // Indirect 'other' or 'immediate' constraints are not allowed.
4554 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
4555 CType == TargetLowering::C_Register ||
4556 CType == TargetLowering::C_RegisterClass))
4557 continue;
4558
4559 // If this is an 'other' or 'immediate' constraint, see if the operand is
4560 // valid for it. For example, on X86 we might have an 'rI' constraint. If
4561 // the operand is an integer in the range [0..31] we want to use I (saving a
4562 // load of a register), otherwise we must use 'r'.
4563 if ((CType == TargetLowering::C_Other ||
4564 CType == TargetLowering::C_Immediate) && Op.getNode()) {
4565 assert(OpInfo.Codes[i].size() == 1 &&
4566 "Unhandled multi-letter 'other' constraint");
4567 std::vector<SDValue> ResultOps;
4568 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4569 ResultOps, *DAG);
4570 if (!ResultOps.empty()) {
4571 BestType = CType;
4572 BestIdx = i;
4573 break;
4574 }
4575 }
4576
4577 // Things with matching constraints can only be registers, per gcc
4578 // documentation. This mainly affects "g" constraints.
4579 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4580 continue;
4581
4582 // This constraint letter is more general than the previous one, use it.
4583 int Generality = getConstraintGenerality(CType);
4584 if (Generality > BestGenerality) {
4585 BestType = CType;
4586 BestIdx = i;
4587 BestGenerality = Generality;
4588 }
4589 }
4590
4591 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4592 OpInfo.ConstraintType = BestType;
4593 }
4594
4595 /// Determines the constraint code and constraint type to use for the specific
4596 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4597 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4598 SDValue Op,
4599 SelectionDAG *DAG) const {
4600 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4601
4602 // Single-letter constraints ('r') are very common.
4603 if (OpInfo.Codes.size() == 1) {
4604 OpInfo.ConstraintCode = OpInfo.Codes[0];
4605 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4606 } else {
4607 ChooseConstraint(OpInfo, *this, Op, DAG);
4608 }
4609
4610 // 'X' matches anything.
4611 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4612 // Labels and constants are handled elsewhere ('X' is the only thing
4613 // that matches labels). For Functions, the type here is the type of
4614 // the result, which is not what we want to look at; leave them alone.
4615 Value *v = OpInfo.CallOperandVal;
4616 if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4617 OpInfo.CallOperandVal = v;
4618 return;
4619 }
4620
4621 if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4622 return;
4623
4624 // Otherwise, try to resolve it to something we know about by looking at
4625 // the actual operand type.
4626 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4627 OpInfo.ConstraintCode = Repl;
4628 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4629 }
4630 }
4631 }
4632
4633 /// Given an exact SDIV by a constant, create a multiplication
4634 /// with the multiplicative inverse of the constant.
4635 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
4636 const SDLoc &dl, SelectionDAG &DAG,
4637 SmallVectorImpl<SDNode *> &Created) {
4638 SDValue Op0 = N->getOperand(0);
4639 SDValue Op1 = N->getOperand(1);
4640 EVT VT = N->getValueType(0);
4641 EVT SVT = VT.getScalarType();
4642 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
4643 EVT ShSVT = ShVT.getScalarType();
4644
4645 bool UseSRA = false;
4646 SmallVector<SDValue, 16> Shifts, Factors;
4647
4648 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4649 if (C->isNullValue())
4650 return false;
4651 APInt Divisor = C->getAPIntValue();
4652 unsigned Shift = Divisor.countTrailingZeros();
4653 if (Shift) {
4654 Divisor.ashrInPlace(Shift);
4655 UseSRA = true;
4656 }
4657 // Calculate the multiplicative inverse, using Newton's method.
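    // Each step doubles the number of correct low bits: if D * F == 1
    // (mod 2^k), then D * F * (2 - D * F) == 1 (mod 2^(2k)).
    // Illustrative example (not from the source, W = 8): for D = 3 this
    // converges to F = 171, and indeed 3 * 171 == 513 == 1 (mod 256).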
4658 APInt t;
4659 APInt Factor = Divisor;
4660 while ((t = Divisor * Factor) != 1)
4661 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
4662 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
4663 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
4664 return true;
4665 };
4666
4667 // Collect all magic values from the build vector.
4668 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
4669 return SDValue();
4670
4671 SDValue Shift, Factor;
4672 if (VT.isVector()) {
4673 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4674 Factor = DAG.getBuildVector(VT, dl, Factors);
4675 } else {
4676 Shift = Shifts[0];
4677 Factor = Factors[0];
4678 }
4679
4680 SDValue Res = Op0;
4681
4682 // Shift the value upfront if it is even, so the LSB is one.
4683 if (UseSRA) {
4684 // TODO: For UDIV use SRL instead of SRA.
4685 SDNodeFlags Flags;
4686 Flags.setExact(true);
4687 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
4688 Created.push_back(Res.getNode());
4689 }
4690
4691 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
4692 }
4693
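// The base implementation does not emit a shift-based power-of-two sequence
// itself; targets that want one typically override this hook. The default
// below merely keeps the SDIV node when the target says division is cheap.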
4694 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4695 SelectionDAG &DAG,
4696 SmallVectorImpl<SDNode *> &Created) const {
4697 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4698 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4699 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4700 return SDValue(N, 0); // Lower SDIV as SDIV
4701 return SDValue();
4702 }
4703
4704 /// Given an ISD::SDIV node expressing a divide by constant,
4705 /// return a DAG expression to select that will generate the same value by
4706 /// multiplying by a magic number.
4707 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
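/// For example (an illustrative sketch, not taken from this function): an
/// i32 sdiv by 7 is expected to lower to roughly
///   Q = MULHS(N, 0x92492493); Q = ADD(Q, N); Q = SRA(Q, 2);
///   Q = ADD(Q, SRL(Q, 31));
/// with the magic constant and shift amount supplied by APInt::magic().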
4708 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
4709 bool IsAfterLegalization,
4710 SmallVectorImpl<SDNode *> &Created) const {
4711 SDLoc dl(N);
4712 EVT VT = N->getValueType(0);
4713 EVT SVT = VT.getScalarType();
4714 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4715 EVT ShSVT = ShVT.getScalarType();
4716 unsigned EltBits = VT.getScalarSizeInBits();
4717
4718 // Check to see if we can do this.
4719 // FIXME: We should be more aggressive here.
4720 if (!isTypeLegal(VT))
4721 return SDValue();
4722
4723 // If the sdiv has an 'exact' bit we can use a simpler lowering.
4724 if (N->getFlags().hasExact())
4725 return BuildExactSDIV(*this, N, dl, DAG, Created);
4726
4727 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
4728
4729 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4730 if (C->isNullValue())
4731 return false;
4732
4733 const APInt &Divisor = C->getAPIntValue();
4734 APInt::ms magics = Divisor.magic();
4735 int NumeratorFactor = 0;
4736 int ShiftMask = -1;
4737
4738 if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
4739 // If d is +1/-1, we just multiply the numerator by +1/-1.
4740 NumeratorFactor = Divisor.getSExtValue();
4741 magics.m = 0;
4742 magics.s = 0;
4743 ShiftMask = 0;
4744 } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
4745 // If d > 0 and m < 0, add the numerator.
4746 NumeratorFactor = 1;
4747 } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
4748 // If d < 0 and m > 0, subtract the numerator.
4749 NumeratorFactor = -1;
4750 }
4751
4752 MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
4753 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
4754 Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
4755 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
4756 return true;
4757 };
4758
4759 SDValue N0 = N->getOperand(0);
4760 SDValue N1 = N->getOperand(1);
4761
4762 // Collect the shifts / magic values from each element.
4763 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
4764 return SDValue();
4765
4766 SDValue MagicFactor, Factor, Shift, ShiftMask;
4767 if (VT.isVector()) {
4768 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4769 Factor = DAG.getBuildVector(VT, dl, Factors);
4770 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4771 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
4772 } else {
4773 MagicFactor = MagicFactors[0];
4774 Factor = Factors[0];
4775 Shift = Shifts[0];
4776 ShiftMask = ShiftMasks[0];
4777 }
4778
4779 // Multiply the numerator (operand 0) by the magic value.
4780 // FIXME: We should support doing a MUL in a wider type.
4781 SDValue Q;
4782 if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
4783 : isOperationLegalOrCustom(ISD::MULHS, VT))
4784 Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
4785 else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
4786 : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
4787 SDValue LoHi =
4788 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
4789 Q = SDValue(LoHi.getNode(), 1);
4790 } else
4791 return SDValue(); // No mulhs or equivalent.
4792 Created.push_back(Q.getNode());
4793
4794 // (Optionally) Add/subtract the numerator using Factor.
4795 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
4796 Created.push_back(Factor.getNode());
4797 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
4798 Created.push_back(Q.getNode());
4799
4800 // Shift right algebraic by shift value.
4801 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
4802 Created.push_back(Q.getNode());
4803
4804 // Extract the sign bit, mask it and add it to the quotient.
4805 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
4806 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
4807 Created.push_back(T.getNode());
4808 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
4809 Created.push_back(T.getNode());
4810 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
4811 }
4812
4813 /// Given an ISD::UDIV node expressing a divide by constant,
4814 /// return a DAG expression to select that will generate the same value by
4815 /// multiplying by a magic number.
4816 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
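/// For example (an illustrative sketch): an i32 udiv by 7 has magicu
/// 0x24924925 with the 'add' fixup set, so the expected expansion is roughly
///   Q = MULHU(N, 0x24924925); NPQ = SRL(SUB(N, Q), 1);
///   Q = SRL(ADD(NPQ, Q), 2);
/// which is the UseNPQ path below.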
4817 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
4818 bool IsAfterLegalization,
4819 SmallVectorImpl<SDNode *> &Created) const {
4820 SDLoc dl(N);
4821 EVT VT = N->getValueType(0);
4822 EVT SVT = VT.getScalarType();
4823 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4824 EVT ShSVT = ShVT.getScalarType();
4825 unsigned EltBits = VT.getScalarSizeInBits();
4826
4827 // Check to see if we can do this.
4828 // FIXME: We should be more aggressive here.
4829 if (!isTypeLegal(VT))
4830 return SDValue();
4831
4832 bool UseNPQ = false;
4833 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4834
4835 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
4836 if (C->isNullValue())
4837 return false;
4838 // FIXME: We should use a narrower constant when the upper
4839 // bits are known to be zero.
4840 APInt Divisor = C->getAPIntValue();
4841 APInt::mu magics = Divisor.magicu();
4842 unsigned PreShift = 0, PostShift = 0;
4843
4844 // If the divisor is even, we can avoid using the expensive fixup by
4845 // shifting the divided value upfront.
4846 if (magics.a != 0 && !Divisor[0]) {
4847 PreShift = Divisor.countTrailingZeros();
4848 // Get magic number for the shifted divisor.
4849 magics = Divisor.lshr(PreShift).magicu(PreShift);
4850 assert(magics.a == 0 && "Should use cheap fixup now");
4851 }
4852
4853 APInt Magic = magics.m;
4854
4855     bool SelNPQ;
4856 if (magics.a == 0 || Divisor.isOneValue()) {
4857 assert(magics.s < Divisor.getBitWidth() &&
4858 "We shouldn't generate an undefined shift!");
4859 PostShift = magics.s;
4860 SelNPQ = false;
4861 } else {
4862 PostShift = magics.s - 1;
4863 SelNPQ = true;
4864 }
4865
4866 PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
4867 MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
4868 NPQFactors.push_back(
4869 DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
4870 : APInt::getNullValue(EltBits),
4871 dl, SVT));
4872 PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
4873 UseNPQ |= SelNPQ;
4874 return true;
4875 };
4876
4877 SDValue N0 = N->getOperand(0);
4878 SDValue N1 = N->getOperand(1);
4879
4880 // Collect the shifts/magic values from each element.
4881 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
4882 return SDValue();
4883
4884 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
4885 if (VT.isVector()) {
4886 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
4887 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4888 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
4889 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
4890 } else {
4891 PreShift = PreShifts[0];
4892 MagicFactor = MagicFactors[0];
4893 PostShift = PostShifts[0];
4894 }
4895
4896 SDValue Q = N0;
4897 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
4898 Created.push_back(Q.getNode());
4899
4900 // FIXME: We should support doing a MUL in a wider type.
4901 auto GetMULHU = [&](SDValue X, SDValue Y) {
4902 if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
4903 : isOperationLegalOrCustom(ISD::MULHU, VT))
4904 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
4905 if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
4906 : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
4907 SDValue LoHi =
4908 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
4909 return SDValue(LoHi.getNode(), 1);
4910 }
4911 return SDValue(); // No mulhu or equivalent
4912 };
4913
4914 // Multiply the numerator (operand 0) by the magic value.
4915 Q = GetMULHU(Q, MagicFactor);
4916 if (!Q)
4917 return SDValue();
4918
4919 Created.push_back(Q.getNode());
4920
4921 if (UseNPQ) {
4922 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
4923 Created.push_back(NPQ.getNode());
4924
4925 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4926 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
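    // (This works because MULHU(x, 2^(W-1)) == floor(x * 2^(W-1) / 2^W)
    // == SRL(x, 1), while MULHU(x, 0) == 0; NPQFactor above holds exactly
    // those two per-lane constants.)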
4927 if (VT.isVector())
4928 NPQ = GetMULHU(NPQ, NPQFactor);
4929 else
4930 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
4931
4932 Created.push_back(NPQ.getNode());
4933
4934 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
4935 Created.push_back(Q.getNode());
4936 }
4937
4938 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
4939 Created.push_back(Q.getNode());
4940
4941 SDValue One = DAG.getConstant(1, dl, VT);
4942 SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
4943 return DAG.getSelect(dl, VT, IsOne, N0, Q);
4944 }
4945
4946 /// If all values in Values that *don't* match the predicate are same 'splat'
4947 /// value, then replace all values with that splat value.
4948 /// Else, if AlternativeReplacement was provided, then replace all values that
4949 /// do match predicate with AlternativeReplacement value.
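/// E.g. (illustrative) {0, 5, 0, 5} with an is-zero predicate becomes
/// {5, 5, 5, 5}, while {0, 5, 0, 7} is left unchanged unless an
/// AlternativeReplacement value was provided for the zeros.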
4950 static void
4951 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4952 std::function<bool(SDValue)> Predicate,
4953 SDValue AlternativeReplacement = SDValue()) {
4954 SDValue Replacement;
4955 // Is there a value for which the Predicate does *NOT* match? What is it?
4956 auto SplatValue = llvm::find_if_not(Values, Predicate);
4957 if (SplatValue != Values.end()) {
4958 // Does Values consist only of SplatValue's and values matching Predicate?
4959 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
4960 return Value == *SplatValue || Predicate(Value);
4961 })) // Then we shall replace values matching predicate with SplatValue.
4962 Replacement = *SplatValue;
4963 }
4964 if (!Replacement) {
4965 // Oops, we did not find the "baseline" splat value.
4966 if (!AlternativeReplacement)
4967 return; // Nothing to do.
4968 // Let's replace with provided value then.
4969 Replacement = AlternativeReplacement;
4970 }
4971 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4972 }
4973
4974 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4975 /// where the divisor is constant and the comparison target is zero,
4976 /// return a DAG expression that will generate the same comparison result
4977 /// using only multiplications, additions and shifts/rotations.
4978 /// Ref: "Hacker's Delight" 10-17.
4979 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4980 SDValue CompTargetNode,
4981 ISD::CondCode Cond,
4982 DAGCombinerInfo &DCI,
4983 const SDLoc &DL) const {
4984 SmallVector<SDNode *, 5> Built;
4985 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4986 DCI, DL, Built)) {
4987 for (SDNode *N : Built)
4988 DCI.AddToWorklist(N);
4989 return Folded;
4990 }
4991
4992 return SDValue();
4993 }
4994
4995 SDValue
4996 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4997 SDValue CompTargetNode, ISD::CondCode Cond,
4998 DAGCombinerInfo &DCI, const SDLoc &DL,
4999 SmallVectorImpl<SDNode *> &Created) const {
5000 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
5001 // - D must be constant, with D = D0 * 2^K where D0 is odd
5002 // - P is the multiplicative inverse of D0 modulo 2^W
5003 // - Q = floor(((2^W) - 1) / D)
5004 // where W is the width of the common type of N and D.
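  // Worked example (illustrative, W = 8, D = 6): D0 = 3, K = 1,
  // P = inv(3, 2^8) = 171 and Q = floor(255 / 6) = 42, giving
  //   (x u% 6 == 0)  <-->  (rotr(x * 171, 1) u<= 42)
  // e.g. x = 12: 12 * 171 == 4 (mod 256) and rotr(4, 1) = 2 u<= 42 (true);
  //      x = 7:  7 * 171 == 173 (mod 256) and rotr(173, 1) = 214 u> 42 (false).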
5005 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5006 "Only applicable for (in)equality comparisons.");
5007
5008 SelectionDAG &DAG = DCI.DAG;
5009
5010 EVT VT = REMNode.getValueType();
5011 EVT SVT = VT.getScalarType();
5012 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5013 EVT ShSVT = ShVT.getScalarType();
5014
5015 // If MUL is unavailable, we cannot proceed in any case.
5016 if (!isOperationLegalOrCustom(ISD::MUL, VT))
5017 return SDValue();
5018
5019 bool ComparingWithAllZeros = true;
5020 bool AllComparisonsWithNonZerosAreTautological = true;
5021 bool HadTautologicalLanes = false;
5022 bool AllLanesAreTautological = true;
5023 bool HadEvenDivisor = false;
5024 bool AllDivisorsArePowerOfTwo = true;
5025 bool HadTautologicalInvertedLanes = false;
5026 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
5027
5028 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
5029 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5030 if (CDiv->isNullValue())
5031 return false;
5032
5033 const APInt &D = CDiv->getAPIntValue();
5034 const APInt &Cmp = CCmp->getAPIntValue();
5035
5036 ComparingWithAllZeros &= Cmp.isNullValue();
5037
5038     // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5039     // if C2 is not less than C1, the comparison is always false.
5040     // But we will only be able to produce the comparison that will give the
5041     // opposite tautological answer. So this lane would need to be fixed up.
5042 bool TautologicalInvertedLane = D.ule(Cmp);
5043 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
5044
5045 // If all lanes are tautological (either all divisors are ones, or divisor
5046 // is not greater than the constant we are comparing with),
5047 // we will prefer to avoid the fold.
5048 bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
5049 HadTautologicalLanes |= TautologicalLane;
5050 AllLanesAreTautological &= TautologicalLane;
5051
5052     // If we are comparing with non-zero, we'll need to subtract said
5053     // comparison value from the LHS. But there is no point in doing that if
5054     // every lane where we are comparing with non-zero is tautological.
5055 if (!Cmp.isNullValue())
5056 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
5057
5058 // Decompose D into D0 * 2^K
5059 unsigned K = D.countTrailingZeros();
5060 assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5061 APInt D0 = D.lshr(K);
5062
5063 // D is even if it has trailing zeros.
5064 HadEvenDivisor |= (K != 0);
5065 // D is a power-of-two if D0 is one.
5066 // If all divisors are power-of-two, we will prefer to avoid the fold.
5067 AllDivisorsArePowerOfTwo &= D0.isOneValue();
5068
5069 // P = inv(D0, 2^W)
5070 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5071 unsigned W = D.getBitWidth();
5072 APInt P = D0.zext(W + 1)
5073 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5074 .trunc(W);
5075 assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5076 assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5077
5078 // Q = floor((2^W - 1) u/ D)
5079 // R = ((2^W - 1) u% D)
5080 APInt Q, R;
5081 APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
5082
5083 // If we are comparing with zero, then that comparison constant is okay,
5084 // else it may need to be one less than that.
5085 if (Cmp.ugt(R))
5086 Q -= 1;
5087
5088 assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5089 "We are expecting that K is always less than all-ones for ShSVT");
5090
5091 // If the lane is tautological the result can be constant-folded.
5092 if (TautologicalLane) {
5093       // Set P and K to bogus values so we can try to splat them.
5094 P = 0;
5095 K = -1;
5096 // And ensure that comparison constant is tautological,
5097 // it will always compare true/false.
5098 Q = -1;
5099 }
5100
5101 PAmts.push_back(DAG.getConstant(P, DL, SVT));
5102 KAmts.push_back(
5103 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5104 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5105 return true;
5106 };
5107
5108 SDValue N = REMNode.getOperand(0);
5109 SDValue D = REMNode.getOperand(1);
5110
5111 // Collect the values from each element.
5112 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
5113 return SDValue();
5114
5115 // If all lanes are tautological, the result can be constant-folded.
5116 if (AllLanesAreTautological)
5117 return SDValue();
5118
5119   // If this is a urem by a power-of-two, avoid the fold since it is
5120   // best implemented as a bit test.
5121 if (AllDivisorsArePowerOfTwo)
5122 return SDValue();
5123
5124 SDValue PVal, KVal, QVal;
5125 if (VT.isVector()) {
5126 if (HadTautologicalLanes) {
5127 // Try to turn PAmts into a splat, since we don't care about the values
5128       // that are currently '0'. If we can't, just keep '0's.
5129 turnVectorIntoSplatVector(PAmts, isNullConstant);
5130 // Try to turn KAmts into a splat, since we don't care about the values
5131       // that are currently '-1'. If we can't, change them to '0's.
5132 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5133 DAG.getConstant(0, DL, ShSVT));
5134 }
5135
5136 PVal = DAG.getBuildVector(VT, DL, PAmts);
5137 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5138 QVal = DAG.getBuildVector(VT, DL, QAmts);
5139 } else {
5140 PVal = PAmts[0];
5141 KVal = KAmts[0];
5142 QVal = QAmts[0];
5143 }
5144
5145 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5146 if (!isOperationLegalOrCustom(ISD::SUB, VT))
5147 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
5148 assert(CompTargetNode.getValueType() == N.getValueType() &&
5149 "Expecting that the types on LHS and RHS of comparisons match.");
5150 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
5151 }
5152
5153 // (mul N, P)
5154 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5155 Created.push_back(Op0.getNode());
5156
5157 // Rotate right only if any divisor was even. We avoid rotates for all-odd
5158 // divisors as a performance improvement, since rotating by 0 is a no-op.
5159 if (HadEvenDivisor) {
5160 // We need ROTR to do this.
5161 if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5162 return SDValue();
5163 SDNodeFlags Flags;
5164 Flags.setExact(true);
5165 // UREM: (rotr (mul N, P), K)
5166 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5167 Created.push_back(Op0.getNode());
5168 }
5169
5170 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5171 SDValue NewCC =
5172 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5173 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5174 if (!HadTautologicalInvertedLanes)
5175 return NewCC;
5176
5177 // If any lanes previously compared always-false, the NewCC will give
5178 // always-true result for them, so we need to fixup those lanes.
5179 // Or the other way around for inequality predicate.
5180 assert(VT.isVector() && "Can/should only get here for vectors.");
5181 Created.push_back(NewCC.getNode());
5182
5183   // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5184   // if C2 is not less than C1, the comparison is always false.
5185   // But we have produced the comparison that will give the
5186   // opposite tautological answer. So these lanes would need to be fixed up.
5187 SDValue TautologicalInvertedChannels =
5188 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
5189 Created.push_back(TautologicalInvertedChannels.getNode());
5190
5191 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
5192 // If we have a vector select, let's replace the comparison results in the
5193 // affected lanes with the correct tautological result.
5194 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
5195 DL, SETCCVT, SETCCVT);
5196 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
5197 Replacement, NewCC);
5198 }
5199
5200 // Else, we can just invert the comparison result in the appropriate lanes.
5201 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
5202 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
5203 TautologicalInvertedChannels);
5204
5205 return SDValue(); // Don't know how to lower.
5206 }
5207
5208 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5209 /// where the divisor is constant and the comparison target is zero,
5210 /// return a DAG expression that will generate the same comparison result
5211 /// using only multiplications, additions and shifts/rotations.
5212 /// Ref: "Hacker's Delight" 10-17.
5213 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5214 SDValue CompTargetNode,
5215 ISD::CondCode Cond,
5216 DAGCombinerInfo &DCI,
5217 const SDLoc &DL) const {
5218 SmallVector<SDNode *, 7> Built;
5219 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5220 DCI, DL, Built)) {
5221 assert(Built.size() <= 7 && "Max size prediction failed.");
5222 for (SDNode *N : Built)
5223 DCI.AddToWorklist(N);
5224 return Folded;
5225 }
5226
5227 return SDValue();
5228 }
5229
5230 SDValue
5231 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5232 SDValue CompTargetNode, ISD::CondCode Cond,
5233 DAGCombinerInfo &DCI, const SDLoc &DL,
5234 SmallVectorImpl<SDNode *> &Created) const {
5235 // Fold:
5236 // (seteq/ne (srem N, D), 0)
5237 // To:
5238 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
5239 //
5240 // - D must be constant, with D = D0 * 2^K where D0 is odd
5241 // - P is the multiplicative inverse of D0 modulo 2^W
5242 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
5243 // - Q = floor((2 * A) / (2^K))
5244 // where W is the width of the common type of N and D.
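  // Worked example (illustrative, W = 8, D = 6): D0 = 3, K = 1, P = 171,
  // A = floor(127 / 3) & -2 = 42 and Q = floor(84 / 2) = 42, giving
  //   (x s% 6 == 0)  <-->  (rotr(x * 171 + 42, 1) u<= 42)
  // e.g. x = -6: (-6) * 171 + 42 == 40 (mod 256), rotr(40, 1) = 20 (true).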
5245 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5246 "Only applicable for (in)equality comparisons.");
5247
5248 SelectionDAG &DAG = DCI.DAG;
5249
5250 EVT VT = REMNode.getValueType();
5251 EVT SVT = VT.getScalarType();
5252 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5253 EVT ShSVT = ShVT.getScalarType();
5254
5255 // If MUL is unavailable, we cannot proceed in any case.
5256 if (!isOperationLegalOrCustom(ISD::MUL, VT))
5257 return SDValue();
5258
5259 // TODO: Could support comparing with non-zero too.
5260 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
5261 if (!CompTarget || !CompTarget->isNullValue())
5262 return SDValue();
5263
5264 bool HadIntMinDivisor = false;
5265 bool HadOneDivisor = false;
5266 bool AllDivisorsAreOnes = true;
5267 bool HadEvenDivisor = false;
5268 bool NeedToApplyOffset = false;
5269 bool AllDivisorsArePowerOfTwo = true;
5270 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
5271
5272 auto BuildSREMPattern = [&](ConstantSDNode *C) {
5273 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5274 if (C->isNullValue())
5275 return false;
5276
5277 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
5278
5279 // WARNING: this fold is only valid for positive divisors!
5280 APInt D = C->getAPIntValue();
5281 if (D.isNegative())
5282 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
5283
5284 HadIntMinDivisor |= D.isMinSignedValue();
5285
5286 // If all divisors are ones, we will prefer to avoid the fold.
5287 HadOneDivisor |= D.isOneValue();
5288 AllDivisorsAreOnes &= D.isOneValue();
5289
5290 // Decompose D into D0 * 2^K
5291 unsigned K = D.countTrailingZeros();
5292 assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5293 APInt D0 = D.lshr(K);
5294
5295 if (!D.isMinSignedValue()) {
5296 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
5297       // we don't care about this lane in this fold; we'll special-handle it.
5298 HadEvenDivisor |= (K != 0);
5299 }
5300
5301 // D is a power-of-two if D0 is one. This includes INT_MIN.
5302 // If all divisors are power-of-two, we will prefer to avoid the fold.
5303 AllDivisorsArePowerOfTwo &= D0.isOneValue();
5304
5305 // P = inv(D0, 2^W)
5306 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5307 unsigned W = D.getBitWidth();
5308 APInt P = D0.zext(W + 1)
5309 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5310 .trunc(W);
5311 assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5312 assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5313
5314 // A = floor((2^(W - 1) - 1) / D0) & -2^K
5315 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
5316 A.clearLowBits(K);
5317
5318 if (!D.isMinSignedValue()) {
5319       // If the divisor is INT_MIN, we don't care about this lane in this
5320       // fold; we'll special-handle it.
5321 NeedToApplyOffset |= A != 0;
5322 }
5323
5324 // Q = floor((2 * A) / (2^K))
5325 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
5326
5327 assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
5328 "We are expecting that A is always less than all-ones for SVT");
5329 assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5330 "We are expecting that K is always less than all-ones for ShSVT");
5331
5332 // If the divisor is 1 the result can be constant-folded. Likewise, we
5333 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
5334 if (D.isOneValue()) {
5335       // Set P, A and K to bogus values so we can try to splat them.
5336 P = 0;
5337 A = -1;
5338 K = -1;
5339
5340 // x ?% 1 == 0 <--> true <--> x u<= -1
5341 Q = -1;
5342 }
5343
5344 PAmts.push_back(DAG.getConstant(P, DL, SVT));
5345 AAmts.push_back(DAG.getConstant(A, DL, SVT));
5346 KAmts.push_back(
5347 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5348 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5349 return true;
5350 };
5351
5352 SDValue N = REMNode.getOperand(0);
5353 SDValue D = REMNode.getOperand(1);
5354
5355 // Collect the values from each element.
5356 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
5357 return SDValue();
5358
5359   // If this is a srem by one, avoid the fold since it can be constant-folded.
5360 if (AllDivisorsAreOnes)
5361 return SDValue();
5362
5363   // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
5364   // since it is best implemented as a bit test.
5365 if (AllDivisorsArePowerOfTwo)
5366 return SDValue();
5367
5368 SDValue PVal, AVal, KVal, QVal;
5369 if (VT.isVector()) {
5370 if (HadOneDivisor) {
5371 // Try to turn PAmts into a splat, since we don't care about the values
5372       // that are currently '0'. If we can't, just keep '0's.
5373 turnVectorIntoSplatVector(PAmts, isNullConstant);
5374 // Try to turn AAmts into a splat, since we don't care about the
5375       // values that are currently '-1'. If we can't, change them to '0's.
5376 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
5377 DAG.getConstant(0, DL, SVT));
5378 // Try to turn KAmts into a splat, since we don't care about the values
5379       // that are currently '-1'. If we can't, change them to '0's.
5380 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5381 DAG.getConstant(0, DL, ShSVT));
5382 }
5383
5384 PVal = DAG.getBuildVector(VT, DL, PAmts);
5385 AVal = DAG.getBuildVector(VT, DL, AAmts);
5386 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5387 QVal = DAG.getBuildVector(VT, DL, QAmts);
5388 } else {
5389 PVal = PAmts[0];
5390 AVal = AAmts[0];
5391 KVal = KAmts[0];
5392 QVal = QAmts[0];
5393 }
5394
5395 // (mul N, P)
5396 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5397 Created.push_back(Op0.getNode());
5398
5399 if (NeedToApplyOffset) {
5400 // We need ADD to do this.
5401 if (!isOperationLegalOrCustom(ISD::ADD, VT))
5402 return SDValue();
5403
5404 // (add (mul N, P), A)
5405 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
5406 Created.push_back(Op0.getNode());
5407 }
5408
5409 // Rotate right only if any divisor was even. We avoid rotates for all-odd
5410 // divisors as a performance improvement, since rotating by 0 is a no-op.
5411 if (HadEvenDivisor) {
5412 // We need ROTR to do this.
5413 if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5414 return SDValue();
5415 SDNodeFlags Flags;
5416 Flags.setExact(true);
5417 // SREM: (rotr (add (mul N, P), A), K)
5418 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5419 Created.push_back(Op0.getNode());
5420 }
5421
5422 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
5423 SDValue Fold =
5424 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5425 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5426
5427 // If we didn't have lanes with INT_MIN divisor, then we're done.
5428 if (!HadIntMinDivisor)
5429 return Fold;
5430
5431 // That fold is only valid for positive divisors. Which effectively means,
5432 // it is invalid for INT_MIN divisors. So if we have such a lane,
5433 // we must fix-up results for said lanes.
5434 assert(VT.isVector() && "Can/should only get here for vectors.");
5435
5436 if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
5437 !isOperationLegalOrCustom(ISD::AND, VT) ||
5438 !isOperationLegalOrCustom(Cond, VT) ||
5439 !isOperationLegalOrCustom(ISD::VSELECT, VT))
5440 return SDValue();
5441
5442 Created.push_back(Fold.getNode());
5443
5444 SDValue IntMin = DAG.getConstant(
5445 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
5446 SDValue IntMax = DAG.getConstant(
5447 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
5448 SDValue Zero =
5449 DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
5450
5451 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
5452 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
5453 Created.push_back(DivisorIsIntMin.getNode());
5454
5455 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
5456 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
5457 Created.push_back(Masked.getNode());
5458 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
5459 Created.push_back(MaskedIsZero.getNode());
5460
5461 // To produce final result we need to blend 2 vectors: 'SetCC' and
5462 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
5463 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
5464 // constant-folded, select can get lowered to a shuffle with constant mask.
5465 SDValue Blended =
5466 DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
5467
5468 return Blended;
5469 }
5470
5471 bool TargetLowering::
5472 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5473 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5474 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5475 "be a constant integer");
5476 return true;
5477 }
5478
5479 return false;
5480 }
5481
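// The returned char encodes a cost, per the declaration in TargetLowering.h:
// 0 - not cheaply negatible; 1 - negatible at the same cost as the original
// expression; 2 - negatible more cheaply than the original (e.g. by simply
// stripping an existing FNEG).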
5482 char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
5483 bool LegalOperations, bool ForCodeSize,
5484 unsigned Depth) const {
5485 // fneg is removable even if it has multiple uses.
5486 if (Op.getOpcode() == ISD::FNEG)
5487 return 2;
5488
5489 // Don't allow anything with multiple uses unless we know it is free.
5490 EVT VT = Op.getValueType();
5491 const SDNodeFlags Flags = Op->getFlags();
5492 const TargetOptions &Options = DAG.getTarget().Options;
5493 if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
5494 isFPExtFree(VT, Op.getOperand(0).getValueType())))
5495 return 0;
5496
5497 // Don't recurse exponentially.
5498 if (Depth > SelectionDAG::MaxRecursionDepth)
5499 return 0;
5500
5501 switch (Op.getOpcode()) {
5502 case ISD::ConstantFP: {
5503 if (!LegalOperations)
5504 return 1;
5505
5506 // Don't invert constant FP values after legalization unless the target says
5507 // the negated constant is legal.
5508 return isOperationLegal(ISD::ConstantFP, VT) ||
5509 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
5510 ForCodeSize);
5511 }
5512 case ISD::BUILD_VECTOR: {
5513 // Only permit BUILD_VECTOR of constants.
5514 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
5515 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
5516 }))
5517 return 0;
5518 if (!LegalOperations)
5519 return 1;
5520 if (isOperationLegal(ISD::ConstantFP, VT) &&
5521 isOperationLegal(ISD::BUILD_VECTOR, VT))
5522 return 1;
5523 return llvm::all_of(Op->op_values(), [&](SDValue N) {
5524 return N.isUndef() ||
5525 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
5526 ForCodeSize);
5527 });
5528 }
5529 case ISD::FADD:
5530 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5531 return 0;
5532
5533 // After operation legalization, it might not be legal to create new FSUBs.
5534 if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
5535 return 0;
5536
5537 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
5538 if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
5539 ForCodeSize, Depth + 1))
5540 return V;
5541 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
5542 return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
5543 ForCodeSize, Depth + 1);
5544 case ISD::FSUB:
5545 // We can't turn -(A-B) into B-A when we honor signed zeros.
5546 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5547 return 0;
5548
5549 // fold (fneg (fsub A, B)) -> (fsub B, A)
5550 return 1;
5551
5552 case ISD::FMUL:
5553 case ISD::FDIV:
5554 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
5555 if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
5556 ForCodeSize, Depth + 1))
5557 return V;
5558
5559 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
5560 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
5561 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
5562 return 0;
5563
5564 return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
5565 ForCodeSize, Depth + 1);
5566
5567 case ISD::FMA:
5568 case ISD::FMAD: {
5569 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
5570 return 0;
5571
5572 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
5573 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
5574 char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
5575 ForCodeSize, Depth + 1);
5576 if (!V2)
5577 return 0;
5578
5579 // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
5580 char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
5581 ForCodeSize, Depth + 1);
5582 char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
5583 ForCodeSize, Depth + 1);
5584 char V01 = std::max(V0, V1);
5585 return V01 ? std::max(V01, V2) : 0;
5586 }
5587
5588 case ISD::FP_EXTEND:
5589 case ISD::FP_ROUND:
5590 case ISD::FSIN:
5591 return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
5592 ForCodeSize, Depth + 1);
5593 }
5594
5595 return 0;
5596 }
5597
5598 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
5599 bool LegalOperations,
5600 bool ForCodeSize,
5601 unsigned Depth) const {
5602 // fneg is removable even if it has multiple uses.
5603 if (Op.getOpcode() == ISD::FNEG)
5604 return Op.getOperand(0);
5605
5606 assert(Depth <= SelectionDAG::MaxRecursionDepth &&
5607 "getNegatedExpression doesn't match isNegatibleForFree");
5608 const SDNodeFlags Flags = Op->getFlags();
5609
5610 switch (Op.getOpcode()) {
5611 case ISD::ConstantFP: {
5612 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
5613 V.changeSign();
5614 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
5615 }
5616 case ISD::BUILD_VECTOR: {
5617 SmallVector<SDValue, 4> Ops;
5618 for (SDValue C : Op->op_values()) {
5619 if (C.isUndef()) {
5620 Ops.push_back(C);
5621 continue;
5622 }
5623 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
5624 V.changeSign();
5625 Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
5626 }
5627 return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
5628 }
5629 case ISD::FADD:
5630 assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
5631 Flags.hasNoSignedZeros()) &&
5632 "Expected NSZ fp-flag");
5633
5634 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
5635 if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
5636 Depth + 1))
5637 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
5638 getNegatedExpression(Op.getOperand(0), DAG,
5639 LegalOperations, ForCodeSize,
5640 Depth + 1),
5641 Op.getOperand(1), Flags);
5642 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
5643 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
5644 getNegatedExpression(Op.getOperand(1), DAG,
5645 LegalOperations, ForCodeSize,
5646 Depth + 1),
5647 Op.getOperand(0), Flags);
5648 case ISD::FSUB:
5649 // fold (fneg (fsub 0, B)) -> B
5650 if (ConstantFPSDNode *N0CFP =
5651 isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
5652 if (N0CFP->isZero())
5653 return Op.getOperand(1);
5654
5655 // fold (fneg (fsub A, B)) -> (fsub B, A)
5656 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
5657 Op.getOperand(1), Op.getOperand(0), Flags);
5658
5659 case ISD::FMUL:
5660 case ISD::FDIV:
5661 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
5662 if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
5663 Depth + 1))
5664 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
5665 getNegatedExpression(Op.getOperand(0), DAG,
5666 LegalOperations, ForCodeSize,
5667 Depth + 1),
5668 Op.getOperand(1), Flags);
5669
5670 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
5671 return DAG.getNode(
5672 Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
5673 getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
5674 ForCodeSize, Depth + 1),
5675 Flags);
5676
5677 case ISD::FMA:
5678 case ISD::FMAD: {
5679 assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
5680 Flags.hasNoSignedZeros()) &&
5681 "Expected NSZ fp-flag");
5682
5683 SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
5684 ForCodeSize, Depth + 1);
5685
5686 char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
5687 ForCodeSize, Depth + 1);
5688 char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
5689 ForCodeSize, Depth + 1);
5690 // TODO: This is a hack. It is possible that costs have changed between now
5691 // and the initial calls to isNegatibleForFree(). That is because we
5692 // are rewriting the expression, and that may change the number of
5693 // uses (and therefore the cost) of values. If the negation costs are
5694 // equal, only negate this value if it is a constant. Otherwise, try
5695 // operand 1. A better fix would eliminate uses as a cost factor or
5696 // track the change in uses as we rewrite the expression.
5697 if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
5698 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
5699 SDValue Neg0 = getNegatedExpression(
5700 Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
5701 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
5702 Op.getOperand(1), Neg2, Flags);
5703 }
5704
5705 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
5706 SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
5707 ForCodeSize, Depth + 1);
5708 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
5709 Op.getOperand(0), Neg1, Neg2, Flags);
5710 }
5711
5712 case ISD::FP_EXTEND:
5713 case ISD::FSIN:
5714 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
5715 getNegatedExpression(Op.getOperand(0), DAG,
5716 LegalOperations, ForCodeSize,
5717 Depth + 1));
5718 case ISD::FP_ROUND:
5719 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
5720 getNegatedExpression(Op.getOperand(0), DAG,
5721 LegalOperations, ForCodeSize,
5722 Depth + 1),
5723 Op.getOperand(1));
5724 }
5725
5726 llvm_unreachable("Unknown code");
5727 }
5728
5729 //===----------------------------------------------------------------------===//
5730 // Legalization Utilities
5731 //===----------------------------------------------------------------------===//
5732
5733 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
5734 SDValue LHS, SDValue RHS,
5735 SmallVectorImpl<SDValue> &Result,
5736 EVT HiLoVT, SelectionDAG &DAG,
5737 MulExpansionKind Kind, SDValue LL,
5738 SDValue LH, SDValue RL, SDValue RH) const {
5739 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
5740 Opcode == ISD::SMUL_LOHI);
5741
5742 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
5743 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
5744 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
5745 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
5746 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5747 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
5748 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5749 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
5750
5751 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
5752 return false;
5753
5754 unsigned OuterBitSize = VT.getScalarSizeInBits();
5755 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
5756 unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
5757 unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
5758
5759 // LL, LH, RL, and RH must be either all NULL or all set to a value.
5760 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
5761 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
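  // The expansion below is schoolbook multiplication on half-width digits:
  // with h = InnerBitSize,
  //   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^h + LH*RH * 2^(2h),
  // where the final term survives only for the *MUL_LOHI opcodes; for
  // ISD::MUL it is truncated away.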
5762
5763 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
5764 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
5765 bool Signed) -> bool {
5766 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
5767 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
5768 Hi = SDValue(Lo.getNode(), 1);
5769 return true;
5770 }
5771 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
5772 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
5773 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
5774 return true;
5775 }
5776 return false;
5777 };
5778
5779 SDValue Lo, Hi;
5780
5781 if (!LL.getNode() && !RL.getNode() &&
5782 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5783 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
5784 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
5785 }
5786
5787 if (!LL.getNode())
5788 return false;
5789
5790 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
5791 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
5792 DAG.MaskedValueIsZero(RHS, HighMask)) {
5793 // The inputs are both zero-extended.
5794 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
5795 Result.push_back(Lo);
5796 Result.push_back(Hi);
5797 if (Opcode != ISD::MUL) {
5798 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5799 Result.push_back(Zero);
5800 Result.push_back(Zero);
5801 }
5802 return true;
5803 }
5804 }
5805
5806 if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
5807 RHSSB > InnerBitSize) {
5808 // The input values are both sign-extended.
5809 // TODO non-MUL case?
5810 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
5811 Result.push_back(Lo);
5812 Result.push_back(Hi);
5813 return true;
5814 }
5815 }
5816
5817 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
5818 EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
5819 if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
5820 // FIXME getShiftAmountTy does not always return a sensible result when VT
5821 // is an illegal type, and so the type may be too small to fit the shift
5822 // amount. Override it with i32. The shift will have to be legalized.
5823 ShiftAmountTy = MVT::i32;
5824 }
5825 SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
5826
5827 if (!LH.getNode() && !RH.getNode() &&
5828 isOperationLegalOrCustom(ISD::SRL, VT) &&
5829 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5830 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
5831 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
5832 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
5833 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
5834 }
5835
5836 if (!LH.getNode())
5837 return false;
5838
5839 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
5840 return false;
5841
5842 Result.push_back(Lo);
5843
5844 if (Opcode == ISD::MUL) {
5845 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
5846 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
5847 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
5848 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
5849 Result.push_back(Hi);
5850 return true;
5851 }
5852
5853 // Compute the full width result.
5854 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
5855 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
5856 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5857 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
5858 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
5859 };
5860
5861 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5862 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
5863 return false;
5864
5865 // This is effectively the add part of a multiply-add of half-sized operands,
5866 // so it cannot overflow.
5867 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5868
5869 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
5870 return false;
5871
5872 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5873 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5874
5875 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
5876 isOperationLegalOrCustom(ISD::ADDE, VT));
5877 if (UseGlue)
5878 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
5879 Merge(Lo, Hi));
5880 else
5881 Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
5882 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
5883
5884 SDValue Carry = Next.getValue(1);
5885 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5886 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5887
5888 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
5889 return false;
5890
5891 if (UseGlue)
5892 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
5893 Carry);
5894 else
5895 Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
5896 Zero, Carry);
5897
5898 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5899
5900 if (Opcode == ISD::SMUL_LOHI) {
5901 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5902 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
5903 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
5904
5905 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5906 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
5907 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
5908 }
5909
5910 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5911 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5912 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5913 return true;
5914 }
5915
5916 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5917 SelectionDAG &DAG, MulExpansionKind Kind,
5918 SDValue LL, SDValue LH, SDValue RL,
5919 SDValue RH) const {
5920 SmallVector<SDValue, 2> Result;
5921 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
5922 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
5923 DAG, Kind, LL, LH, RL, RH);
5924 if (Ok) {
5925 assert(Result.size() == 2);
5926 Lo = Result[0];
5927 Hi = Result[1];
5928 }
5929 return Ok;
5930 }
5931
5932 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
5933 SelectionDAG &DAG) const {
5934 EVT VT = Node->getValueType(0);
5935
5936 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5937 !isOperationLegalOrCustom(ISD::SRL, VT) ||
5938 !isOperationLegalOrCustom(ISD::SUB, VT) ||
5939 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
5940 return false;
5941
5942 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5943 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
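// E.g. for i8 fshl X, Y, 3: ShAmt = 3 & 7 = 3 and InvShAmt = 8 - 3 = 5, so
// the concatenation X:Y shifted left by 3 yields (X << 3) | (Y >> 5).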
5944 SDValue X = Node->getOperand(0);
5945 SDValue Y = Node->getOperand(1);
5946 SDValue Z = Node->getOperand(2);
5947
5948 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5949 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
5950 SDLoc DL(SDValue(Node, 0));
5951
5952 EVT ShVT = Z.getValueType();
5953 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5954 SDValue Zero = DAG.getConstant(0, DL, ShVT);
5955
5956 SDValue ShAmt;
5957 if (isPowerOf2_32(EltSizeInBits)) {
5958 SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5959 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
5960 } else {
5961 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
5962 }
5963
5964 SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
5965 SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
5966 SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5967 SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
5968
5969 // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5970 // and that is undefined. We must compare and select to avoid UB.
5971 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
5972
5973 // For fshl, 0-shift returns the 1st arg (X).
5974 // For fshr, 0-shift returns the 2nd arg (Y).
5975 SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
5976 Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
5977 return true;
5978 }
5979
5980 // TODO: Merge with expandFunnelShift.
5981 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
5982 SelectionDAG &DAG) const {
5983 EVT VT = Node->getValueType(0);
5984 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5985 bool IsLeft = Node->getOpcode() == ISD::ROTL;
5986 SDValue Op0 = Node->getOperand(0);
5987 SDValue Op1 = Node->getOperand(1);
5988 SDLoc DL(SDValue(Node, 0));
5989
5990 EVT ShVT = Op1.getValueType();
5991 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5992
5993 // If a rotate in the other direction is legal, use it.
5994 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
5995 if (isOperationLegal(RevRot, VT)) {
5996 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5997 Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
5998 return true;
5999 }
6000
6001 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6002 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6003 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6004 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
6005 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6006 return false;
6007
6008 // Otherwise,
6009 // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
6010 // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
6011 //
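// E.g. for i8 (rotl x, 3): (x << (3 & 7)) | (x >> ((8 - 3) & 7))
//                        = (x << 3) | (x >> 5).
// Masking both amounts also makes c == 0 safe: w - 0 == 8 would be an
// out-of-range shift, but (8 & 7) == 0 yields (x << 0) | (x >> 0) == x.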
6012 assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
6013 "Expecting the type bitwidth to be a power of 2");
6014 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
6015 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
6016 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6017 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
6018 SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
6019 SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
6020 Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
6021 DAG.getNode(HsOpc, DL, VT, Op0, And1));
6022 return true;
6023 }
6024
6025 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
6026 SelectionDAG &DAG) const {
6027 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
6028 SDValue Src = Node->getOperand(OpNo);
6029 EVT SrcVT = Src.getValueType();
6030 EVT DstVT = Node->getValueType(0);
6031 SDLoc dl(SDValue(Node, 0));
6032
6033 // FIXME: Only f32 to i64 conversions are supported.
6034 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
6035 return false;
6036
6037 if (Node->isStrictFPOpcode())
6038 // When a NaN is converted to an integer, a trap is allowed. We can't
6039 // use this expansion here because it would eliminate that trap. Other
6040 // traps are also allowed and cannot be eliminated. See
6041 // IEEE 754-2008 sec 5.8.
6042 return false;
6043
6044 // Expand f32 -> i64 conversion
6045 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6046 // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
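// In outline: extract the biased exponent and mantissa from the bits of the
// float, OR in the implicit leading 1, shift the mantissa left or right by
// (exponent - 23), and finally negate via (R ^ Sign) - Sign if the sign bit
// was set. Exponents below zero produce a result of 0.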
6047 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
6048 EVT IntVT = SrcVT.changeTypeToInteger();
6049 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
6050
6051 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
6052 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
6053 SDValue Bias = DAG.getConstant(127, dl, IntVT);
6054 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
6055 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
6056 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
6057
6058 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
6059
6060 SDValue ExponentBits = DAG.getNode(
6061 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
6062 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
6063 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
6064
6065 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
6066 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
6067 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
6068 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
6069
6070 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
6071 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
6072 DAG.getConstant(0x00800000, dl, IntVT));
6073
6074 R = DAG.getZExtOrTrunc(R, dl, DstVT);
6075
6076 R = DAG.getSelectCC(
6077 dl, Exponent, ExponentLoBit,
6078 DAG.getNode(ISD::SHL, dl, DstVT, R,
6079 DAG.getZExtOrTrunc(
6080 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
6081 dl, IntShVT)),
6082 DAG.getNode(ISD::SRL, dl, DstVT, R,
6083 DAG.getZExtOrTrunc(
6084 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
6085 dl, IntShVT)),
6086 ISD::SETGT);
6087
6088 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
6089 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
6090
6091 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
6092 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
6093 return true;
6094 }
6095
6096 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
6097 SDValue &Chain,
6098 SelectionDAG &DAG) const {
6099 SDLoc dl(SDValue(Node, 0));
6100 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
6101 SDValue Src = Node->getOperand(OpNo);
6102
6103 EVT SrcVT = Src.getValueType();
6104 EVT DstVT = Node->getValueType(0);
6105 EVT SetCCVT =
6106 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6107 EVT DstSetCCVT =
6108 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
6109
6110 // Only expand vector types if we have the appropriate vector bit operations.
6111 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
6112 ISD::FP_TO_SINT;
6113 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
6114 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
6115 return false;
6116
6117 // If the maximum float value is smaller than the signed integer range,
6118 // the destination signmask can't be represented by the float, so we can
6119 // just use FP_TO_SINT directly.
6120 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
6121 APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
6122 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
6123 if (APFloat::opOverflow &
6124 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
6125 if (Node->isStrictFPOpcode()) {
6126 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
6127 { Node->getOperand(0), Src });
6128 Chain = Result.getValue(1);
6129 } else
6130 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
6131 return true;
6132 }
6133
6134 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
6135 SDValue Sel;
6136
6137 if (Node->isStrictFPOpcode()) {
6138 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
6139 Node->getOperand(0), /*IsSignaling*/ true);
6140 Chain = Sel.getValue(1);
6141 } else {
6142 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
6143 }
6144
6145 bool Strict = Node->isStrictFPOpcode() ||
6146 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
6147
6148 if (Strict) {
6149 // Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
6150 // signmask, offset it first (the result of which should be fully representable).
6151 // Sel = Src < 0x8000000000000000
6152 // FltOfs = select Sel, 0, 0x8000000000000000
6153 // IntOfs = select Sel, 0, 0x8000000000000000
6154 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
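// E.g. converting 3e9 from f32 to u32: Sel is false (3e9 >= 2^31), so
// FltOfs = IntOfs = 2^31 and the result is
// fp_to_sint(3e9 - 2^31) ^ 0x80000000 = 852516352 ^ 0x80000000 = 3000000000.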
6155
6156 // TODO: Should any fast-math-flags be set for the FSUB?
6157 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
6158 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
6159 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
6160 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
6161 DAG.getConstant(0, dl, DstVT),
6162 DAG.getConstant(SignMask, dl, DstVT));
6163 SDValue SInt;
6164 if (Node->isStrictFPOpcode()) {
6165 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
6166 { Chain, Src, FltOfs });
6167 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
6168 { Val.getValue(1), Val });
6169 Chain = SInt.getValue(1);
6170 } else {
6171 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
6172 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
6173 }
6174 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
6175 } else {
6176 // Expand based on maximum range of FP_TO_SINT:
6177 // True = fp_to_sint(Src)
6178 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
6179 // Result = select (Src < 0x8000000000000000), True, False
6180
6181 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
6182 // TODO: Should any fast-math-flags be set for the FSUB?
6183 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
6184 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
6185 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
6186 DAG.getConstant(SignMask, dl, DstVT));
6187 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
6188 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
6189 }
6190 return true;
6191 }
6192
6193 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
6194 SDValue &Chain,
6195 SelectionDAG &DAG) const {
6196 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
6197 SDValue Src = Node->getOperand(OpNo);
6198 EVT SrcVT = Src.getValueType();
6199 EVT DstVT = Node->getValueType(0);
6200
6201 if (SrcVT.getScalarType() != MVT::i64)
6202 return false;
6203
6204 SDLoc dl(SDValue(Node, 0));
6205 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
6206
6207 if (DstVT.getScalarType() == MVT::f32) {
6208 // Only expand vector types if we have the appropriate vector bit
6209 // operations.
6210 if (SrcVT.isVector() &&
6211 (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
6212 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
6213 !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
6214 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
6215 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
6216 return false;
6217
6218 // For unsigned conversions, convert them to signed conversions using the
6219 // algorithm from the x86_64 __floatundisf in compiler_rt.
6220
6221 // TODO: This really should be implemented using a branch rather than a
6222 // select. We happen to get lucky and machinesink does the right
6223 // thing most of the time. This would be a good candidate for a
6224 // pseudo-op, or, even better, for whole-function isel.
6225 EVT SetCCVT =
6226 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6227
6228 SDValue SignBitTest = DAG.getSetCC(
6229 dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
6230
6231 SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
6232 SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
6233 SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
6234 SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
6235 SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
6236
6237 SDValue Slow, Fast;
6238 if (Node->isStrictFPOpcode()) {
6239 // In strict mode, we must avoid spurious exceptions, and therefore
6240 // must make sure to only emit a single STRICT_SINT_TO_FP.
6241 SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
6242 Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
6243 { Node->getOperand(0), InCvt });
6244 Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
6245 { Fast.getValue(1), Fast, Fast });
6246 Chain = Slow.getValue(1);
6247 // The STRICT_SINT_TO_FP inherits the exception mode from the
6248 // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
6249 // never raise any exception.
6250 SDNodeFlags Flags;
6251 Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
6252 Fast->setFlags(Flags);
6253 Flags.setNoFPExcept(true);
6254 Slow->setFlags(Flags);
6255 } else {
6256 SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
6257 Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
6258 Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
6259 }
6260
6261 Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
6262 return true;
6263 }
6264
6265 if (DstVT.getScalarType() == MVT::f64) {
6266 // Only expand vector types if we have the appropriate vector bit
6267 // operations.
6268 if (SrcVT.isVector() &&
6269 (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
6270 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
6271 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
6272 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
6273 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
6274 return false;
6275
6276 // Implementation of unsigned i64 to f64 following the algorithm in
6277 // __floatundidf in compiler_rt. This implementation has the advantage
6278 // of performing rounding correctly, both in the default rounding mode
6279 // and in all alternate rounding modes.
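// Read as doubles, the bit pattern 0x433... | Lo is exactly 2^52 + Lo and
// 0x453... | Hi is exactly 2^84 + Hi * 2^32, so the final sum is
//   ((2^84 + Hi * 2^32) - (2^84 + 2^52)) + (2^52 + Lo) = Hi * 2^32 + Lo,
// where the subtraction is exact and only the last addition rounds.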
6280 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
6281 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
6282 BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
6283 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
6284 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
6285 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
6286
6287 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
6288 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
6289 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
6290 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
6291 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
6292 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
6293 if (Node->isStrictFPOpcode()) {
6294 SDValue HiSub =
6295 DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
6296 {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
6297 Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
6298 {HiSub.getValue(1), LoFlt, HiSub});
6299 Chain = Result.getValue(1);
6300 } else {
6301 SDValue HiSub =
6302 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
6303 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
6304 }
6305 return true;
6306 }
6307
6308 return false;
6309 }
6310
6311 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6312 SelectionDAG &DAG) const {
6313 SDLoc dl(Node);
6314 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6315 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6316 EVT VT = Node->getValueType(0);
6317 if (isOperationLegalOrCustom(NewOp, VT)) {
6318 SDValue Quiet0 = Node->getOperand(0);
6319 SDValue Quiet1 = Node->getOperand(1);
6320
6321 if (!Node->getFlags().hasNoNaNs()) {
6322 // Insert canonicalizes if we might need to quiet the inputs to get
6323 // correct sNaN behavior.
6324 if (!DAG.isKnownNeverSNaN(Quiet0)) {
6325 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6326 Node->getFlags());
6327 }
6328 if (!DAG.isKnownNeverSNaN(Quiet1)) {
6329 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6330 Node->getFlags());
6331 }
6332 }
6333
6334 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6335 }
6336
6337 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use that
6338 // instead if there are no NaNs.
6339 if (Node->getFlags().hasNoNaNs()) {
6340 unsigned IEEE2018Op =
6341 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6342 if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6343 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6344 Node->getOperand(1), Node->getFlags());
6345 }
6346 }
6347
6348 // If none of the above worked, but there are no NaNs, then expand to
6349 // a compare/select sequence. This is required for correctness since
6350 // InstCombine might have canonicalized a fcmp+select sequence to a
6351 // FMINNUM/FMAXNUM node. If we were to fall through to the default
6352 // expansion to libcall, we might introduce a link-time dependency
6353 // on libm into a file that originally did not have one.
6354 if (Node->getFlags().hasNoNaNs()) {
6355 ISD::CondCode Pred =
6356 Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
6357 SDValue Op1 = Node->getOperand(0);
6358 SDValue Op2 = Node->getOperand(1);
6359 SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
6360 // Copy FMF flags, but always set the no-signed-zeros flag
6361 // as this is implied by the FMINNUM/FMAXNUM semantics.
6362 SDNodeFlags Flags = Node->getFlags();
6363 Flags.setNoSignedZeros(true);
6364 SelCC->setFlags(Flags);
6365 return SelCC;
6366 }
6367
6368 return SDValue();
6369 }
6370
6371 bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
6372 SelectionDAG &DAG) const {
6373 SDLoc dl(Node);
6374 EVT VT = Node->getValueType(0);
6375 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6376 SDValue Op = Node->getOperand(0);
6377 unsigned Len = VT.getScalarSizeInBits();
6378 assert(VT.isInteger() && "CTPOP not implemented for this type.");
6379
6380 // TODO: Add support for irregular type lengths.
6381 if (!(Len <= 128 && Len % 8 == 0))
6382 return false;
6383
6384 // Only expand vector types if we have the appropriate vector bit operations.
6385 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
6386 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6387 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6388 (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
6389 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6390 return false;
6391
6392 // This is the "best" algorithm from
6393 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
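// Worked i8 example for v = 0xDA (5 bits set):
//   v - ((v >> 1) & 0x55)            -> 0xDA - 0x45 = 0x95 (2-bit sums)
//   (v & 0x33) + ((v >> 2) & 0x33)   -> 0x11 + 0x21 = 0x32 (4-bit sums)
//   (v + (v >> 4)) & 0x0F            -> 0x35 & 0x0F = 0x05 = 5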
6394 SDValue Mask55 =
6395 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
6396 SDValue Mask33 =
6397 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
6398 SDValue Mask0F =
6399 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
6400 SDValue Mask01 =
6401 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
6402
6403 // v = v - ((v >> 1) & 0x55555555...)
6404 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
6405 DAG.getNode(ISD::AND, dl, VT,
6406 DAG.getNode(ISD::SRL, dl, VT, Op,
6407 DAG.getConstant(1, dl, ShVT)),
6408 Mask55));
6409 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
6410 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
6411 DAG.getNode(ISD::AND, dl, VT,
6412 DAG.getNode(ISD::SRL, dl, VT, Op,
6413 DAG.getConstant(2, dl, ShVT)),
6414 Mask33));
6415 // v = (v + (v >> 4)) & 0x0F0F0F0F...
6416 Op = DAG.getNode(ISD::AND, dl, VT,
6417 DAG.getNode(ISD::ADD, dl, VT, Op,
6418 DAG.getNode(ISD::SRL, dl, VT, Op,
6419 DAG.getConstant(4, dl, ShVT))),
6420 Mask0F);
6421 // v = (v * 0x01010101...) >> (Len - 8)
6422 if (Len > 8)
6423 Op =
6424 DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
6425 DAG.getConstant(Len - 8, dl, ShVT));
6426
6427 Result = Op;
6428 return true;
6429 }
6430
6431 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6432 SelectionDAG &DAG) const {
6433 SDLoc dl(Node);
6434 EVT VT = Node->getValueType(0);
6435 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6436 SDValue Op = Node->getOperand(0);
6437 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6438
6439 // If the non-ZERO_UNDEF version is supported we can use that instead.
6440 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6441 isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6442 Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6443 return true;
6444 }
6445
6446 // If the ZERO_UNDEF version is supported use that and handle the zero case.
6447 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6448 EVT SetCCVT =
6449 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6450 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6451 SDValue Zero = DAG.getConstant(0, dl, VT);
6452 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6453 Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6454 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6455 return true;
6456 }
6457
6458 // Only expand vector types if we have the appropriate vector bit operations.
6459 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6460 !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6461 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6462 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6463 return false;
6464
6465 // for now, we do this:
6466 // x = x | (x >> 1);
6467 // x = x | (x >> 2);
6468 // ...
6469 // x = x | (x >>16);
6470 // x = x | (x >>32); // for 64-bit input
6471 // return popcount(~x);
6472 //
6473 // Ref: "Hacker's Delight" by Henry Warren
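// E.g. for i8 x = 0x10: the shifts smear the top set bit downward,
// 0x10 -> 0x18 -> 0x1E -> 0x1F, so ~x = 0xE0 and popcount(~x) = 3 = ctlz(x).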
6474 for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6475 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6476 Op = DAG.getNode(ISD::OR, dl, VT, Op,
6477 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6478 }
6479 Op = DAG.getNOT(dl, Op, VT);
6480 Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6481 return true;
6482 }
6483
6484 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6485 SelectionDAG &DAG) const {
6486 SDLoc dl(Node);
6487 EVT VT = Node->getValueType(0);
6488 SDValue Op = Node->getOperand(0);
6489 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6490
6491 // If the non-ZERO_UNDEF version is supported we can use that instead.
6492 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6493 isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6494 Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6495 return true;
6496 }
6497
6498 // If the ZERO_UNDEF version is supported use that and handle the zero case.
6499 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6500 EVT SetCCVT =
6501 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6502 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6503 SDValue Zero = DAG.getConstant(0, dl, VT);
6504 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6505 Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6506 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6507 return true;
6508 }
6509
6510 // Only expand vector types if we have the appropriate vector bit operations.
6511 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6512 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6513 !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6514 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6515 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6516 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6517 return false;
6518
6519 // for now, we use: { return popcount(~x & (x - 1)); }
6520 // unless the target has ctlz but not ctpop, in which case we use:
6521 // { return 32 - nlz(~x & (x-1)); }
6522 // Ref: "Hacker's Delight" by Henry Warren
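// E.g. for i8 x = 0x68 (cttz = 3): ~x & (x - 1) = 0x97 & 0x67 = 0x07, and
// popcount(0x07) = 3; the AND isolates exactly the trailing zero positions.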
6523 SDValue Tmp = DAG.getNode(
6524 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6525 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6526
6527 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6528 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6529 Result =
6530 DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6531 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6532 return true;
6533 }
6534
6535 Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6536 return true;
6537 }
6538
6539 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6540 SelectionDAG &DAG) const {
6541 SDLoc dl(N);
6542 EVT VT = N->getValueType(0);
6543 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6544 SDValue Op = N->getOperand(0);
6545
6546 // Only expand vector types if we have the appropriate vector operations.
6547 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6548 !isOperationLegalOrCustom(ISD::ADD, VT) ||
6549 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6550 return false;
6551
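// abs(x) -> (x + sign) ^ sign with sign = x >>s (BW - 1): sign is 0 for
// non-negative x (leaving x unchanged) and all-ones for negative x, in
// which case (x - 1) ^ -1 == -x. E.g. i8 x = -5 (0xFB): sign = 0xFF,
// x + sign = 0xFA, and 0xFA ^ 0xFF = 0x05.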
6552 SDValue Shift =
6553 DAG.getNode(ISD::SRA, dl, VT, Op,
6554 DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6555 SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6556 Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6557 return true;
6558 }
6559
6560 std::pair<SDValue, SDValue>
6561 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
6562 SelectionDAG &DAG) const {
6563 SDLoc SL(LD);
6564 SDValue Chain = LD->getChain();
6565 SDValue BasePTR = LD->getBasePtr();
6566 EVT SrcVT = LD->getMemoryVT();
6567 ISD::LoadExtType ExtType = LD->getExtensionType();
6568
6569 unsigned NumElem = SrcVT.getVectorNumElements();
6570
6571 EVT SrcEltVT = SrcVT.getScalarType();
6572 EVT DstEltVT = LD->getValueType(0).getScalarType();
6573
6574 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
6575 assert(SrcEltVT.isByteSized());
6576
6577 SmallVector<SDValue, 8> Vals;
6578 SmallVector<SDValue, 8> LoadChains;
6579
6580 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6581 SDValue ScalarLoad =
6582 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
6583 LD->getPointerInfo().getWithOffset(Idx * Stride),
6584 SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
6585 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6586
6587 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
6588
6589 Vals.push_back(ScalarLoad.getValue(0));
6590 LoadChains.push_back(ScalarLoad.getValue(1));
6591 }
6592
6593 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
6594 SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
6595
6596 return std::make_pair(Value, NewChain);
6597 }
6598
6599 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
6600 SelectionDAG &DAG) const {
6601 SDLoc SL(ST);
6602
6603 SDValue Chain = ST->getChain();
6604 SDValue BasePtr = ST->getBasePtr();
6605 SDValue Value = ST->getValue();
6606 EVT StVT = ST->getMemoryVT();
6607
6608 // The type of the data we want to save.
6609 EVT RegVT = Value.getValueType();
6610 EVT RegSclVT = RegVT.getScalarType();
6611
6612 // The type of data as saved in memory.
6613 EVT MemSclVT = StVT.getScalarType();
6614
6615 EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
6616 unsigned NumElem = StVT.getVectorNumElements();
6617
6618 // A vector must always be stored in memory as-is, i.e. without any padding
6619 // between the elements, since various pieces of code depend on it, e.g. in the
6620 // handling of a bitcast of a vector type to int, which may be done with a
6621 // vector store followed by an integer load. A vector that does not have
6622 // elements that are byte-sized must therefore be stored as an integer
6623 // built out of the extracted vector elements.
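// E.g. a v4i1 store becomes a single i4 store of
// (e3 << 3) | (e2 << 2) | (e1 << 1) | e0 on little-endian targets.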
6624 if (!MemSclVT.isByteSized()) {
6625 unsigned NumBits = StVT.getSizeInBits();
6626 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
6627
6628 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
6629
6630 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6631 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
6632 DAG.getConstant(Idx, SL, IdxVT));
6633 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
6634 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
6635 unsigned ShiftIntoIdx =
6636 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
6637 SDValue ShiftAmount =
6638 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
6639 SDValue ShiftedElt =
6640 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
6641 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
6642 }
6643
6644 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
6645 ST->getAlignment(), ST->getMemOperand()->getFlags(),
6646 ST->getAAInfo());
6647 }
6648
6649 // Store stride in bytes.
6650 unsigned Stride = MemSclVT.getSizeInBits() / 8;
6651 assert(Stride && "Zero stride!");
6652 // Extract each of the elements from the original vector and save them into
6653 // memory individually.
6654 SmallVector<SDValue, 8> Stores;
6655 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6656 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
6657 DAG.getConstant(Idx, SL, IdxVT));
6658
6659 SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
6660
6661 // This scalar TruncStore may be illegal, but we legalize it later.
6662 SDValue Store = DAG.getTruncStore(
6663 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
6664 MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
6665 ST->getMemOperand()->getFlags(), ST->getAAInfo());
6666
6667 Stores.push_back(Store);
6668 }
6669
6670 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
6671 }
6672
6673 std::pair<SDValue, SDValue>
6674 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
6675 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
6676 "unaligned indexed loads not implemented!");
6677 SDValue Chain = LD->getChain();
6678 SDValue Ptr = LD->getBasePtr();
6679 EVT VT = LD->getValueType(0);
6680 EVT LoadedVT = LD->getMemoryVT();
6681 SDLoc dl(LD);
6682 auto &MF = DAG.getMachineFunction();
6683
6684 if (VT.isFloatingPoint() || VT.isVector()) {
6685 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
6686 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
6687 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
6688 LoadedVT.isVector()) {
6689 // Scalarize the load and let the individual components be handled.
6690 return scalarizeVectorLoad(LD, DAG);
6691 }
6692
6693 // Expand to a (misaligned) integer load of the same size,
6694 // then bitconvert to floating point or vector.
6695 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
6696 LD->getMemOperand());
6697 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
6698 if (LoadedVT != VT)
6699 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
6700 ISD::ANY_EXTEND, dl, VT, Result);
6701
6702 return std::make_pair(Result, newLoad.getValue(1));
6703 }
6704
6705 // Copy the value to an (aligned) stack slot using (unaligned) integer
6706 // loads and stores, then do an (aligned) load from the stack slot.
6707 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
6708 unsigned LoadedBytes = LoadedVT.getStoreSize();
6709 unsigned RegBytes = RegVT.getSizeInBits() / 8;
6710 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
6711
6712 // Make sure the stack slot is also aligned for the register type.
6713 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
6714 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
6715 SmallVector<SDValue, 8> Stores;
6716 SDValue StackPtr = StackBase;
6717 unsigned Offset = 0;
6718
6719 EVT PtrVT = Ptr.getValueType();
6720 EVT StackPtrVT = StackPtr.getValueType();
6721
6722 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6723 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6724
6725 // Do all but one of the copies using the full register width.
6726 for (unsigned i = 1; i < NumRegs; i++) {
6727 // Load one integer register's worth from the original location.
6728 SDValue Load = DAG.getLoad(
6729 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
6730 MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
6731 LD->getAAInfo());
6732 // Follow the load with a store to the stack slot. Remember the store.
6733 Stores.push_back(DAG.getStore(
6734 Load.getValue(1), dl, Load, StackPtr,
6735 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
6736 // Increment the pointers.
6737 Offset += RegBytes;
6738
6739 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6740 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6741 }
6742
6743 // The last copy may be partial. Do an extending load.
6744 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
6745 8 * (LoadedBytes - Offset));
6746 SDValue Load =
6747 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
6748 LD->getPointerInfo().getWithOffset(Offset), MemVT,
6749 MinAlign(LD->getAlignment(), Offset),
6750 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6751 // Follow the load with a store to the stack slot. Remember the store.
6752 // On big-endian machines this requires a truncating store to ensure
6753 // that the bits end up in the right place.
6754 Stores.push_back(DAG.getTruncStore(
6755 Load.getValue(1), dl, Load, StackPtr,
6756 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
6757
6758 // The order of the stores doesn't matter - say it with a TokenFactor.
6759 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6760
6761 // Finally, perform the original load, only redirected to the stack slot.
6762 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
6763 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
6764 LoadedVT);
6765
6766 // Callers expect a MERGE_VALUES node.
6767 return std::make_pair(Load, TF);
6768 }
6769
6770 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
6771 "Unaligned load of unsupported type.");
6772
6773 // Compute the new VT that is half the size of the old one. This is an
6774 // integer MVT.
6775 unsigned NumBits = LoadedVT.getSizeInBits();
6776 EVT NewLoadedVT;
6777 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
6778 NumBits >>= 1;
6779
6780 unsigned Alignment = LD->getAlignment();
6781 unsigned IncrementSize = NumBits / 8;
6782 ISD::LoadExtType HiExtType = LD->getExtensionType();
6783
6784 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
6785 if (HiExtType == ISD::NON_EXTLOAD)
6786 HiExtType = ISD::ZEXTLOAD;
6787
6788 // Load the value in two parts
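// (e.g. an i32 load at alignment 2 becomes two i16 loads: the low half is
// always zero-extended, while the high half keeps the original extension
// type and is shifted into place below).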
6789 SDValue Lo, Hi;
6790 if (DAG.getDataLayout().isLittleEndian()) {
6791 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
6792 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
6793 LD->getAAInfo());
6794
6795 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6796 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
6797 LD->getPointerInfo().getWithOffset(IncrementSize),
6798 NewLoadedVT, MinAlign(Alignment, IncrementSize),
6799 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6800 } else {
6801 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
6802 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
6803 LD->getAAInfo());
6804
6805 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6806 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
6807 LD->getPointerInfo().getWithOffset(IncrementSize),
6808 NewLoadedVT, MinAlign(Alignment, IncrementSize),
6809 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6810 }
6811
6812 // Aggregate the two parts.
6813 SDValue ShiftAmount =
6814 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
6815 DAG.getDataLayout()));
6816 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
6817 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
6818
6819 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
6820 Hi.getValue(1));
6821
6822 return std::make_pair(Result, TF);
6823 }
6824
6825 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
6826 SelectionDAG &DAG) const {
6827 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
6828 "unaligned indexed stores not implemented!");
6829 SDValue Chain = ST->getChain();
6830 SDValue Ptr = ST->getBasePtr();
6831 SDValue Val = ST->getValue();
6832 EVT VT = Val.getValueType();
6833 int Alignment = ST->getAlignment();
6834 auto &MF = DAG.getMachineFunction();
6835 EVT StoreMemVT = ST->getMemoryVT();
6836
6837 SDLoc dl(ST);
6838 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
6839 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
6840 if (isTypeLegal(intVT)) {
6841 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
6842 StoreMemVT.isVector()) {
6843 // Scalarize the store and let the individual components be handled.
6844 SDValue Result = scalarizeVectorStore(ST, DAG);
6845 return Result;
6846 }
6847 // Expand to a bitconvert of the value to the integer type of the
6848 // same size, then a (misaligned) int store.
6849 // FIXME: Does not handle truncating floating point stores!
6850 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
6851 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
6852 Alignment, ST->getMemOperand()->getFlags());
6853 return Result;
6854 }
6855 // Do an (aligned) store to a stack slot, then copy from the stack slot
6856 // to the final destination using (unaligned) integer loads and stores.
6857 MVT RegVT = getRegisterType(
6858 *DAG.getContext(),
6859 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
6860 EVT PtrVT = Ptr.getValueType();
6861 unsigned StoredBytes = StoreMemVT.getStoreSize();
6862 unsigned RegBytes = RegVT.getSizeInBits() / 8;
6863 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
6864
6865 // Make sure the stack slot is also aligned for the register type.
6866 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
6867 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6868
6869 // Perform the original store, only redirected to the stack slot.
6870 SDValue Store = DAG.getTruncStore(
6871 Chain, dl, Val, StackPtr,
6872 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
6873
6874 EVT StackPtrVT = StackPtr.getValueType();
6875
6876 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6877 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6878 SmallVector<SDValue, 8> Stores;
6879 unsigned Offset = 0;
6880
6881 // Do all but one of the copies using the full register width.
6882 for (unsigned i = 1; i < NumRegs; i++) {
6883 // Load one integer register's worth from the stack slot.
6884 SDValue Load = DAG.getLoad(
6885 RegVT, dl, Store, StackPtr,
6886 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
6887 // Store it to the final location. Remember the store.
6888 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
6889 ST->getPointerInfo().getWithOffset(Offset),
6890 MinAlign(ST->getAlignment(), Offset),
6891 ST->getMemOperand()->getFlags()));
6892 // Increment the pointers.
6893 Offset += RegBytes;
6894 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6895 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6896 }
6897
6898 // The last store may be partial. Do a truncating store. On big-endian
6899 // machines this requires an extending load from the stack slot to ensure
6900 // that the bits are in the right place.
6901 EVT LoadMemVT =
6902 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
6903
6904 // Load from the stack slot.
6905 SDValue Load = DAG.getExtLoad(
6906 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
6907 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
6908
6909 Stores.push_back(
6910 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
6911 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
6912 MinAlign(ST->getAlignment(), Offset),
6913 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
6914 // The order of the stores doesn't matter - say it with a TokenFactor.
6915 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6916 return Result;
6917 }
6918
6919 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
6920 "Unaligned store of unknown type.");
6921 // Get the half-size VT
6922 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
6923 int NumBits = NewStoredVT.getSizeInBits();
6924 int IncrementSize = NumBits / 8;
6925
6926 // Divide the stored value in two parts.
6927 SDValue ShiftAmount = DAG.getConstant(
6928 NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
6929 SDValue Lo = Val;
6930 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
6931
6932 // Store the two parts
6933 SDValue Store1, Store2;
6934 Store1 = DAG.getTruncStore(Chain, dl,
6935 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
6936 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
6937 ST->getMemOperand()->getFlags());
6938
6939 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6940 Alignment = MinAlign(Alignment, IncrementSize);
6941 Store2 = DAG.getTruncStore(
6942 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
6943 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
6944 ST->getMemOperand()->getFlags(), ST->getAAInfo());
6945
6946 SDValue Result =
6947 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
6948 return Result;
6949 }
6950
6951 SDValue
6952 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
6953 const SDLoc &DL, EVT DataVT,
6954 SelectionDAG &DAG,
6955 bool IsCompressedMemory) const {
6956 SDValue Increment;
6957 EVT AddrVT = Addr.getValueType();
6958 EVT MaskVT = Mask.getValueType();
6959 assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
6960 "Incompatible types of Data and Mask");
6961 if (IsCompressedMemory) {
6962 // Increment the pointer according to the number of '1's in the mask.
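// E.g. for a v4i32 store with mask <1,0,1,1>, popcount(mask) = 3 and the
// address is advanced by 3 * 4 bytes rather than the full vector size.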
6963 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
6964 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
6965 if (MaskIntVT.getSizeInBits() < 32) {
6966 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
6967 MaskIntVT = MVT::i32;
6968 }
6969
6970 // Count '1's with POPCNT.
6971 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
6972 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
6973 // Scale is an element size in bytes.
6974 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
6975 AddrVT);
6976 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
6977 } else
6978 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
6979
6980 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
6981 }
6982
6983 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
6984 SDValue Idx,
6985 EVT VecVT,
6986 const SDLoc &dl) {
6987 if (isa<ConstantSDNode>(Idx))
6988 return Idx;
6989
6990 EVT IdxVT = Idx.getValueType();
6991 unsigned NElts = VecVT.getVectorNumElements();
6992 if (isPowerOf2_32(NElts)) {
6993 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
6994 Log2_32(NElts));
6995 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
6996 DAG.getConstant(Imm, dl, IdxVT));
6997 }
6998
6999 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
7000 DAG.getConstant(NElts - 1, dl, IdxVT));
7001 }
7002
7003 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
7004 SDValue VecPtr, EVT VecVT,
7005 SDValue Index) const {
7006 SDLoc dl(Index);
7007 // Make sure the index type is big enough to compute in.
7008 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
7009
7010 EVT EltVT = VecVT.getVectorElementType();
7011
7012 // Calculate the element offset and add it to the pointer.
7013 unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
7014 assert(EltSize * 8 == EltVT.getSizeInBits() &&
7015 "Converting bits to bytes lost precision");
7016
7017 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
7018
7019 EVT IdxVT = Index.getValueType();
7020
7021 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
7022 DAG.getConstant(EltSize, dl, IdxVT));
7023 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
7024 }
7025
7026 //===----------------------------------------------------------------------===//
7027 // Implementation of Emulated TLS Model
7028 //===----------------------------------------------------------------------===//
7029
7030 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
7031 SelectionDAG &DAG) const {
7032 // Access to the address of TLS variable xyz is lowered to a function call:
7033 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
7034 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7035 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
7036 SDLoc dl(GA);
7037
7038 ArgListTy Args;
7039 ArgListEntry Entry;
7040 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
7041 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
7042 StringRef EmuTlsVarName(NameString);
7043 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
7044 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
7045 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
7046 Entry.Ty = VoidPtrType;
7047 Args.push_back(Entry);
7048
7049 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
7050
7051 TargetLowering::CallLoweringInfo CLI(DAG);
7052 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
7053 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
7054 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7055
7056 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
7057 // At least for X86 targets; maybe good for other targets too?
7058 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7059 MFI.setAdjustsStack(true); // Is this only for X86 target?
7060 MFI.setHasCalls(true);
7061
7062 assert((GA->getOffset() == 0) &&
7063 "Emulated TLS must have zero offset in GlobalAddressSDNode");
7064 return CallResult.first;
7065 }
7066
7067 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7068 SelectionDAG &DAG) const {
7069 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7070 if (!isCtlzFast())
7071 return SDValue();
7072 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7073 SDLoc dl(Op);
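// Lower (seteq X, 0) to (ctlz X) >> log2(BW): ctlz returns the full bit
// width only when X is zero, so the shifted value is 1 iff X == 0.
// E.g. for i32: ctlz(0) = 32 and 32 >> 5 = 1, while any nonzero X has
// ctlz(X) <= 31 and thus yields 0.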
7074 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7075 if (C->isNullValue() && CC == ISD::SETEQ) {
7076 EVT VT = Op.getOperand(0).getValueType();
7077 SDValue Zext = Op.getOperand(0);
7078 if (VT.bitsLT(MVT::i32)) {
7079 VT = MVT::i32;
7080 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7081 }
7082 unsigned Log2b = Log2_32(VT.getSizeInBits());
7083 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7084 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7085 DAG.getConstant(Log2b, dl, MVT::i32));
7086 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7087 }
7088 }
7089 return SDValue();
7090 }
7091
7092 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
7093 unsigned Opcode = Node->getOpcode();
7094 SDValue LHS = Node->getOperand(0);
7095 SDValue RHS = Node->getOperand(1);
7096 EVT VT = LHS.getValueType();
7097 SDLoc dl(Node);
7098
7099 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
7100 assert(VT.isInteger() && "Expected operands to be integers");
7101
7102 // usub.sat(a, b) -> umax(a, b) - b
7103 if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
7104 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
7105 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
7106 }
7107
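// uadd.sat(a, b) -> umin(a, ~b) + b; since ~b == UINT_MAX - b, clamping a
// to at most ~b guarantees the addition cannot wrap.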
7108 if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
7109 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
7110 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
7111 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
7112 }
7113
7114 unsigned OverflowOp;
7115 switch (Opcode) {
7116 case ISD::SADDSAT:
7117 OverflowOp = ISD::SADDO;
7118 break;
7119 case ISD::UADDSAT:
7120 OverflowOp = ISD::UADDO;
7121 break;
7122 case ISD::SSUBSAT:
7123 OverflowOp = ISD::SSUBO;
7124 break;
7125 case ISD::USUBSAT:
7126 OverflowOp = ISD::USUBO;
7127 break;
7128 default:
7129 llvm_unreachable("Expected method to receive signed or unsigned saturation "
7130 "addition or subtraction node.");
7131 }
7132
7133 unsigned BitWidth = LHS.getScalarValueSizeInBits();
7134 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7135 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
7136 LHS, RHS);
7137 SDValue SumDiff = Result.getValue(0);
7138 SDValue Overflow = Result.getValue(1);
7139 SDValue Zero = DAG.getConstant(0, dl, VT);
7140 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
7141
  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
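    // For example, i8 sadd.sat(100, 100): the sum wraps to -56, SADDO
    // reports overflow, and the negative wrapped sum selects SatMax = 127.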
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}

SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow is impossible here, so this works for both UMULFIX
    // and UMULFIXSAT.
    return Hi;

  // Both operands carry a scale factor, so the double-width product must be
  // shifted right by Scale. It is given to us in two halves: the result
  // keeps the low Scale bits of Hi and the high (VTSize - Scale) bits of Lo.
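  // For example, umul.fix on i8 with Scale = 4 (Q4.4): 2.5 * 3.0 is
  // 40 * 48 = 1920 = 0x780, split as Hi = 0x07 and Lo = 0x80;
  // fshr(Hi, Lo, 4) produces 0x78 = 120, i.e. 7.5 in Q4.4.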
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
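    // For example, with Scale = 4 on i8, LowMask is 0x0f and we saturate
    // whenever Hi > 0x0f, i.e. whenever any product bits that cannot be
    // represented after rescaling are set.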
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if
    // the wide product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale == 0 above, so all the bits to examine are in Hi.

  // Saturate to max if (Hi >> (Scale - 1)) > 0,
  // which is the same as if Hi > ((1 << (Scale - 1)) - 1).
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1,
  // which is the same as if Hi < (-1 << (Scale - 1)).
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}

SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX ||
          Opcode == ISD::UDIVFIX) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of leading
  // zeroes. The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  if (LHSLead + RHSTrail < Scale)
    return SDValue();

  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.
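  // For example, divfix(a, b, Scale) computes (a * 2^Scale) / b, so for an
  // i16 sdiv.fix with Scale = 7 and an LHS with at least 7 redundant sign
  // bits, (a << 7) / b yields the Q8.7 quotient directly; any shortfall in
  // LHS headroom is made up by shifting known trailing zeroes off the RHS.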

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
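    // For example, -7 sdiv 2 truncates to -3 with remainder -1; the quotient
    // is negative and the remainder nonzero, so we adjust to -4 = floor(-3.5).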
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT, LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT, LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT, LHS, RHS);

  // TODO: Saturation.

  return Quot;
}

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
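  // An unsigned addition overflowed iff the result is (unsigned) smaller
  // than either operand; an unsigned subtraction underflowed iff the result
  // is larger than the minuend. E.g. for i8: 200 + 100 wraps to 44 < 200,
  // and 10 - 20 wraps to 246 > 10.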
  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
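  // E.g. i8 saddo(100, 100): the wrapped sum -56 is less than LHS even
  // though RHS is positive; the two tests disagree, so the XOR below
  // reports overflow.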
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
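    // E.g. i8 umulo(x, 16): Result = x << 4, and overflow is reported iff
    // shifting back right by 4 fails to recover x, i.e. iff any of the top
    // four bits of x were set.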
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getConstant(VT.getScalarSizeInBits(), dl,
                        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of the
      // low part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus having to be split
    // into two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  bool NoNaN = Node->getFlags().hasNoNaNs();
  unsigned BaseOpcode = 0;
  switch (Node->getOpcode()) {
  default: llvm_unreachable("Expected VECREDUCE opcode");
  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
  case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
  case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
  case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
  case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
  case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
  case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
  case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
  case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
  case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
  case ISD::VECREDUCE_FMAX:
    BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
    break;
  case ISD::VECREDUCE_FMIN:
    BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
    break;
  }

  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  // Try to use a shuffle reduction for power of two vectors.
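  // E.g. a v8i32 add reduction is split into two v4i32 halves that are added
  // elementwise, then v2i32, then v1i32, after which the single remaining
  // element is extracted and the scalar chain below finishes the job.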
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}
