1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CodeGenFunction.h"
15 #include "CGObjCRuntime.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TargetInfo.h"
22 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/Intrinsics.h"
24
25 using namespace clang;
26 using namespace CodeGen;
27 using namespace llvm;
28
29 /// getBuiltinLibFunction - Given a builtin id for a function like
30 /// "__builtin_fabsf", return a Function* for "fabsf".
31 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
32 unsigned BuiltinID) {
33 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
34
35 // Get the name, skip over the __builtin_ prefix (if necessary).
36 StringRef Name;
37 GlobalDecl D(FD);
38
39 // If the builtin has been declared explicitly with an assembler label,
40 // use the mangled name. This differs from the plain label on platforms
41 // that prefix labels.
42 if (FD->hasAttr<AsmLabelAttr>())
43 Name = getMangledName(D);
44 else
45 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;
46
47 llvm::FunctionType *Ty =
48 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
49
50 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
51 }
52
53 /// Emit the conversions required to turn the given value into an
54 /// integer of the given size.
55 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
56 QualType T, llvm::IntegerType *IntType) {
57 V = CGF.EmitToMemory(V, T);
58
59 if (V->getType()->isPointerTy())
60 return CGF.Builder.CreatePtrToInt(V, IntType);
61
62 assert(V->getType() == IntType);
63 return V;
64 }
65
66 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
67 QualType T, llvm::Type *ResultType) {
68 V = CGF.EmitFromMemory(V, T);
69
70 if (ResultType->isPointerTy())
71 return CGF.Builder.CreateIntToPtr(V, ResultType);
72
73 assert(V->getType() == ResultType);
74 return V;
75 }
76
77 /// Utility to insert an atomic instruction based on Intrinsic::ID
78 /// and the expression node.
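/// For example (illustrative only), __sync_fetch_and_add(p, v) on an i32 is
/// lowered through here to roughly:
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
/// and %old (the value before the operation) is returned.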
79 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
80 llvm::AtomicRMWInst::BinOp Kind,
81 const CallExpr *E) {
82 QualType T = E->getType();
83 assert(E->getArg(0)->getType()->isPointerType());
84 assert(CGF.getContext().hasSameUnqualifiedType(T,
85 E->getArg(0)->getType()->getPointeeType()));
86 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
87
88 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
89 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
90
91 llvm::IntegerType *IntType =
92 llvm::IntegerType::get(CGF.getLLVMContext(),
93 CGF.getContext().getTypeSize(T));
94 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
95
96 llvm::Value *Args[2];
97 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
98 Args[1] = CGF.EmitScalarExpr(E->getArg(1));
99 llvm::Type *ValueType = Args[1]->getType();
100 Args[1] = EmitToInt(CGF, Args[1], T, IntType);
101
102 llvm::Value *Result =
103 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
104 llvm::SequentiallyConsistent);
105 Result = EmitFromInt(CGF, Result, T, ValueType);
106 return RValue::get(Result);
107 }
108
109 /// Utility to insert an atomic instruction based on Intrinsic::ID and
110 /// the expression node, where the return value is the result of the
111 /// operation.
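/// For example (illustrative only), __sync_add_and_fetch(p, v) lowers to an
/// atomicrmw followed by re-applying the operation so the *new* value is
/// returned:
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
///   %new = add i32 %old, %v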
112 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
113 llvm::AtomicRMWInst::BinOp Kind,
114 const CallExpr *E,
115 Instruction::BinaryOps Op) {
116 QualType T = E->getType();
117 assert(E->getArg(0)->getType()->isPointerType());
118 assert(CGF.getContext().hasSameUnqualifiedType(T,
119 E->getArg(0)->getType()->getPointeeType()));
120 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
121
122 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
123 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
124
125 llvm::IntegerType *IntType =
126 llvm::IntegerType::get(CGF.getLLVMContext(),
127 CGF.getContext().getTypeSize(T));
128 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
129
130 llvm::Value *Args[2];
131 Args[1] = CGF.EmitScalarExpr(E->getArg(1));
132 llvm::Type *ValueType = Args[1]->getType();
133 Args[1] = EmitToInt(CGF, Args[1], T, IntType);
134 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
135
136 llvm::Value *Result =
137 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
138 llvm::SequentiallyConsistent);
139 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
140 Result = EmitFromInt(CGF, Result, T, ValueType);
141 return RValue::get(Result);
142 }
143
144 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
145 /// which must be a scalar floating point type.
146 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
147 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
148 assert(ValTyP && "isn't scalar fp type!");
149
150 StringRef FnName;
151 switch (ValTyP->getKind()) {
152 default: llvm_unreachable("Isn't a scalar fp type!");
153 case BuiltinType::Float: FnName = "fabsf"; break;
154 case BuiltinType::Double: FnName = "fabs"; break;
155 case BuiltinType::LongDouble: FnName = "fabsl"; break;
156 }
157
158 // The prototype is something that takes and returns whatever V's type is.
159 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
160 false);
161 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
162
163 return CGF.EmitNounwindRuntimeCall(Fn, V, "abs");
164 }
165
166 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
167 const CallExpr *E, llvm::Value *calleeValue) {
168 return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
169 ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
170 }
171
172 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
173 /// depending on IntrinsicID.
174 ///
175 /// \arg CGF The current codegen function.
176 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
177 /// \arg X The first argument to the llvm.*.with.overflow.*.
178 /// \arg Y The second argument to the llvm.*.with.overflow.*.
179 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
180 /// \returns The result (i.e. sum/product) returned by the intrinsic.
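/// The generated IR has roughly this shape (widths vary with the operands):
///   %pair  = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
///   %res   = extractvalue {i32, i1} %pair, 0
///   %carry = extractvalue {i32, i1} %pair, 1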
181 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
182 const llvm::Intrinsic::ID IntrinsicID,
183 llvm::Value *X, llvm::Value *Y,
184 llvm::Value *&Carry) {
185 // Make sure we have integers of the same width.
186 assert(X->getType() == Y->getType() &&
187 "Arguments must be the same type. (Did you forget to make sure both "
188 "arguments have the same integer width?)");
189
190 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
191 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
192 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
193 return CGF.Builder.CreateExtractValue(Tmp, 0);
194 }
195
196 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
197 unsigned BuiltinID, const CallExpr *E) {
198 // See if we can constant fold this builtin. If so, don't emit it at all.
199 Expr::EvalResult Result;
200 if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
201 !Result.hasSideEffects()) {
202 if (Result.Val.isInt())
203 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
204 Result.Val.getInt()));
205 if (Result.Val.isFloat())
206 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
207 Result.Val.getFloat()));
208 }
209
210 switch (BuiltinID) {
211 default: break; // Handle intrinsics and libm functions below.
212 case Builtin::BI__builtin___CFStringMakeConstantString:
213 case Builtin::BI__builtin___NSStringMakeConstantString:
214 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
215 case Builtin::BI__builtin_stdarg_start:
216 case Builtin::BI__builtin_va_start:
217 case Builtin::BI__builtin_va_end: {
218 Value *ArgValue = EmitVAListRef(E->getArg(0));
219 llvm::Type *DestType = Int8PtrTy;
220 if (ArgValue->getType() != DestType)
221 ArgValue = Builder.CreateBitCast(ArgValue, DestType,
222 ArgValue->getName().data());
223
224 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
225 Intrinsic::vaend : Intrinsic::vastart;
226 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
227 }
228 case Builtin::BI__builtin_va_copy: {
229 Value *DstPtr = EmitVAListRef(E->getArg(0));
230 Value *SrcPtr = EmitVAListRef(E->getArg(1));
231
232 llvm::Type *Type = Int8PtrTy;
233
234 DstPtr = Builder.CreateBitCast(DstPtr, Type);
235 SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
236 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
237 DstPtr, SrcPtr));
238 }
239 case Builtin::BI__builtin_abs:
240 case Builtin::BI__builtin_labs:
241 case Builtin::BI__builtin_llabs: {
242 Value *ArgValue = EmitScalarExpr(E->getArg(0));
243
244 Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
245 Value *CmpResult =
246 Builder.CreateICmpSGE(ArgValue,
247 llvm::Constant::getNullValue(ArgValue->getType()),
248 "abscond");
249 Value *Result =
250 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
251
252 return RValue::get(Result);
253 }
254
255 case Builtin::BI__builtin_conj:
256 case Builtin::BI__builtin_conjf:
257 case Builtin::BI__builtin_conjl: {
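// conj(a + bi) == a - bi: keep the real part and negate the imaginary part.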
258 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
259 Value *Real = ComplexVal.first;
260 Value *Imag = ComplexVal.second;
261 Value *Zero =
262 Imag->getType()->isFPOrFPVectorTy()
263 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
264 : llvm::Constant::getNullValue(Imag->getType());
265
266 Imag = Builder.CreateFSub(Zero, Imag, "sub");
267 return RValue::getComplex(std::make_pair(Real, Imag));
268 }
269 case Builtin::BI__builtin_creal:
270 case Builtin::BI__builtin_crealf:
271 case Builtin::BI__builtin_creall:
272 case Builtin::BIcreal:
273 case Builtin::BIcrealf:
274 case Builtin::BIcreall: {
275 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
276 return RValue::get(ComplexVal.first);
277 }
278
279 case Builtin::BI__builtin_cimag:
280 case Builtin::BI__builtin_cimagf:
281 case Builtin::BI__builtin_cimagl:
282 case Builtin::BIcimag:
283 case Builtin::BIcimagf:
284 case Builtin::BIcimagl: {
285 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
286 return RValue::get(ComplexVal.second);
287 }
288
289 case Builtin::BI__builtin_ctzs:
290 case Builtin::BI__builtin_ctz:
291 case Builtin::BI__builtin_ctzl:
292 case Builtin::BI__builtin_ctzll: {
293 Value *ArgValue = EmitScalarExpr(E->getArg(0));
294
295 llvm::Type *ArgType = ArgValue->getType();
296 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
297
298 llvm::Type *ResultType = ConvertType(E->getType());
299 Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
300 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
301 if (Result->getType() != ResultType)
302 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
303 "cast");
304 return RValue::get(Result);
305 }
306 case Builtin::BI__builtin_clzs:
307 case Builtin::BI__builtin_clz:
308 case Builtin::BI__builtin_clzl:
309 case Builtin::BI__builtin_clzll: {
310 Value *ArgValue = EmitScalarExpr(E->getArg(0));
311
312 llvm::Type *ArgType = ArgValue->getType();
313 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
314
315 llvm::Type *ResultType = ConvertType(E->getType());
316 Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
317 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
318 if (Result->getType() != ResultType)
319 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
320 "cast");
321 return RValue::get(Result);
322 }
323 case Builtin::BI__builtin_ffs:
324 case Builtin::BI__builtin_ffsl:
325 case Builtin::BI__builtin_ffsll: {
326 // ffs(x) -> x ? cttz(x) + 1 : 0
327 Value *ArgValue = EmitScalarExpr(E->getArg(0));
328
329 llvm::Type *ArgType = ArgValue->getType();
330 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
331
332 llvm::Type *ResultType = ConvertType(E->getType());
333 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
334 Builder.getTrue()),
335 llvm::ConstantInt::get(ArgType, 1));
336 Value *Zero = llvm::Constant::getNullValue(ArgType);
337 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
338 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
339 if (Result->getType() != ResultType)
340 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
341 "cast");
342 return RValue::get(Result);
343 }
344 case Builtin::BI__builtin_parity:
345 case Builtin::BI__builtin_parityl:
346 case Builtin::BI__builtin_parityll: {
347 // parity(x) -> ctpop(x) & 1
348 Value *ArgValue = EmitScalarExpr(E->getArg(0));
349
350 llvm::Type *ArgType = ArgValue->getType();
351 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
352
353 llvm::Type *ResultType = ConvertType(E->getType());
354 Value *Tmp = Builder.CreateCall(F, ArgValue);
355 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
356 if (Result->getType() != ResultType)
357 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
358 "cast");
359 return RValue::get(Result);
360 }
361 case Builtin::BI__builtin_popcount:
362 case Builtin::BI__builtin_popcountl:
363 case Builtin::BI__builtin_popcountll: {
364 Value *ArgValue = EmitScalarExpr(E->getArg(0));
365
366 llvm::Type *ArgType = ArgValue->getType();
367 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
368
369 llvm::Type *ResultType = ConvertType(E->getType());
370 Value *Result = Builder.CreateCall(F, ArgValue);
371 if (Result->getType() != ResultType)
372 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
373 "cast");
374 return RValue::get(Result);
375 }
376 case Builtin::BI__builtin_expect: {
377 Value *ArgValue = EmitScalarExpr(E->getArg(0));
378 llvm::Type *ArgType = ArgValue->getType();
379
380 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
381 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
382
383 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
384 "expval");
385 return RValue::get(Result);
386 }
387 case Builtin::BI__builtin_bswap16:
388 case Builtin::BI__builtin_bswap32:
389 case Builtin::BI__builtin_bswap64: {
390 Value *ArgValue = EmitScalarExpr(E->getArg(0));
391 llvm::Type *ArgType = ArgValue->getType();
392 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
393 return RValue::get(Builder.CreateCall(F, ArgValue));
394 }
395 case Builtin::BI__builtin_object_size: {
396 // We rely on constant folding to deal with expressions with side effects.
397 assert(!E->getArg(0)->HasSideEffects(getContext()) &&
398 "should have been constant folded");
399
400 // We pass this builtin onto the optimizer so that it can
401 // figure out the object size in more complex cases.
402 llvm::Type *ResType = ConvertType(E->getType());
403
404 // LLVM only supports 0 and 2, make sure that we pass along that
405 // as a boolean.
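// Per the __builtin_object_size convention, bit 1 of the type selects the
// bound: types 0 and 1 request an upper bound (i1 false to the intrinsic),
// types 2 and 3 a lower bound (i1 true).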
406 Value *Ty = EmitScalarExpr(E->getArg(1));
407 ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
408 assert(CI);
409 uint64_t val = CI->getZExtValue();
410 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
411
412 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
413 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
414 }
415 case Builtin::BI__builtin_prefetch: {
416 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
417 // FIXME: Technically these constants should be of type 'int', yes?
418 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
419 llvm::ConstantInt::get(Int32Ty, 0);
420 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
421 llvm::ConstantInt::get(Int32Ty, 3);
422 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
423 Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
424 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
425 }
426 case Builtin::BI__builtin_readcyclecounter: {
427 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
428 return RValue::get(Builder.CreateCall(F));
429 }
430 case Builtin::BI__builtin_trap: {
431 Value *F = CGM.getIntrinsic(Intrinsic::trap);
432 return RValue::get(Builder.CreateCall(F));
433 }
434 case Builtin::BI__debugbreak: {
435 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
436 return RValue::get(Builder.CreateCall(F));
437 }
438 case Builtin::BI__builtin_unreachable: {
439 if (SanOpts->Unreachable)
440 EmitCheck(Builder.getFalse(), "builtin_unreachable",
441 EmitCheckSourceLocation(E->getExprLoc()),
442 ArrayRef<llvm::Value *>(), CRK_Unrecoverable);
443 else
444 Builder.CreateUnreachable();
445
446 // We do need to preserve an insertion point.
447 EmitBlock(createBasicBlock("unreachable.cont"));
448
449 return RValue::get(0);
450 }
451
452 case Builtin::BI__builtin_powi:
453 case Builtin::BI__builtin_powif:
454 case Builtin::BI__builtin_powil: {
455 Value *Base = EmitScalarExpr(E->getArg(0));
456 Value *Exponent = EmitScalarExpr(E->getArg(1));
457 llvm::Type *ArgType = Base->getType();
458 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
459 return RValue::get(Builder.CreateCall2(F, Base, Exponent));
460 }
461
462 case Builtin::BI__builtin_isgreater:
463 case Builtin::BI__builtin_isgreaterequal:
464 case Builtin::BI__builtin_isless:
465 case Builtin::BI__builtin_islessequal:
466 case Builtin::BI__builtin_islessgreater:
467 case Builtin::BI__builtin_isunordered: {
468 // Ordered comparisons: we know the arguments to these are matching scalar
469 // floating point values.
470 Value *LHS = EmitScalarExpr(E->getArg(0));
471 Value *RHS = EmitScalarExpr(E->getArg(1));
472
473 switch (BuiltinID) {
474 default: llvm_unreachable("Unknown ordered comparison");
475 case Builtin::BI__builtin_isgreater:
476 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
477 break;
478 case Builtin::BI__builtin_isgreaterequal:
479 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
480 break;
481 case Builtin::BI__builtin_isless:
482 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
483 break;
484 case Builtin::BI__builtin_islessequal:
485 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
486 break;
487 case Builtin::BI__builtin_islessgreater:
488 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
489 break;
490 case Builtin::BI__builtin_isunordered:
491 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
492 break;
493 }
494 // ZExt bool to int type.
495 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
496 }
497 case Builtin::BI__builtin_isnan: {
498 Value *V = EmitScalarExpr(E->getArg(0));
499 V = Builder.CreateFCmpUNO(V, V, "cmp");
500 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
501 }
502
503 case Builtin::BI__builtin_isinf: {
504 // isinf(x) --> fabs(x) == infinity
505 Value *V = EmitScalarExpr(E->getArg(0));
506 V = EmitFAbs(*this, V, E->getArg(0)->getType());
507
508 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
509 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
510 }
511
512 // TODO: BI__builtin_isinf_sign
513 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
514
515 case Builtin::BI__builtin_isnormal: {
516 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
517 Value *V = EmitScalarExpr(E->getArg(0));
518 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
519
520 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
521 Value *IsLessThanInf =
522 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
523 APFloat Smallest = APFloat::getSmallestNormalized(
524 getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
525 Value *IsNormal =
526 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
527 "isnormal");
528 V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
529 V = Builder.CreateAnd(V, IsNormal, "and");
530 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
531 }
532
533 case Builtin::BI__builtin_isfinite: {
534 // isfinite(x) --> x == x && fabs(x) != infinity;
535 Value *V = EmitScalarExpr(E->getArg(0));
536 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
537
538 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
539 Value *IsNotInf =
540 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
541
542 V = Builder.CreateAnd(Eq, IsNotInf, "and");
543 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
544 }
545
546 case Builtin::BI__builtin_fpclassify: {
547 Value *V = EmitScalarExpr(E->getArg(5));
548 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
549
550 // Create Result
551 BasicBlock *Begin = Builder.GetInsertBlock();
552 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
553 Builder.SetInsertPoint(End);
554 PHINode *Result =
555 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
556 "fpclassify_result");
557
558 // if (V==0) return FP_ZERO
559 Builder.SetInsertPoint(Begin);
560 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
561 "iszero");
562 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
563 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
564 Builder.CreateCondBr(IsZero, End, NotZero);
565 Result->addIncoming(ZeroLiteral, Begin);
566
567 // if (V != V) return FP_NAN
568 Builder.SetInsertPoint(NotZero);
569 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
570 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
571 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
572 Builder.CreateCondBr(IsNan, End, NotNan);
573 Result->addIncoming(NanLiteral, NotZero);
574
575 // if (fabs(V) == infinity) return FP_INFINITY
576 Builder.SetInsertPoint(NotNan);
577 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
578 Value *IsInf =
579 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
580 "isinf");
581 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
582 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
583 Builder.CreateCondBr(IsInf, End, NotInf);
584 Result->addIncoming(InfLiteral, NotNan);
585
586 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
587 Builder.SetInsertPoint(NotInf);
588 APFloat Smallest = APFloat::getSmallestNormalized(
589 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
590 Value *IsNormal =
591 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
592 "isnormal");
593 Value *NormalResult =
594 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
595 EmitScalarExpr(E->getArg(3)));
596 Builder.CreateBr(End);
597 Result->addIncoming(NormalResult, NotInf);
598
599 // return Result
600 Builder.SetInsertPoint(End);
601 return RValue::get(Result);
602 }
603
604 case Builtin::BIalloca:
605 case Builtin::BI__builtin_alloca: {
606 Value *Size = EmitScalarExpr(E->getArg(0));
607 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
608 }
609 case Builtin::BIbzero:
610 case Builtin::BI__builtin_bzero: {
611 std::pair<llvm::Value*, unsigned> Dest =
612 EmitPointerWithAlignment(E->getArg(0));
613 Value *SizeVal = EmitScalarExpr(E->getArg(1));
614 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
615 Dest.second, false);
616 return RValue::get(Dest.first);
617 }
618 case Builtin::BImemcpy:
619 case Builtin::BI__builtin_memcpy: {
620 std::pair<llvm::Value*, unsigned> Dest =
621 EmitPointerWithAlignment(E->getArg(0));
622 std::pair<llvm::Value*, unsigned> Src =
623 EmitPointerWithAlignment(E->getArg(1));
624 Value *SizeVal = EmitScalarExpr(E->getArg(2));
625 unsigned Align = std::min(Dest.second, Src.second);
626 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
627 return RValue::get(Dest.first);
628 }
629
630 case Builtin::BI__builtin___memcpy_chk: {
631 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
632 llvm::APSInt Size, DstSize;
633 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
634 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
635 break;
636 if (Size.ugt(DstSize))
637 break;
638 std::pair<llvm::Value*, unsigned> Dest =
639 EmitPointerWithAlignment(E->getArg(0));
640 std::pair<llvm::Value*, unsigned> Src =
641 EmitPointerWithAlignment(E->getArg(1));
642 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
643 unsigned Align = std::min(Dest.second, Src.second);
644 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
645 return RValue::get(Dest.first);
646 }
647
648 case Builtin::BI__builtin_objc_memmove_collectable: {
649 Value *Address = EmitScalarExpr(E->getArg(0));
650 Value *SrcAddr = EmitScalarExpr(E->getArg(1));
651 Value *SizeVal = EmitScalarExpr(E->getArg(2));
652 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
653 Address, SrcAddr, SizeVal);
654 return RValue::get(Address);
655 }
656
657 case Builtin::BI__builtin___memmove_chk: {
658 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
659 llvm::APSInt Size, DstSize;
660 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
661 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
662 break;
663 if (Size.ugt(DstSize))
664 break;
665 std::pair<llvm::Value*, unsigned> Dest =
666 EmitPointerWithAlignment(E->getArg(0));
667 std::pair<llvm::Value*, unsigned> Src =
668 EmitPointerWithAlignment(E->getArg(1));
669 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
670 unsigned Align = std::min(Dest.second, Src.second);
671 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
672 return RValue::get(Dest.first);
673 }
674
675 case Builtin::BImemmove:
676 case Builtin::BI__builtin_memmove: {
677 std::pair<llvm::Value*, unsigned> Dest =
678 EmitPointerWithAlignment(E->getArg(0));
679 std::pair<llvm::Value*, unsigned> Src =
680 EmitPointerWithAlignment(E->getArg(1));
681 Value *SizeVal = EmitScalarExpr(E->getArg(2));
682 unsigned Align = std::min(Dest.second, Src.second);
683 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
684 return RValue::get(Dest.first);
685 }
686 case Builtin::BImemset:
687 case Builtin::BI__builtin_memset: {
688 std::pair<llvm::Value*, unsigned> Dest =
689 EmitPointerWithAlignment(E->getArg(0));
690 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
691 Builder.getInt8Ty());
692 Value *SizeVal = EmitScalarExpr(E->getArg(2));
693 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
694 return RValue::get(Dest.first);
695 }
696 case Builtin::BI__builtin___memset_chk: {
697 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
698 llvm::APSInt Size, DstSize;
699 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
700 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
701 break;
702 if (Size.ugt(DstSize))
703 break;
704 std::pair<llvm::Value*, unsigned> Dest =
705 EmitPointerWithAlignment(E->getArg(0));
706 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
707 Builder.getInt8Ty());
708 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
709 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
710 return RValue::get(Dest.first);
711 }
712 case Builtin::BI__builtin_dwarf_cfa: {
713 // The offset in bytes from the first argument to the CFA.
714 //
715 // Why on earth is this in the frontend? Is there any reason at
716 // all that the backend can't reasonably determine this while
717 // lowering llvm.eh.dwarf.cfa()?
718 //
719 // TODO: If there's a satisfactory reason, add a target hook for
720 // this instead of hard-coding 0, which is correct for most targets.
721 int32_t Offset = 0;
722
723 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
724 return RValue::get(Builder.CreateCall(F,
725 llvm::ConstantInt::get(Int32Ty, Offset)));
726 }
727 case Builtin::BI__builtin_return_address: {
728 Value *Depth = EmitScalarExpr(E->getArg(0));
729 Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
730 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
731 return RValue::get(Builder.CreateCall(F, Depth));
732 }
733 case Builtin::BI__builtin_frame_address: {
734 Value *Depth = EmitScalarExpr(E->getArg(0));
735 Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
736 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
737 return RValue::get(Builder.CreateCall(F, Depth));
738 }
739 case Builtin::BI__builtin_extract_return_addr: {
740 Value *Address = EmitScalarExpr(E->getArg(0));
741 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
742 return RValue::get(Result);
743 }
744 case Builtin::BI__builtin_frob_return_addr: {
745 Value *Address = EmitScalarExpr(E->getArg(0));
746 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
747 return RValue::get(Result);
748 }
749 case Builtin::BI__builtin_dwarf_sp_column: {
750 llvm::IntegerType *Ty
751 = cast<llvm::IntegerType>(ConvertType(E->getType()));
752 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
753 if (Column == -1) {
754 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
755 return RValue::get(llvm::UndefValue::get(Ty));
756 }
757 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
758 }
759 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
760 Value *Address = EmitScalarExpr(E->getArg(0));
761 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
762 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
763 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
764 }
765 case Builtin::BI__builtin_eh_return: {
766 Value *Int = EmitScalarExpr(E->getArg(0));
767 Value *Ptr = EmitScalarExpr(E->getArg(1));
768
769 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
770 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
771 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
772 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
773 ? Intrinsic::eh_return_i32
774 : Intrinsic::eh_return_i64);
775 Builder.CreateCall2(F, Int, Ptr);
776 Builder.CreateUnreachable();
777
778 // We do need to preserve an insertion point.
779 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
780
781 return RValue::get(0);
782 }
783 case Builtin::BI__builtin_unwind_init: {
784 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
785 return RValue::get(Builder.CreateCall(F));
786 }
787 case Builtin::BI__builtin_extend_pointer: {
788 // Extends a pointer to the size of an _Unwind_Word, which is
789 // uint64_t on all platforms. Generally this gets poked into a
790 // register and eventually used as an address, so if the
791 // addressing registers are wider than pointers and the platform
792 // doesn't implicitly ignore high-order bits when doing
793 // addressing, we need to make sure we zext / sext based on
794 // the platform's expectations.
795 //
796 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
797
798 // Cast the pointer to intptr_t.
799 Value *Ptr = EmitScalarExpr(E->getArg(0));
800 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
801
802 // If that's 64 bits, we're done.
803 if (IntPtrTy->getBitWidth() == 64)
804 return RValue::get(Result);
805
806 // Otherwise, ask the codegen data what to do.
807 if (getTargetHooks().extendPointerWithSExt())
808 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
809 else
810 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
811 }
812 case Builtin::BI__builtin_setjmp: {
813 // Buffer is a void**.
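// As emitted below, slot 0 of the buffer receives the frame address and
// slot 2 the saved stack pointer; the remaining slots are left for the
// llvm.eh.sjlj.setjmp intrinsic / target to manage.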
814 Value *Buf = EmitScalarExpr(E->getArg(0));
815
816 // Store the frame pointer to the setjmp buffer.
817 Value *FrameAddr =
818 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
819 ConstantInt::get(Int32Ty, 0));
820 Builder.CreateStore(FrameAddr, Buf);
821
822 // Store the stack pointer to the setjmp buffer.
823 Value *StackAddr =
824 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
825 Value *StackSaveSlot =
826 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
827 Builder.CreateStore(StackAddr, StackSaveSlot);
828
829 // Call LLVM's EH setjmp, which is lightweight.
830 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
831 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
832 return RValue::get(Builder.CreateCall(F, Buf));
833 }
834 case Builtin::BI__builtin_longjmp: {
835 Value *Buf = EmitScalarExpr(E->getArg(0));
836 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
837
838 // Call LLVM's EH longjmp, which is lightweight.
839 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
840
841 // longjmp doesn't return; mark this as unreachable.
842 Builder.CreateUnreachable();
843
844 // We do need to preserve an insertion point.
845 EmitBlock(createBasicBlock("longjmp.cont"));
846
847 return RValue::get(0);
848 }
849 case Builtin::BI__sync_fetch_and_add:
850 case Builtin::BI__sync_fetch_and_sub:
851 case Builtin::BI__sync_fetch_and_or:
852 case Builtin::BI__sync_fetch_and_and:
853 case Builtin::BI__sync_fetch_and_xor:
854 case Builtin::BI__sync_add_and_fetch:
855 case Builtin::BI__sync_sub_and_fetch:
856 case Builtin::BI__sync_and_and_fetch:
857 case Builtin::BI__sync_or_and_fetch:
858 case Builtin::BI__sync_xor_and_fetch:
859 case Builtin::BI__sync_val_compare_and_swap:
860 case Builtin::BI__sync_bool_compare_and_swap:
861 case Builtin::BI__sync_lock_test_and_set:
862 case Builtin::BI__sync_lock_release:
863 case Builtin::BI__sync_swap:
864 llvm_unreachable("Shouldn't make it through sema");
865 case Builtin::BI__sync_fetch_and_add_1:
866 case Builtin::BI__sync_fetch_and_add_2:
867 case Builtin::BI__sync_fetch_and_add_4:
868 case Builtin::BI__sync_fetch_and_add_8:
869 case Builtin::BI__sync_fetch_and_add_16:
870 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
871 case Builtin::BI__sync_fetch_and_sub_1:
872 case Builtin::BI__sync_fetch_and_sub_2:
873 case Builtin::BI__sync_fetch_and_sub_4:
874 case Builtin::BI__sync_fetch_and_sub_8:
875 case Builtin::BI__sync_fetch_and_sub_16:
876 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
877 case Builtin::BI__sync_fetch_and_or_1:
878 case Builtin::BI__sync_fetch_and_or_2:
879 case Builtin::BI__sync_fetch_and_or_4:
880 case Builtin::BI__sync_fetch_and_or_8:
881 case Builtin::BI__sync_fetch_and_or_16:
882 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
883 case Builtin::BI__sync_fetch_and_and_1:
884 case Builtin::BI__sync_fetch_and_and_2:
885 case Builtin::BI__sync_fetch_and_and_4:
886 case Builtin::BI__sync_fetch_and_and_8:
887 case Builtin::BI__sync_fetch_and_and_16:
888 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
889 case Builtin::BI__sync_fetch_and_xor_1:
890 case Builtin::BI__sync_fetch_and_xor_2:
891 case Builtin::BI__sync_fetch_and_xor_4:
892 case Builtin::BI__sync_fetch_and_xor_8:
893 case Builtin::BI__sync_fetch_and_xor_16:
894 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
895
896 // Clang extensions: not overloaded yet.
897 case Builtin::BI__sync_fetch_and_min:
898 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
899 case Builtin::BI__sync_fetch_and_max:
900 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
901 case Builtin::BI__sync_fetch_and_umin:
902 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
903 case Builtin::BI__sync_fetch_and_umax:
904 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
905
906 case Builtin::BI__sync_add_and_fetch_1:
907 case Builtin::BI__sync_add_and_fetch_2:
908 case Builtin::BI__sync_add_and_fetch_4:
909 case Builtin::BI__sync_add_and_fetch_8:
910 case Builtin::BI__sync_add_and_fetch_16:
911 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
912 llvm::Instruction::Add);
913 case Builtin::BI__sync_sub_and_fetch_1:
914 case Builtin::BI__sync_sub_and_fetch_2:
915 case Builtin::BI__sync_sub_and_fetch_4:
916 case Builtin::BI__sync_sub_and_fetch_8:
917 case Builtin::BI__sync_sub_and_fetch_16:
918 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
919 llvm::Instruction::Sub);
920 case Builtin::BI__sync_and_and_fetch_1:
921 case Builtin::BI__sync_and_and_fetch_2:
922 case Builtin::BI__sync_and_and_fetch_4:
923 case Builtin::BI__sync_and_and_fetch_8:
924 case Builtin::BI__sync_and_and_fetch_16:
925 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
926 llvm::Instruction::And);
927 case Builtin::BI__sync_or_and_fetch_1:
928 case Builtin::BI__sync_or_and_fetch_2:
929 case Builtin::BI__sync_or_and_fetch_4:
930 case Builtin::BI__sync_or_and_fetch_8:
931 case Builtin::BI__sync_or_and_fetch_16:
932 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
933 llvm::Instruction::Or);
934 case Builtin::BI__sync_xor_and_fetch_1:
935 case Builtin::BI__sync_xor_and_fetch_2:
936 case Builtin::BI__sync_xor_and_fetch_4:
937 case Builtin::BI__sync_xor_and_fetch_8:
938 case Builtin::BI__sync_xor_and_fetch_16:
939 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
940 llvm::Instruction::Xor);
941
942 case Builtin::BI__sync_val_compare_and_swap_1:
943 case Builtin::BI__sync_val_compare_and_swap_2:
944 case Builtin::BI__sync_val_compare_and_swap_4:
945 case Builtin::BI__sync_val_compare_and_swap_8:
946 case Builtin::BI__sync_val_compare_and_swap_16: {
947 QualType T = E->getType();
948 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
949 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
950
951 llvm::IntegerType *IntType =
952 llvm::IntegerType::get(getLLVMContext(),
953 getContext().getTypeSize(T));
954 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
955
956 Value *Args[3];
957 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
958 Args[1] = EmitScalarExpr(E->getArg(1));
959 llvm::Type *ValueType = Args[1]->getType();
960 Args[1] = EmitToInt(*this, Args[1], T, IntType);
961 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
962
963 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
964 llvm::SequentiallyConsistent);
965 Result = EmitFromInt(*this, Result, T, ValueType);
966 return RValue::get(Result);
967 }
968
969 case Builtin::BI__sync_bool_compare_and_swap_1:
970 case Builtin::BI__sync_bool_compare_and_swap_2:
971 case Builtin::BI__sync_bool_compare_and_swap_4:
972 case Builtin::BI__sync_bool_compare_and_swap_8:
973 case Builtin::BI__sync_bool_compare_and_swap_16: {
974 QualType T = E->getArg(1)->getType();
975 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
976 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
977
978 llvm::IntegerType *IntType =
979 llvm::IntegerType::get(getLLVMContext(),
980 getContext().getTypeSize(T));
981 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
982
983 Value *Args[3];
984 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
985 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
986 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
987
988 Value *OldVal = Args[1];
989 Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
990 llvm::SequentiallyConsistent);
991 Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
992 // zext bool to int.
993 Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
994 return RValue::get(Result);
995 }
996
997 case Builtin::BI__sync_swap_1:
998 case Builtin::BI__sync_swap_2:
999 case Builtin::BI__sync_swap_4:
1000 case Builtin::BI__sync_swap_8:
1001 case Builtin::BI__sync_swap_16:
1002 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1003
1004 case Builtin::BI__sync_lock_test_and_set_1:
1005 case Builtin::BI__sync_lock_test_and_set_2:
1006 case Builtin::BI__sync_lock_test_and_set_4:
1007 case Builtin::BI__sync_lock_test_and_set_8:
1008 case Builtin::BI__sync_lock_test_and_set_16:
1009 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1010
1011 case Builtin::BI__sync_lock_release_1:
1012 case Builtin::BI__sync_lock_release_2:
1013 case Builtin::BI__sync_lock_release_4:
1014 case Builtin::BI__sync_lock_release_8:
1015 case Builtin::BI__sync_lock_release_16: {
1016 Value *Ptr = EmitScalarExpr(E->getArg(0));
1017 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1018 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1019 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1020 StoreSize.getQuantity() * 8);
1021 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1022 llvm::StoreInst *Store =
1023 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
1024 Store->setAlignment(StoreSize.getQuantity());
1025 Store->setAtomic(llvm::Release);
1026 return RValue::get(0);
1027 }
1028
1029 case Builtin::BI__sync_synchronize: {
1030 // We assume this is supposed to correspond to a C++0x-style
1031 // sequentially-consistent fence (i.e. this is only usable for
1032 // synchronization, not device I/O or anything like that). This intrinsic
1033 // is really badly designed in the sense that in theory, there isn't
1034 // any way to safely use it... but in practice, it mostly works
1035 // to use it with non-atomic loads and stores to get acquire/release
1036 // semantics.
1037 Builder.CreateFence(llvm::SequentiallyConsistent);
1038 return RValue::get(0);
1039 }
1040
1041 case Builtin::BI__c11_atomic_is_lock_free:
1042 case Builtin::BI__atomic_is_lock_free: {
1043 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1044 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1045 // _Atomic(T) is always properly-aligned.
1046 const char *LibCallName = "__atomic_is_lock_free";
1047 CallArgList Args;
1048 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1049 getContext().getSizeType());
1050 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1051 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1052 getContext().VoidPtrTy);
1053 else
1054 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1055 getContext().VoidPtrTy);
1056 const CGFunctionInfo &FuncInfo =
1057 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1058 FunctionType::ExtInfo(),
1059 RequiredArgs::All);
1060 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1061 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1062 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1063 }
1064
1065 case Builtin::BI__atomic_test_and_set: {
1066 // Look at the argument type to determine whether this is a volatile
1067 // operation. The parameter type is always volatile.
1068 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1069 bool Volatile =
1070 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1071
1072 Value *Ptr = EmitScalarExpr(E->getArg(0));
1073 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1074 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1075 Value *NewVal = Builder.getInt8(1);
1076 Value *Order = EmitScalarExpr(E->getArg(1));
1077 if (isa<llvm::ConstantInt>(Order)) {
1078 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1079 AtomicRMWInst *Result = 0;
1080 switch (ord) {
1081 case 0: // memory_order_relaxed
1082 default: // invalid order
1083 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1084 Ptr, NewVal,
1085 llvm::Monotonic);
1086 break;
1087 case 1: // memory_order_consume
1088 case 2: // memory_order_acquire
1089 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1090 Ptr, NewVal,
1091 llvm::Acquire);
1092 break;
1093 case 3: // memory_order_release
1094 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1095 Ptr, NewVal,
1096 llvm::Release);
1097 break;
1098 case 4: // memory_order_acq_rel
1099 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1100 Ptr, NewVal,
1101 llvm::AcquireRelease);
1102 break;
1103 case 5: // memory_order_seq_cst
1104 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1105 Ptr, NewVal,
1106 llvm::SequentiallyConsistent);
1107 break;
1108 }
1109 Result->setVolatile(Volatile);
1110 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1111 }
1112
1113 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1114
1115 llvm::BasicBlock *BBs[5] = {
1116 createBasicBlock("monotonic", CurFn),
1117 createBasicBlock("acquire", CurFn),
1118 createBasicBlock("release", CurFn),
1119 createBasicBlock("acqrel", CurFn),
1120 createBasicBlock("seqcst", CurFn)
1121 };
1122 llvm::AtomicOrdering Orders[5] = {
1123 llvm::Monotonic, llvm::Acquire, llvm::Release,
1124 llvm::AcquireRelease, llvm::SequentiallyConsistent
1125 };
1126
1127 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1128 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1129
1130 Builder.SetInsertPoint(ContBB);
1131 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1132
1133 for (unsigned i = 0; i < 5; ++i) {
1134 Builder.SetInsertPoint(BBs[i]);
1135 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1136 Ptr, NewVal, Orders[i]);
1137 RMW->setVolatile(Volatile);
1138 Result->addIncoming(RMW, BBs[i]);
1139 Builder.CreateBr(ContBB);
1140 }
1141
1142 SI->addCase(Builder.getInt32(0), BBs[0]);
1143 SI->addCase(Builder.getInt32(1), BBs[1]);
1144 SI->addCase(Builder.getInt32(2), BBs[1]);
1145 SI->addCase(Builder.getInt32(3), BBs[2]);
1146 SI->addCase(Builder.getInt32(4), BBs[3]);
1147 SI->addCase(Builder.getInt32(5), BBs[4]);
1148
1149 Builder.SetInsertPoint(ContBB);
1150 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1151 }
1152
1153 case Builtin::BI__atomic_clear: {
1154 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1155 bool Volatile =
1156 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1157
1158 Value *Ptr = EmitScalarExpr(E->getArg(0));
1159 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1160 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1161 Value *NewVal = Builder.getInt8(0);
1162 Value *Order = EmitScalarExpr(E->getArg(1));
1163 if (isa<llvm::ConstantInt>(Order)) {
1164 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1165 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1166 Store->setAlignment(1);
1167 switch (ord) {
1168 case 0: // memory_order_relaxed
1169 default: // invalid order
1170 Store->setOrdering(llvm::Monotonic);
1171 break;
1172 case 3: // memory_order_release
1173 Store->setOrdering(llvm::Release);
1174 break;
1175 case 5: // memory_order_seq_cst
1176 Store->setOrdering(llvm::SequentiallyConsistent);
1177 break;
1178 }
1179 return RValue::get(0);
1180 }
1181
1182 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1183
1184 llvm::BasicBlock *BBs[3] = {
1185 createBasicBlock("monotonic", CurFn),
1186 createBasicBlock("release", CurFn),
1187 createBasicBlock("seqcst", CurFn)
1188 };
1189 llvm::AtomicOrdering Orders[3] = {
1190 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1191 };
1192
1193 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1194 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1195
1196 for (unsigned i = 0; i < 3; ++i) {
1197 Builder.SetInsertPoint(BBs[i]);
1198 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1199 Store->setAlignment(1);
1200 Store->setOrdering(Orders[i]);
1201 Builder.CreateBr(ContBB);
1202 }
1203
1204 SI->addCase(Builder.getInt32(0), BBs[0]);
1205 SI->addCase(Builder.getInt32(3), BBs[1]);
1206 SI->addCase(Builder.getInt32(5), BBs[2]);
1207
1208 Builder.SetInsertPoint(ContBB);
1209 return RValue::get(0);
1210 }
1211
1212 case Builtin::BI__atomic_thread_fence:
1213 case Builtin::BI__atomic_signal_fence:
1214 case Builtin::BI__c11_atomic_thread_fence:
1215 case Builtin::BI__c11_atomic_signal_fence: {
1216 llvm::SynchronizationScope Scope;
1217 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1218 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1219 Scope = llvm::SingleThread;
1220 else
1221 Scope = llvm::CrossThread;
1222 Value *Order = EmitScalarExpr(E->getArg(0));
1223 if (isa<llvm::ConstantInt>(Order)) {
1224 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1225 switch (ord) {
1226 case 0: // memory_order_relaxed
1227 default: // invalid order
1228 break;
1229 case 1: // memory_order_consume
1230 case 2: // memory_order_acquire
1231 Builder.CreateFence(llvm::Acquire, Scope);
1232 break;
1233 case 3: // memory_order_release
1234 Builder.CreateFence(llvm::Release, Scope);
1235 break;
1236 case 4: // memory_order_acq_rel
1237 Builder.CreateFence(llvm::AcquireRelease, Scope);
1238 break;
1239 case 5: // memory_order_seq_cst
1240 Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1241 break;
1242 }
1243 return RValue::get(0);
1244 }
1245
1246 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1247 AcquireBB = createBasicBlock("acquire", CurFn);
1248 ReleaseBB = createBasicBlock("release", CurFn);
1249 AcqRelBB = createBasicBlock("acqrel", CurFn);
1250 SeqCstBB = createBasicBlock("seqcst", CurFn);
1251 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1252
1253 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1254 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1255
1256 Builder.SetInsertPoint(AcquireBB);
1257 Builder.CreateFence(llvm::Acquire, Scope);
1258 Builder.CreateBr(ContBB);
1259 SI->addCase(Builder.getInt32(1), AcquireBB);
1260 SI->addCase(Builder.getInt32(2), AcquireBB);
1261
1262 Builder.SetInsertPoint(ReleaseBB);
1263 Builder.CreateFence(llvm::Release, Scope);
1264 Builder.CreateBr(ContBB);
1265 SI->addCase(Builder.getInt32(3), ReleaseBB);
1266
1267 Builder.SetInsertPoint(AcqRelBB);
1268 Builder.CreateFence(llvm::AcquireRelease, Scope);
1269 Builder.CreateBr(ContBB);
1270 SI->addCase(Builder.getInt32(4), AcqRelBB);
1271
1272 Builder.SetInsertPoint(SeqCstBB);
1273 Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1274 Builder.CreateBr(ContBB);
1275 SI->addCase(Builder.getInt32(5), SeqCstBB);
1276
1277 Builder.SetInsertPoint(ContBB);
1278 return RValue::get(0);
1279 }
1280
1281 // Library functions with special handling.
1282 case Builtin::BIsqrt:
1283 case Builtin::BIsqrtf:
1284 case Builtin::BIsqrtl: {
1285 // TODO: there is currently no set of optimizer flags
1286 // sufficient for us to rewrite sqrt to @llvm.sqrt.
1287 // -fmath-errno=0 is not good enough; we need finiteness.
1288 // We could probably precondition the call with an ult
1289 // against 0, but is that worth the complexity?
1290 break;
1291 }
1292
1293 case Builtin::BIpow:
1294 case Builtin::BIpowf:
1295 case Builtin::BIpowl: {
1296 // Rewrite pow to intrinsic if allowed.
1297 if (!FD->hasAttr<ConstAttr>())
1298 break;
1299 Value *Base = EmitScalarExpr(E->getArg(0));
1300 Value *Exponent = EmitScalarExpr(E->getArg(1));
1301 llvm::Type *ArgType = Base->getType();
1302 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1303 return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1304 }
1305
1306 case Builtin::BIfma:
1307 case Builtin::BIfmaf:
1308 case Builtin::BIfmal:
1309 case Builtin::BI__builtin_fma:
1310 case Builtin::BI__builtin_fmaf:
1311 case Builtin::BI__builtin_fmal: {
1312 // Rewrite fma to intrinsic.
1313 Value *FirstArg = EmitScalarExpr(E->getArg(0));
1314 llvm::Type *ArgType = FirstArg->getType();
1315 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1316 return RValue::get(Builder.CreateCall3(F, FirstArg,
1317 EmitScalarExpr(E->getArg(1)),
1318 EmitScalarExpr(E->getArg(2))));
1319 }
1320
1321 case Builtin::BI__builtin_signbit:
1322 case Builtin::BI__builtin_signbitf:
1323 case Builtin::BI__builtin_signbitl: {
1324 LLVMContext &C = CGM.getLLVMContext();
1325
1326 Value *Arg = EmitScalarExpr(E->getArg(0));
1327 llvm::Type *ArgTy = Arg->getType();
1328 if (ArgTy->isPPC_FP128Ty())
1329 break; // FIXME: I'm not sure what the right implementation is here.
1330 int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1331 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1332 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1333 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1334 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1335 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1336 }
1337 case Builtin::BI__builtin_annotation: {
1338 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1339 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1340 AnnVal->getType());
1341
1342 // Get the annotation string, go through casts. Sema requires this to be a
1343 // non-wide string literal, potentially cast, so the cast<> is safe.
1344 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1345 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1346 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1347 }
1348 case Builtin::BI__builtin_addcs:
1349 case Builtin::BI__builtin_addc:
1350 case Builtin::BI__builtin_addcl:
1351 case Builtin::BI__builtin_addcll:
1352 case Builtin::BI__builtin_subcs:
1353 case Builtin::BI__builtin_subc:
1354 case Builtin::BI__builtin_subcl:
1355 case Builtin::BI__builtin_subcll: {
1356
1357 // We translate all of these builtins from expressions of the form:
1358 // int x = ..., y = ..., carryin = ..., carryout, result;
1359 // result = __builtin_addc(x, y, carryin, &carryout);
1360 //
1361 // to LLVM IR of the form:
1362 //
1363 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1364 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1365 // %carry1 = extractvalue {i32, i1} %tmp1, 1
1366 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1367 // i32 %carryin)
1368 // %result = extractvalue {i32, i1} %tmp2, 0
1369 // %carry2 = extractvalue {i32, i1} %tmp2, 1
1370 // %tmp3 = or i1 %carry1, %carry2
1371 // %tmp4 = zext i1 %tmp3 to i32
1372 // store i32 %tmp4, i32* %carryout
1373
1374 // Scalarize our inputs.
1375 llvm::Value *X = EmitScalarExpr(E->getArg(0));
1376 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1377 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1378 std::pair<llvm::Value*, unsigned> CarryOutPtr =
1379 EmitPointerWithAlignment(E->getArg(3));
1380
1381 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1382 llvm::Intrinsic::ID IntrinsicId;
1383 switch (BuiltinID) {
1384 default: llvm_unreachable("Unknown multiprecision builtin id.");
1385 case Builtin::BI__builtin_addcs:
1386 case Builtin::BI__builtin_addc:
1387 case Builtin::BI__builtin_addcl:
1388 case Builtin::BI__builtin_addcll:
1389 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1390 break;
1391 case Builtin::BI__builtin_subcs:
1392 case Builtin::BI__builtin_subc:
1393 case Builtin::BI__builtin_subcl:
1394 case Builtin::BI__builtin_subcll:
1395 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1396 break;
1397 }
1398
1399 // Construct our resulting LLVM IR expression.
1400 llvm::Value *Carry1;
1401 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1402 X, Y, Carry1);
1403 llvm::Value *Carry2;
1404 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1405 Sum1, Carryin, Carry2);
1406 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1407 X->getType());
1408 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
1409 CarryOutPtr.first);
1410 CarryOutStore->setAlignment(CarryOutPtr.second);
1411 return RValue::get(Sum2);
1412 }
1413 case Builtin::BI__noop:
1414 return RValue::get(0);
1415 }
1416
1417 // If this is an alias for a lib function (e.g. __builtin_sin), emit
1418 // the call using the normal call path, but using the unmangled
1419 // version of the function name.
1420 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1421 return emitLibraryCall(*this, FD, E,
1422 CGM.getBuiltinLibFunction(FD, BuiltinID));
1423
1424 // If this is a predefined lib function (e.g. malloc), emit the call
1425 // using exactly the normal call path.
1426 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1427 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1428
1429 // See if we have a target specific intrinsic.
1430 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1431 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1432 if (const char *Prefix =
1433 llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
1434 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1435
1436 if (IntrinsicID != Intrinsic::not_intrinsic) {
1437 SmallVector<Value*, 16> Args;
1438
1439 // Find out if any arguments are required to be integer constant
1440 // expressions.
1441 unsigned ICEArguments = 0;
1442 ASTContext::GetBuiltinTypeError Error;
1443 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1444 assert(Error == ASTContext::GE_None && "Should not codegen an error");
1445
1446 Function *F = CGM.getIntrinsic(IntrinsicID);
1447 llvm::FunctionType *FTy = F->getFunctionType();
1448
1449 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1450 Value *ArgValue;
1451 // If this is a normal argument, just emit it as a scalar.
1452 if ((ICEArguments & (1 << i)) == 0) {
1453 ArgValue = EmitScalarExpr(E->getArg(i));
1454 } else {
1455 // If this is required to be a constant, constant fold it so that we
1456 // know that the generated intrinsic gets a ConstantInt.
1457 llvm::APSInt Result;
1458 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1459 assert(IsConst && "Constant arg isn't actually constant?");
1460 (void)IsConst;
1461 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1462 }
1463
1464 // If the intrinsic arg type is different from the builtin arg type
1465 // we need to do a bit cast.
1466 llvm::Type *PTy = FTy->getParamType(i);
1467 if (PTy != ArgValue->getType()) {
1468 assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
1469 "Must be able to losslessly bit cast to param");
1470 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1471 }
1472
1473 Args.push_back(ArgValue);
1474 }
1475
1476 Value *V = Builder.CreateCall(F, Args);
1477 QualType BuiltinRetType = E->getType();
1478
1479 llvm::Type *RetTy = VoidTy;
1480 if (!BuiltinRetType->isVoidType())
1481 RetTy = ConvertType(BuiltinRetType);
1482
1483 if (RetTy != V->getType()) {
1484 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1485 "Must be able to losslessly bit cast result type");
1486 V = Builder.CreateBitCast(V, RetTy);
1487 }
1488
1489 return RValue::get(V);
1490 }
1491
1492 // See if we have a target specific builtin that needs to be lowered.
1493 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1494 return RValue::get(V);
1495
1496 ErrorUnsupported(E, "builtin function");
1497
1498 // Unknown builtin, for now just dump it out and return undef.
1499 return GetUndefRValue(E->getType());
1500 }
1501
1502 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1503 const CallExpr *E) {
1504 switch (Target.getTriple().getArch()) {
1505 case llvm::Triple::arm:
1506 case llvm::Triple::thumb:
1507 return EmitARMBuiltinExpr(BuiltinID, E);
1508 case llvm::Triple::x86:
1509 case llvm::Triple::x86_64:
1510 return EmitX86BuiltinExpr(BuiltinID, E);
1511 case llvm::Triple::ppc:
1512 case llvm::Triple::ppc64:
1513 return EmitPPCBuiltinExpr(BuiltinID, E);
1514 default:
1515 return 0;
1516 }
1517 }
1518
1519 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1520 NeonTypeFlags TypeFlags) {
1521 int IsQuad = TypeFlags.isQuad();
1522 switch (TypeFlags.getEltType()) {
1523 case NeonTypeFlags::Int8:
1524 case NeonTypeFlags::Poly8:
1525 return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
1526 case NeonTypeFlags::Int16:
1527 case NeonTypeFlags::Poly16:
1528 case NeonTypeFlags::Float16:
1529 return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
1530 case NeonTypeFlags::Int32:
1531 return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
1532 case NeonTypeFlags::Int64:
1533 return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
1534 case NeonTypeFlags::Float32:
1535 return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
1536 }
1537 llvm_unreachable("Invalid NeonTypeFlags element type!");
1538 }
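// For example, NeonTypeFlags::Int32 maps to <2 x i32> for a D register and,
// with the quad flag set, to <4 x i32> for a Q register; Float32 maps to
// <2 x float> / <4 x float> in the same way.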
1539
1540 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1541 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1542 Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1543 return Builder.CreateShuffleVector(V, V, SV, "lane");
1544 }
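// E.g. splatting lane 1 of a <4 x i32> value uses the shuffle mask
// <1, 1, 1, 1>, so every element of the result is a copy of element 1.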
1545
1546 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1547 const char *name,
1548 unsigned shift, bool rightshift) {
1549 unsigned j = 0;
1550 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1551 ai != ae; ++ai, ++j)
1552 if (shift > 0 && shift == j)
1553 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1554 else
1555 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1556
1557 return Builder.CreateCall(F, Ops, name);
1558 }
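// Note: when 'shift' is nonzero, the operand at index 'shift' is assumed to
// be a constant shift amount; it is replaced by a vector splat of that amount
// (negated for right shifts) so it matches the intrinsic's expected operand.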
1559
1560 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1561 bool neg) {
1562 int SV = cast<ConstantInt>(V)->getSExtValue();
1563
1564 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1565 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1566 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1567 }
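// E.g. for a constant shift of 3 and a <4 x i16> type this returns
// <i16 3, i16 3, i16 3, i16 3>, or the splat of -3 when 'neg' is set.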
1568
1569 /// EmitPointerWithAlignment - Given an expression with a pointer type, emit
1570 /// its value and determine the alignment of the type it points to. Skip over
1571 /// implicit casts.
1572 std::pair<llvm::Value*, unsigned>
1573 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
1574 assert(Addr->getType()->isPointerType());
1575 Addr = Addr->IgnoreParens();
1576 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
1577 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
1578 ICE->getSubExpr()->getType()->isPointerType()) {
1579 std::pair<llvm::Value*, unsigned> Ptr =
1580 EmitPointerWithAlignment(ICE->getSubExpr());
1581 Ptr.first = Builder.CreateBitCast(Ptr.first,
1582 ConvertType(Addr->getType()));
1583 return Ptr;
1584 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
1585 LValue LV = EmitLValue(ICE->getSubExpr());
1586 unsigned Align = LV.getAlignment().getQuantity();
1587 if (!Align) {
1588 // FIXME: Once LValues are fixed to always set alignment,
1589 // zap this code.
1590 QualType PtTy = ICE->getSubExpr()->getType();
1591 if (!PtTy->isIncompleteType())
1592 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1593 else
1594 Align = 1;
1595 }
1596 return std::make_pair(LV.getAddress(), Align);
1597 }
1598 }
1599 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
1600 if (UO->getOpcode() == UO_AddrOf) {
1601 LValue LV = EmitLValue(UO->getSubExpr());
1602 unsigned Align = LV.getAlignment().getQuantity();
1603 if (!Align) {
1604 // FIXME: Once LValues are fixed to always set alignment,
1605 // zap this code.
1606 QualType PtTy = UO->getSubExpr()->getType();
1607 if (!PtTy->isIncompleteType())
1608 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1609 else
1610 Align = 1;
1611 }
1612 return std::make_pair(LV.getAddress(), Align);
1613 }
1614 }
1615
1616 unsigned Align = 1;
1617 QualType PtTy = Addr->getType()->getPointeeType();
1618 if (!PtTy->isIncompleteType())
1619 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1620
1621 return std::make_pair(EmitScalarExpr(Addr), Align);
1622 }
1623
1624 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1625 const CallExpr *E) {
1626 if (BuiltinID == ARM::BI__clear_cache) {
1627 const FunctionDecl *FD = E->getDirectCallee();
1628 // Oddly, people sometimes write this call without arguments and gcc accepts
1629 // it; the builtin is also marked as varargs in the description file.
1630 SmallVector<Value*, 2> Ops;
1631 for (unsigned i = 0; i < E->getNumArgs(); i++)
1632 Ops.push_back(EmitScalarExpr(E->getArg(i)));
1633 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1634 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1635 StringRef Name = FD->getName();
1636 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1637 }
1638
1639 if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
1640 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1641
1642 Value *LdPtr = EmitScalarExpr(E->getArg(0));
1643 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
1644
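// The {i32, i32} result holds the low word in element 0 and the high word
// in element 1; reassemble them into a single i64.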
1645 Value *Val0 = Builder.CreateExtractValue(Val, 1);
1646 Value *Val1 = Builder.CreateExtractValue(Val, 0);
1647 Val0 = Builder.CreateZExt(Val0, Int64Ty);
1648 Val1 = Builder.CreateZExt(Val1, Int64Ty);
1649
1650 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1651 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1652 return Builder.CreateOr(Val, Val1);
1653 }
1654
1655 if (BuiltinID == ARM::BI__builtin_arm_strexd) {
1656 Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
1657 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
1658
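// Split the i64 argument into two i32 halves by spilling it to a stack slot
// and reloading it through a {i32, i32} view.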
1659 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1660 Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
1661 Value *Val = EmitScalarExpr(E->getArg(0));
1662 Builder.CreateStore(Val, Tmp);
1663
1664 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
1665 Val = Builder.CreateLoad(LdPtr);
1666
1667 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
1668 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
1669 Value *StPtr = EmitScalarExpr(E->getArg(1));
1670 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
1671 }
1672
1673 SmallVector<Value*, 4> Ops;
1674 llvm::Value *Align = 0;
1675 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
1676 if (i == 0) {
1677 switch (BuiltinID) {
1678 case ARM::BI__builtin_neon_vld1_v:
1679 case ARM::BI__builtin_neon_vld1q_v:
1680 case ARM::BI__builtin_neon_vld1q_lane_v:
1681 case ARM::BI__builtin_neon_vld1_lane_v:
1682 case ARM::BI__builtin_neon_vld1_dup_v:
1683 case ARM::BI__builtin_neon_vld1q_dup_v:
1684 case ARM::BI__builtin_neon_vst1_v:
1685 case ARM::BI__builtin_neon_vst1q_v:
1686 case ARM::BI__builtin_neon_vst1q_lane_v:
1687 case ARM::BI__builtin_neon_vst1_lane_v:
1688 case ARM::BI__builtin_neon_vst2_v:
1689 case ARM::BI__builtin_neon_vst2q_v:
1690 case ARM::BI__builtin_neon_vst2_lane_v:
1691 case ARM::BI__builtin_neon_vst2q_lane_v:
1692 case ARM::BI__builtin_neon_vst3_v:
1693 case ARM::BI__builtin_neon_vst3q_v:
1694 case ARM::BI__builtin_neon_vst3_lane_v:
1695 case ARM::BI__builtin_neon_vst3q_lane_v:
1696 case ARM::BI__builtin_neon_vst4_v:
1697 case ARM::BI__builtin_neon_vst4q_v:
1698 case ARM::BI__builtin_neon_vst4_lane_v:
1699 case ARM::BI__builtin_neon_vst4q_lane_v:
1700 // Get the alignment for the argument in addition to the value;
1701 // we'll use it later.
1702 std::pair<llvm::Value*, unsigned> Src =
1703 EmitPointerWithAlignment(E->getArg(0));
1704 Ops.push_back(Src.first);
1705 Align = Builder.getInt32(Src.second);
1706 continue;
1707 }
1708 }
1709 if (i == 1) {
1710 switch (BuiltinID) {
1711 case ARM::BI__builtin_neon_vld2_v:
1712 case ARM::BI__builtin_neon_vld2q_v:
1713 case ARM::BI__builtin_neon_vld3_v:
1714 case ARM::BI__builtin_neon_vld3q_v:
1715 case ARM::BI__builtin_neon_vld4_v:
1716 case ARM::BI__builtin_neon_vld4q_v:
1717 case ARM::BI__builtin_neon_vld2_lane_v:
1718 case ARM::BI__builtin_neon_vld2q_lane_v:
1719 case ARM::BI__builtin_neon_vld3_lane_v:
1720 case ARM::BI__builtin_neon_vld3q_lane_v:
1721 case ARM::BI__builtin_neon_vld4_lane_v:
1722 case ARM::BI__builtin_neon_vld4q_lane_v:
1723 case ARM::BI__builtin_neon_vld2_dup_v:
1724 case ARM::BI__builtin_neon_vld3_dup_v:
1725 case ARM::BI__builtin_neon_vld4_dup_v:
1726 // Get the alignment for the argument in addition to the value;
1727 // we'll use it later.
1728 std::pair<llvm::Value*, unsigned> Src =
1729 EmitPointerWithAlignment(E->getArg(1));
1730 Ops.push_back(Src.first);
1731 Align = Builder.getInt32(Src.second);
1732 continue;
1733 }
1734 }
1735 Ops.push_back(EmitScalarExpr(E->getArg(i)));
1736 }
1737
1738 // vget_lane and vset_lane are not overloaded and do not have an extra
1739 // argument that specifies the vector type.
1740 switch (BuiltinID) {
1741 default: break;
1742 case ARM::BI__builtin_neon_vget_lane_i8:
1743 case ARM::BI__builtin_neon_vget_lane_i16:
1744 case ARM::BI__builtin_neon_vget_lane_i32:
1745 case ARM::BI__builtin_neon_vget_lane_i64:
1746 case ARM::BI__builtin_neon_vget_lane_f32:
1747 case ARM::BI__builtin_neon_vgetq_lane_i8:
1748 case ARM::BI__builtin_neon_vgetq_lane_i16:
1749 case ARM::BI__builtin_neon_vgetq_lane_i32:
1750 case ARM::BI__builtin_neon_vgetq_lane_i64:
1751 case ARM::BI__builtin_neon_vgetq_lane_f32:
1752 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1753 "vget_lane");
1754 case ARM::BI__builtin_neon_vset_lane_i8:
1755 case ARM::BI__builtin_neon_vset_lane_i16:
1756 case ARM::BI__builtin_neon_vset_lane_i32:
1757 case ARM::BI__builtin_neon_vset_lane_i64:
1758 case ARM::BI__builtin_neon_vset_lane_f32:
1759 case ARM::BI__builtin_neon_vsetq_lane_i8:
1760 case ARM::BI__builtin_neon_vsetq_lane_i16:
1761 case ARM::BI__builtin_neon_vsetq_lane_i32:
1762 case ARM::BI__builtin_neon_vsetq_lane_i64:
1763 case ARM::BI__builtin_neon_vsetq_lane_f32:
1764 Ops.push_back(EmitScalarExpr(E->getArg(2)));
1765 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1766 }
1767
1768 // Get the last argument, which specifies the vector type.
1769 llvm::APSInt Result;
1770 const Expr *Arg = E->getArg(E->getNumArgs()-1);
1771 if (!Arg->isIntegerConstantExpr(Result, getContext()))
1772 return 0;
1773
1774 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1775 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1776 // Determine the overloaded type of this builtin.
1777 llvm::Type *Ty;
1778 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1779 Ty = FloatTy;
1780 else
1781 Ty = DoubleTy;
1782
1783 // Determine whether this is an unsigned conversion or not.
1784 bool usgn = Result.getZExtValue() == 1;
1785 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1786
1787 // Call the appropriate intrinsic.
1788 Function *F = CGM.getIntrinsic(Int, Ty);
1789 return Builder.CreateCall(F, Ops, "vcvtr");
1790 }
1791
1792 // Determine the type of this overloaded NEON intrinsic.
1793 NeonTypeFlags Type(Result.getZExtValue());
1794 bool usgn = Type.isUnsigned();
1795 bool quad = Type.isQuad();
1796 bool rightShift = false;
1797
1798 llvm::VectorType *VTy = GetNeonType(this, Type);
1799 llvm::Type *Ty = VTy;
1800 if (!Ty)
1801 return 0;
1802
1803 unsigned Int;
1804 switch (BuiltinID) {
1805 default: return 0;
1806 case ARM::BI__builtin_neon_vbsl_v:
1807 case ARM::BI__builtin_neon_vbslq_v:
1808 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
1809 Ops, "vbsl");
1810 case ARM::BI__builtin_neon_vabd_v:
1811 case ARM::BI__builtin_neon_vabdq_v:
1812 Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1813 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
1814 case ARM::BI__builtin_neon_vabs_v:
1815 case ARM::BI__builtin_neon_vabsq_v:
1816 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
1817 Ops, "vabs");
1818 case ARM::BI__builtin_neon_vaddhn_v:
1819 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
1820 Ops, "vaddhn");
1821 case ARM::BI__builtin_neon_vcale_v:
1822 std::swap(Ops[0], Ops[1]);
1823 case ARM::BI__builtin_neon_vcage_v: {
1824 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
1825 return EmitNeonCall(F, Ops, "vcage");
1826 }
1827 case ARM::BI__builtin_neon_vcaleq_v:
1828 std::swap(Ops[0], Ops[1]);
1829 case ARM::BI__builtin_neon_vcageq_v: {
1830 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
1831 return EmitNeonCall(F, Ops, "vcage");
1832 }
1833 case ARM::BI__builtin_neon_vcalt_v:
1834 std::swap(Ops[0], Ops[1]);
1835 case ARM::BI__builtin_neon_vcagt_v: {
1836 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
1837 return EmitNeonCall(F, Ops, "vcagt");
1838 }
1839 case ARM::BI__builtin_neon_vcaltq_v:
1840 std::swap(Ops[0], Ops[1]);
1841 case ARM::BI__builtin_neon_vcagtq_v: {
1842 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
1843 return EmitNeonCall(F, Ops, "vcagt");
1844 }
1845 case ARM::BI__builtin_neon_vcls_v:
1846 case ARM::BI__builtin_neon_vclsq_v: {
1847 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
1848 return EmitNeonCall(F, Ops, "vcls");
1849 }
1850 case ARM::BI__builtin_neon_vclz_v:
1851 case ARM::BI__builtin_neon_vclzq_v: {
1852 // Generate target-independent intrinsic; also need to add second argument
1853 // for whether or not clz of zero is undefined; on ARM it isn't.
1854 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
1855 Ops.push_back(Builder.getInt1(Target.isCLZForZeroUndef()));
1856 return EmitNeonCall(F, Ops, "vclz");
1857 }
1858 case ARM::BI__builtin_neon_vcnt_v:
1859 case ARM::BI__builtin_neon_vcntq_v: {
1860 // generate target-independent intrinsic
1861 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
1862 return EmitNeonCall(F, Ops, "vctpop");
1863 }
1864 case ARM::BI__builtin_neon_vcvt_f16_v: {
1865 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1866 "unexpected vcvt_f16_v builtin");
1867 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
1868 return EmitNeonCall(F, Ops, "vcvt");
1869 }
1870 case ARM::BI__builtin_neon_vcvt_f32_f16: {
1871 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1872 "unexpected vcvt_f32_f16 builtin");
1873 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
1874 return EmitNeonCall(F, Ops, "vcvt");
1875 }
1876 case ARM::BI__builtin_neon_vcvt_f32_v:
1877 case ARM::BI__builtin_neon_vcvtq_f32_v:
1878 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1879 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1880 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1881 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1882 case ARM::BI__builtin_neon_vcvt_s32_v:
1883 case ARM::BI__builtin_neon_vcvt_u32_v:
1884 case ARM::BI__builtin_neon_vcvtq_s32_v:
1885 case ARM::BI__builtin_neon_vcvtq_u32_v: {
1886 llvm::Type *FloatTy =
1887 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1888 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
1889 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1890 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1891 }
1892 case ARM::BI__builtin_neon_vcvt_n_f32_v:
1893 case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1894 llvm::Type *FloatTy =
1895 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1896 llvm::Type *Tys[2] = { FloatTy, Ty };
1897 Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
1898 : Intrinsic::arm_neon_vcvtfxs2fp;
1899 Function *F = CGM.getIntrinsic(Int, Tys);
1900 return EmitNeonCall(F, Ops, "vcvt_n");
1901 }
1902 case ARM::BI__builtin_neon_vcvt_n_s32_v:
1903 case ARM::BI__builtin_neon_vcvt_n_u32_v:
1904 case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1905 case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1906 llvm::Type *FloatTy =
1907 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1908 llvm::Type *Tys[2] = { Ty, FloatTy };
1909 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
1910 : Intrinsic::arm_neon_vcvtfp2fxs;
1911 Function *F = CGM.getIntrinsic(Int, Tys);
1912 return EmitNeonCall(F, Ops, "vcvt_n");
1913 }
1914 case ARM::BI__builtin_neon_vext_v:
1915 case ARM::BI__builtin_neon_vextq_v: {
1916 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1917 SmallVector<Constant*, 16> Indices;
1918 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1919 Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1920
1921 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1922 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1923 Value *SV = llvm::ConstantVector::get(Indices);
1924 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1925 }
1926 case ARM::BI__builtin_neon_vhadd_v:
1927 case ARM::BI__builtin_neon_vhaddq_v:
1928 Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1929 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
1930 case ARM::BI__builtin_neon_vhsub_v:
1931 case ARM::BI__builtin_neon_vhsubq_v:
1932 Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1933 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
1934 case ARM::BI__builtin_neon_vld1_v:
1935 case ARM::BI__builtin_neon_vld1q_v:
1936 Ops.push_back(Align);
1937 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
1938 Ops, "vld1");
1939 case ARM::BI__builtin_neon_vld1q_lane_v:
1940 // Handle 64-bit integer elements as a special case. Use shuffles of
1941 // one-element vectors to avoid poor code for i64 in the backend.
1942 if (VTy->getElementType()->isIntegerTy(64)) {
1943 // Extract the other lane.
1944 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1945 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
1946 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
1947 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
1948 // Load the value as a one-element vector.
1949 Ty = llvm::VectorType::get(VTy->getElementType(), 1);
1950 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
1951 Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
1952 // Combine them.
1953 SmallVector<Constant*, 2> Indices;
1954 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
1955 Indices.push_back(ConstantInt::get(Int32Ty, Lane));
1956 SV = llvm::ConstantVector::get(Indices);
1957 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
1958 }
1959 // fall through
1960 case ARM::BI__builtin_neon_vld1_lane_v: {
1961 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1962 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1963 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1964 LoadInst *Ld = Builder.CreateLoad(Ops[0]);
1965 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
1966 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
1967 }
1968 case ARM::BI__builtin_neon_vld1_dup_v:
1969 case ARM::BI__builtin_neon_vld1q_dup_v: {
1970 Value *V = UndefValue::get(Ty);
1971 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1972 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1973 LoadInst *Ld = Builder.CreateLoad(Ops[0]);
1974 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
1975 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1976 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1977 return EmitNeonSplat(Ops[0], CI);
1978 }
1979 case ARM::BI__builtin_neon_vld2_v:
1980 case ARM::BI__builtin_neon_vld2q_v: {
1981 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
1982 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1983 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1984 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1985 return Builder.CreateStore(Ops[1], Ops[0]);
1986 }
1987 case ARM::BI__builtin_neon_vld3_v:
1988 case ARM::BI__builtin_neon_vld3q_v: {
1989 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
1990 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1991 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1992 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1993 return Builder.CreateStore(Ops[1], Ops[0]);
1994 }
1995 case ARM::BI__builtin_neon_vld4_v:
1996 case ARM::BI__builtin_neon_vld4q_v: {
1997 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
1998 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1999 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2000 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2001 return Builder.CreateStore(Ops[1], Ops[0]);
2002 }
2003 case ARM::BI__builtin_neon_vld2_lane_v:
2004 case ARM::BI__builtin_neon_vld2q_lane_v: {
2005 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
2006 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2007 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2008 Ops.push_back(Align);
2009 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
2010 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2011 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2012 return Builder.CreateStore(Ops[1], Ops[0]);
2013 }
2014 case ARM::BI__builtin_neon_vld3_lane_v:
2015 case ARM::BI__builtin_neon_vld3q_lane_v: {
2016 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
2017 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2018 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2019 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
2020 Ops.push_back(Align);
2021 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
2022 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2023 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2024 return Builder.CreateStore(Ops[1], Ops[0]);
2025 }
2026 case ARM::BI__builtin_neon_vld4_lane_v:
2027 case ARM::BI__builtin_neon_vld4q_lane_v: {
2028 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
2029 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2030 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2031 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
2032 Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
2033 Ops.push_back(Align);
2034 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
2035 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2036 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2037 return Builder.CreateStore(Ops[1], Ops[0]);
2038 }
2039 case ARM::BI__builtin_neon_vld2_dup_v:
2040 case ARM::BI__builtin_neon_vld3_dup_v:
2041 case ARM::BI__builtin_neon_vld4_dup_v: {
2042 // Handle 64-bit elements as a special-case. There is no "dup" needed.
2043 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
2044 switch (BuiltinID) {
2045 case ARM::BI__builtin_neon_vld2_dup_v:
2046 Int = Intrinsic::arm_neon_vld2;
2047 break;
2048 case ARM::BI__builtin_neon_vld3_dup_v:
2049 Int = Intrinsic::arm_neon_vld3;
2050 break;
2051 case ARM::BI__builtin_neon_vld4_dup_v:
2052 Int = Intrinsic::arm_neon_vld4;
2053 break;
2054 default: llvm_unreachable("unknown vld_dup intrinsic?");
2055 }
2056 Function *F = CGM.getIntrinsic(Int, Ty);
2057 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
2058 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2059 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2060 return Builder.CreateStore(Ops[1], Ops[0]);
2061 }
2062 switch (BuiltinID) {
2063 case ARM::BI__builtin_neon_vld2_dup_v:
2064 Int = Intrinsic::arm_neon_vld2lane;
2065 break;
2066 case ARM::BI__builtin_neon_vld3_dup_v:
2067 Int = Intrinsic::arm_neon_vld3lane;
2068 break;
2069 case ARM::BI__builtin_neon_vld4_dup_v:
2070 Int = Intrinsic::arm_neon_vld4lane;
2071 break;
2072 default: llvm_unreachable("unknown vld_dup intrinsic?");
2073 }
2074 Function *F = CGM.getIntrinsic(Int, Ty);
2075 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
2076
2077 SmallVector<Value*, 6> Args;
2078 Args.push_back(Ops[1]);
2079 Args.append(STy->getNumElements(), UndefValue::get(Ty));
2080
2081 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
2082 Args.push_back(CI);
2083 Args.push_back(Align);
2084
2085 Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
2086 // Splat lane 0 to all elements in each vector of the result.
2087 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2088 Value *Val = Builder.CreateExtractValue(Ops[1], i);
2089 Value *Elt = Builder.CreateBitCast(Val, Ty);
2090 Elt = EmitNeonSplat(Elt, CI);
2091 Elt = Builder.CreateBitCast(Elt, Val->getType());
2092 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
2093 }
2094 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2095 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2096 return Builder.CreateStore(Ops[1], Ops[0]);
2097 }
2098 case ARM::BI__builtin_neon_vmax_v:
2099 case ARM::BI__builtin_neon_vmaxq_v:
2100 Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
2101 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
2102 case ARM::BI__builtin_neon_vmin_v:
2103 case ARM::BI__builtin_neon_vminq_v:
2104 Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
2105 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
2106 case ARM::BI__builtin_neon_vmovl_v: {
2107 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
2108 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2109 if (usgn)
2110 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2111 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2112 }
2113 case ARM::BI__builtin_neon_vmovn_v: {
2114 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
2115 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2116 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2117 }
2118 case ARM::BI__builtin_neon_vmul_v:
2119 case ARM::BI__builtin_neon_vmulq_v:
2120 assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
2121 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
2122 Ops, "vmul");
2123 case ARM::BI__builtin_neon_vmull_v:
2124 Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2125 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2126 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2127 case ARM::BI__builtin_neon_vfma_v:
2128 case ARM::BI__builtin_neon_vfmaq_v: {
2129 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2130 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2131 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2132 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2133
2134 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2135 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
2136 }
2137 case ARM::BI__builtin_neon_vpadal_v:
2138 case ARM::BI__builtin_neon_vpadalq_v: {
2139 Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
2140 // The source operand type has twice as many elements of half the size.
2141 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2142 llvm::Type *EltTy =
2143 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2144 llvm::Type *NarrowTy =
2145 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
2146 llvm::Type *Tys[2] = { Ty, NarrowTy };
2147 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
2148 }
2149 case ARM::BI__builtin_neon_vpadd_v:
2150 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
2151 Ops, "vpadd");
2152 case ARM::BI__builtin_neon_vpaddl_v:
2153 case ARM::BI__builtin_neon_vpaddlq_v: {
2154 Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
2155 // The source operand type has twice as many elements of half the size.
2156 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2157 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2158 llvm::Type *NarrowTy =
2159 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
2160 llvm::Type *Tys[2] = { Ty, NarrowTy };
2161 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2162 }
2163 case ARM::BI__builtin_neon_vpmax_v:
2164 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
2165 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
2166 case ARM::BI__builtin_neon_vpmin_v:
2167 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
2168 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
2169 case ARM::BI__builtin_neon_vqabs_v:
2170 case ARM::BI__builtin_neon_vqabsq_v:
2171 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
2172 Ops, "vqabs");
2173 case ARM::BI__builtin_neon_vqadd_v:
2174 case ARM::BI__builtin_neon_vqaddq_v:
2175 Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
2176 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
2177 case ARM::BI__builtin_neon_vqdmlal_v:
2178 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
2179 Ops, "vqdmlal");
2180 case ARM::BI__builtin_neon_vqdmlsl_v:
2181 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
2182 Ops, "vqdmlsl");
2183 case ARM::BI__builtin_neon_vqdmulh_v:
2184 case ARM::BI__builtin_neon_vqdmulhq_v:
2185 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
2186 Ops, "vqdmulh");
2187 case ARM::BI__builtin_neon_vqdmull_v:
2188 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
2189 Ops, "vqdmull");
2190 case ARM::BI__builtin_neon_vqmovn_v:
2191 Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
2192 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
2193 case ARM::BI__builtin_neon_vqmovun_v:
2194 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
2195 Ops, "vqmovun");
2196 case ARM::BI__builtin_neon_vqneg_v:
2197 case ARM::BI__builtin_neon_vqnegq_v:
2198 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
2199 Ops, "vqneg");
2200 case ARM::BI__builtin_neon_vqrdmulh_v:
2201 case ARM::BI__builtin_neon_vqrdmulhq_v:
2202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
2203 Ops, "vqrdmulh");
2204 case ARM::BI__builtin_neon_vqrshl_v:
2205 case ARM::BI__builtin_neon_vqrshlq_v:
2206 Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
2207 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
2208 case ARM::BI__builtin_neon_vqrshrn_n_v:
2209 Int =
2210 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2211 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2212 1, true);
2213 case ARM::BI__builtin_neon_vqrshrun_n_v:
2214 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2215 Ops, "vqrshrun_n", 1, true);
2216 case ARM::BI__builtin_neon_vqshl_v:
2217 case ARM::BI__builtin_neon_vqshlq_v:
2218 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
2219 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
2220 case ARM::BI__builtin_neon_vqshl_n_v:
2221 case ARM::BI__builtin_neon_vqshlq_n_v:
2222 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
2223 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2224 1, false);
2225 case ARM::BI__builtin_neon_vqshlu_n_v:
2226 case ARM::BI__builtin_neon_vqshluq_n_v:
2227 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
2228 Ops, "vqshlu", 1, false);
2229 case ARM::BI__builtin_neon_vqshrn_n_v:
2230 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2231 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2232 1, true);
2233 case ARM::BI__builtin_neon_vqshrun_n_v:
2234 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2235 Ops, "vqshrun_n", 1, true);
2236 case ARM::BI__builtin_neon_vqsub_v:
2237 case ARM::BI__builtin_neon_vqsubq_v:
2238 Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
2239 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
2240 case ARM::BI__builtin_neon_vraddhn_v:
2241 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
2242 Ops, "vraddhn");
2243 case ARM::BI__builtin_neon_vrecpe_v:
2244 case ARM::BI__builtin_neon_vrecpeq_v:
2245 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2246 Ops, "vrecpe");
2247 case ARM::BI__builtin_neon_vrecps_v:
2248 case ARM::BI__builtin_neon_vrecpsq_v:
2249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
2250 Ops, "vrecps");
2251 case ARM::BI__builtin_neon_vrhadd_v:
2252 case ARM::BI__builtin_neon_vrhaddq_v:
2253 Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
2254 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
2255 case ARM::BI__builtin_neon_vrshl_v:
2256 case ARM::BI__builtin_neon_vrshlq_v:
2257 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2258 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
2259 case ARM::BI__builtin_neon_vrshrn_n_v:
2260 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2261 Ops, "vrshrn_n", 1, true);
2262 case ARM::BI__builtin_neon_vrshr_n_v:
2263 case ARM::BI__builtin_neon_vrshrq_n_v:
2264 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2265 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
2266 case ARM::BI__builtin_neon_vrsqrte_v:
2267 case ARM::BI__builtin_neon_vrsqrteq_v:
2268 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
2269 Ops, "vrsqrte");
2270 case ARM::BI__builtin_neon_vrsqrts_v:
2271 case ARM::BI__builtin_neon_vrsqrtsq_v:
2272 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
2273 Ops, "vrsqrts");
2274 case ARM::BI__builtin_neon_vrsra_n_v:
2275 case ARM::BI__builtin_neon_vrsraq_n_v:
2276 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2277 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2278 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2279 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2280 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
2281 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2282 case ARM::BI__builtin_neon_vrsubhn_v:
2283 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
2284 Ops, "vrsubhn");
2285 case ARM::BI__builtin_neon_vshl_v:
2286 case ARM::BI__builtin_neon_vshlq_v:
2287 Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
2288 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
2289 case ARM::BI__builtin_neon_vshll_n_v:
2290 Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
2291 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
2292 case ARM::BI__builtin_neon_vshl_n_v:
2293 case ARM::BI__builtin_neon_vshlq_n_v:
2294 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2295 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2296 "vshl_n");
2297 case ARM::BI__builtin_neon_vshrn_n_v:
2298 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
2299 Ops, "vshrn_n", 1, true);
2300 case ARM::BI__builtin_neon_vshr_n_v:
2301 case ARM::BI__builtin_neon_vshrq_n_v:
2302 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2303 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2304 if (usgn)
2305 return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
2306 else
2307 return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
2308 case ARM::BI__builtin_neon_vsri_n_v:
2309 case ARM::BI__builtin_neon_vsriq_n_v:
2310 rightShift = true;
2311 case ARM::BI__builtin_neon_vsli_n_v:
2312 case ARM::BI__builtin_neon_vsliq_n_v:
2313 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2314 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2315 Ops, "vsli_n");
2316 case ARM::BI__builtin_neon_vsra_n_v:
2317 case ARM::BI__builtin_neon_vsraq_n_v:
2318 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2319 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2320 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
2321 if (usgn)
2322 Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
2323 else
2324 Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
2325 return Builder.CreateAdd(Ops[0], Ops[1]);
2326 case ARM::BI__builtin_neon_vst1_v:
2327 case ARM::BI__builtin_neon_vst1q_v:
2328 Ops.push_back(Align);
2329 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
2330 Ops, "");
2331 case ARM::BI__builtin_neon_vst1q_lane_v:
2332 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2333 // a one-element vector and avoid poor code for i64 in the backend.
2334 if (VTy->getElementType()->isIntegerTy(64)) {
2335 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2336 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2337 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2338 Ops[2] = Align;
2339 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2340 Ops[1]->getType()), Ops);
2341 }
2342 // fall through
2343 case ARM::BI__builtin_neon_vst1_lane_v: {
2344 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2345 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2346 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2347 StoreInst *St = Builder.CreateStore(Ops[1],
2348 Builder.CreateBitCast(Ops[0], Ty));
2349 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
2350 return St;
2351 }
2352 case ARM::BI__builtin_neon_vst2_v:
2353 case ARM::BI__builtin_neon_vst2q_v:
2354 Ops.push_back(Align);
2355 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
2356 Ops, "");
2357 case ARM::BI__builtin_neon_vst2_lane_v:
2358 case ARM::BI__builtin_neon_vst2q_lane_v:
2359 Ops.push_back(Align);
2360 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
2361 Ops, "");
2362 case ARM::BI__builtin_neon_vst3_v:
2363 case ARM::BI__builtin_neon_vst3q_v:
2364 Ops.push_back(Align);
2365 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
2366 Ops, "");
2367 case ARM::BI__builtin_neon_vst3_lane_v:
2368 case ARM::BI__builtin_neon_vst3q_lane_v:
2369 Ops.push_back(Align);
2370 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
2371 Ops, "");
2372 case ARM::BI__builtin_neon_vst4_v:
2373 case ARM::BI__builtin_neon_vst4q_v:
2374 Ops.push_back(Align);
2375 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
2376 Ops, "");
2377 case ARM::BI__builtin_neon_vst4_lane_v:
2378 case ARM::BI__builtin_neon_vst4q_lane_v:
2379 Ops.push_back(Align);
2380 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
2381 Ops, "");
2382 case ARM::BI__builtin_neon_vsubhn_v:
2383 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
2384 Ops, "vsubhn");
2385 case ARM::BI__builtin_neon_vtbl1_v:
2386 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2387 Ops, "vtbl1");
2388 case ARM::BI__builtin_neon_vtbl2_v:
2389 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2390 Ops, "vtbl2");
2391 case ARM::BI__builtin_neon_vtbl3_v:
2392 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2393 Ops, "vtbl3");
2394 case ARM::BI__builtin_neon_vtbl4_v:
2395 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2396 Ops, "vtbl4");
2397 case ARM::BI__builtin_neon_vtbx1_v:
2398 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2399 Ops, "vtbx1");
2400 case ARM::BI__builtin_neon_vtbx2_v:
2401 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2402 Ops, "vtbx2");
2403 case ARM::BI__builtin_neon_vtbx3_v:
2404 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2405 Ops, "vtbx3");
2406 case ARM::BI__builtin_neon_vtbx4_v:
2407 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2408 Ops, "vtbx4");
2409 case ARM::BI__builtin_neon_vtst_v:
2410 case ARM::BI__builtin_neon_vtstq_v: {
2411 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2412 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2413 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2414 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2415 ConstantAggregateZero::get(Ty));
2416 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2417 }
2418 case ARM::BI__builtin_neon_vtrn_v:
2419 case ARM::BI__builtin_neon_vtrnq_v: {
2420 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2421 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2422 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2423 Value *SV = 0;
2424
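// Emit one shuffle per output vector and store each result into consecutive
// vector slots of the result pointer in Ops[0].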
2425 for (unsigned vi = 0; vi != 2; ++vi) {
2426 SmallVector<Constant*, 16> Indices;
2427 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2428 Indices.push_back(Builder.getInt32(i+vi));
2429 Indices.push_back(Builder.getInt32(i+e+vi));
2430 }
2431 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2432 SV = llvm::ConstantVector::get(Indices);
2433 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
2434 SV = Builder.CreateStore(SV, Addr);
2435 }
2436 return SV;
2437 }
2438 case ARM::BI__builtin_neon_vuzp_v:
2439 case ARM::BI__builtin_neon_vuzpq_v: {
2440 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2441 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2442 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2443 Value *SV = 0;
2444
2445 for (unsigned vi = 0; vi != 2; ++vi) {
2446 SmallVector<Constant*, 16> Indices;
2447 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2448 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
2449
2450 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2451 SV = llvm::ConstantVector::get(Indices);
2452 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
2453 SV = Builder.CreateStore(SV, Addr);
2454 }
2455 return SV;
2456 }
2457 case ARM::BI__builtin_neon_vzip_v:
2458 case ARM::BI__builtin_neon_vzipq_v: {
2459 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2460 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2461 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2462 Value *SV = 0;
2463
2464 for (unsigned vi = 0; vi != 2; ++vi) {
2465 SmallVector<Constant*, 16> Indices;
2466 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2467 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
2468 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
2469 }
2470 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2471 SV = llvm::ConstantVector::get(Indices);
2472 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
2473 SV = Builder.CreateStore(SV, Addr);
2474 }
2475 return SV;
2476 }
2477 }
2478 }
2479
2480 llvm::Value *CodeGenFunction::
2481 BuildVector(ArrayRef<llvm::Value*> Ops) {
2482 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
2483 "Not a power-of-two sized vector!");
2484 bool AllConstants = true;
2485 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
2486 AllConstants &= isa<Constant>(Ops[i]);
2487
2488 // If this is a constant vector, create a ConstantVector.
2489 if (AllConstants) {
2490 SmallVector<llvm::Constant*, 16> CstOps;
2491 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2492 CstOps.push_back(cast<Constant>(Ops[i]));
2493 return llvm::ConstantVector::get(CstOps);
2494 }
2495
2496 // Otherwise, insertelement the values to build the vector.
2497 Value *Result =
2498 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
2499
2500 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2501 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
2502
2503 return Result;
2504 }
2505
2506 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
2507 const CallExpr *E) {
2508 SmallVector<Value*, 4> Ops;
2509
2510 // Find out if any arguments are required to be integer constant expressions.
2511 unsigned ICEArguments = 0;
2512 ASTContext::GetBuiltinTypeError Error;
2513 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2514 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2515
2516 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
2517 // If this is a normal argument, just emit it as a scalar.
2518 if ((ICEArguments & (1 << i)) == 0) {
2519 Ops.push_back(EmitScalarExpr(E->getArg(i)));
2520 continue;
2521 }
2522
2523 // If this is required to be a constant, constant fold it so that we know
2524 // that the generated intrinsic gets a ConstantInt.
2525 llvm::APSInt Result;
2526 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
2527 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
2528 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
2529 }
2530
2531 switch (BuiltinID) {
2532 default: return 0;
2533 case X86::BI__builtin_ia32_vec_init_v8qi:
2534 case X86::BI__builtin_ia32_vec_init_v4hi:
2535 case X86::BI__builtin_ia32_vec_init_v2si:
2536 return Builder.CreateBitCast(BuildVector(Ops),
2537 llvm::Type::getX86_MMXTy(getLLVMContext()));
2538 case X86::BI__builtin_ia32_vec_ext_v2si:
2539 return Builder.CreateExtractElement(Ops[0],
2540 llvm::ConstantInt::get(Ops[1]->getType(), 0));
2541 case X86::BI__builtin_ia32_ldmxcsr: {
2542 llvm::Type *PtrTy = Int8PtrTy;
2543 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2544 Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2545 Builder.CreateStore(Ops[0], Tmp);
2546 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2547 Builder.CreateBitCast(Tmp, PtrTy));
2548 }
2549 case X86::BI__builtin_ia32_stmxcsr: {
2550 llvm::Type *PtrTy = Int8PtrTy;
2551 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2552 Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2553 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2554 Builder.CreateBitCast(Tmp, PtrTy));
2555 return Builder.CreateLoad(Tmp, "stmxcsr");
2556 }
2557 case X86::BI__builtin_ia32_storehps:
2558 case X86::BI__builtin_ia32_storelps: {
2559 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2560 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2561
2562 // Cast the value to v2i64.
2563 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2564
2565 // Extract element 0 (storelps) or 1 (storehps).
2566 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2567 llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2568 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2569
2570 // cast pointer to i64 & store
2571 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2572 return Builder.CreateStore(Ops[1], Ops[0]);
2573 }
2574 case X86::BI__builtin_ia32_palignr: {
2575 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2576
2577 // If palignr is shifting the pair of input vectors less than 9 bytes,
2578 // emit a shuffle instruction.
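// E.g. with shiftVal == 4 the mask is <4,5,...,11>: mask entries 4-7 pick
// elements of Ops[1] and entries 8-11 pick the low elements of Ops[0],
// matching a byte-wise right shift of the concatenated pair.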
2579 if (shiftVal <= 8) {
2580 SmallVector<llvm::Constant*, 8> Indices;
2581 for (unsigned i = 0; i != 8; ++i)
2582 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2583
2584 Value* SV = llvm::ConstantVector::get(Indices);
2585 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2586 }
2587
2588 // If palignr is shifting the pair of input vectors more than 8 but less
2589 // than 16 bytes, emit a logical right shift of the destination.
2590 if (shiftVal < 16) {
2591 // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2592 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2593
2594 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2595 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2596
2597 // Shift the 64-bit value right by (shiftVal-8)*8 bits.
2598 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2599 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2600 }
2601
2602 // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
2603 return llvm::Constant::getNullValue(ConvertType(E->getType()));
2604 }
2605 case X86::BI__builtin_ia32_palignr128: {
2606 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2607
2608 // If palignr is shifting the pair of input vectors less than 17 bytes,
2609 // emit a shuffle instruction.
2610 if (shiftVal <= 16) {
2611 SmallVector<llvm::Constant*, 16> Indices;
2612 for (unsigned i = 0; i != 16; ++i)
2613 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2614
2615 Value* SV = llvm::ConstantVector::get(Indices);
2616 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2617 }
2618
2619 // If palignr is shifting the pair of input vectors more than 16 but less
2620 // than 32 bytes, emit a logical right shift of the destination.
2621 if (shiftVal < 32) {
2622 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2623
2624 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2625 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2626
2627 // Shift the 128-bit value right by (shiftVal-16)*8 bits.
2628 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2629 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2630 }
2631
2632 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2633 return llvm::Constant::getNullValue(ConvertType(E->getType()));
2634 }
2635 case X86::BI__builtin_ia32_palignr256: {
2636 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2637
2638 // If palignr is shifting the pair of input vectors less than 17 bytes,
2639 // emit a shuffle instruction.
2640 if (shiftVal <= 16) {
2641 SmallVector<llvm::Constant*, 32> Indices;
2642 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2643 for (unsigned l = 0; l != 2; ++l) {
2644 unsigned LaneStart = l * 16;
2645 unsigned LaneEnd = (l+1) * 16;
2646 for (unsigned i = 0; i != 16; ++i) {
2647 unsigned Idx = shiftVal + i + LaneStart;
2648 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
2649 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
2650 }
2651 }
2652
2653 Value* SV = llvm::ConstantVector::get(Indices);
2654 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2655 }
2656
2657 // If palignr is shifting the pair of input vectors more than 16 but less
2658 // than 32 bytes, emit a logical right shift of the destination.
2659 if (shiftVal < 32) {
2660 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
2661
2662 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2663 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2664
2665 // Shift each 128-bit lane right by (shiftVal-16)*8 bits.
2666 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
2667 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2668 }
2669
2670 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2671 return llvm::Constant::getNullValue(ConvertType(E->getType()));
2672 }
2673 case X86::BI__builtin_ia32_movntps:
2674 case X86::BI__builtin_ia32_movntps256:
2675 case X86::BI__builtin_ia32_movntpd:
2676 case X86::BI__builtin_ia32_movntpd256:
2677 case X86::BI__builtin_ia32_movntdq:
2678 case X86::BI__builtin_ia32_movntdq256:
2679 case X86::BI__builtin_ia32_movnti: {
2680 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
2681 Builder.getInt32(1));
2682
2683 // Convert the type of the pointer to a pointer to the stored type.
2684 Value *BC = Builder.CreateBitCast(Ops[0],
2685 llvm::PointerType::getUnqual(Ops[1]->getType()),
2686 "cast");
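// Tag the store with !nontemporal metadata so the backend can select a
// non-temporal (streaming) store instruction.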
2687 StoreInst *SI = Builder.CreateStore(Ops[1], BC);
2688 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
2689 SI->setAlignment(16);
2690 return SI;
2691 }
2692 // 3DNow!
2693 case X86::BI__builtin_ia32_pswapdsf:
2694 case X86::BI__builtin_ia32_pswapdsi: {
2695 const char *name = 0;
2696 Intrinsic::ID ID = Intrinsic::not_intrinsic;
2697 switch(BuiltinID) {
2698 default: llvm_unreachable("Unsupported intrinsic!");
2699 case X86::BI__builtin_ia32_pswapdsf:
2700 case X86::BI__builtin_ia32_pswapdsi:
2701 name = "pswapd";
2702 ID = Intrinsic::x86_3dnowa_pswapd;
2703 break;
2704 }
2705 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
2706 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
2707 llvm::Function *F = CGM.getIntrinsic(ID);
2708 return Builder.CreateCall(F, Ops, name);
2709 }
2710 case X86::BI__builtin_ia32_rdrand16_step:
2711 case X86::BI__builtin_ia32_rdrand32_step:
2712 case X86::BI__builtin_ia32_rdrand64_step: {
2713 Intrinsic::ID ID;
2714 switch (BuiltinID) {
2715 default: llvm_unreachable("Unsupported intrinsic!");
2716 case X86::BI__builtin_ia32_rdrand16_step:
2717 ID = Intrinsic::x86_rdrand_16;
2718 break;
2719 case X86::BI__builtin_ia32_rdrand32_step:
2720 ID = Intrinsic::x86_rdrand_32;
2721 break;
2722 case X86::BI__builtin_ia32_rdrand64_step:
2723 ID = Intrinsic::x86_rdrand_64;
2724 break;
2725 }
2726
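// The intrinsic returns {random value, success flag}: store the value
// through the pointer argument and return the flag.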
2727 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2728 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
2729 return Builder.CreateExtractValue(Call, 1);
2730 }
2731 }
2732 }
2733
2734
2735 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2736 const CallExpr *E) {
2737 SmallVector<Value*, 4> Ops;
2738
2739 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2740 Ops.push_back(EmitScalarExpr(E->getArg(i)));
2741
2742 Intrinsic::ID ID = Intrinsic::not_intrinsic;
2743
2744 switch (BuiltinID) {
2745 default: return 0;
2746
2747 // vec_ld, vec_lvsl, vec_lvsr
2748 case PPC::BI__builtin_altivec_lvx:
2749 case PPC::BI__builtin_altivec_lvxl:
2750 case PPC::BI__builtin_altivec_lvebx:
2751 case PPC::BI__builtin_altivec_lvehx:
2752 case PPC::BI__builtin_altivec_lvewx:
2753 case PPC::BI__builtin_altivec_lvsl:
2754 case PPC::BI__builtin_altivec_lvsr:
2755 {
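// These builtins take (offset, pointer); the intrinsics take a single
// address, so add the byte offset to the pointer and drop the extra operand.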
2756 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
2757
2758 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
2759 Ops.pop_back();
2760
2761 switch (BuiltinID) {
2762 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
2763 case PPC::BI__builtin_altivec_lvx:
2764 ID = Intrinsic::ppc_altivec_lvx;
2765 break;
2766 case PPC::BI__builtin_altivec_lvxl:
2767 ID = Intrinsic::ppc_altivec_lvxl;
2768 break;
2769 case PPC::BI__builtin_altivec_lvebx:
2770 ID = Intrinsic::ppc_altivec_lvebx;
2771 break;
2772 case PPC::BI__builtin_altivec_lvehx:
2773 ID = Intrinsic::ppc_altivec_lvehx;
2774 break;
2775 case PPC::BI__builtin_altivec_lvewx:
2776 ID = Intrinsic::ppc_altivec_lvewx;
2777 break;
2778 case PPC::BI__builtin_altivec_lvsl:
2779 ID = Intrinsic::ppc_altivec_lvsl;
2780 break;
2781 case PPC::BI__builtin_altivec_lvsr:
2782 ID = Intrinsic::ppc_altivec_lvsr;
2783 break;
2784 }
2785 llvm::Function *F = CGM.getIntrinsic(ID);
2786 return Builder.CreateCall(F, Ops, "");
2787 }
2788
2789 // vec_st
2790 case PPC::BI__builtin_altivec_stvx:
2791 case PPC::BI__builtin_altivec_stvxl:
2792 case PPC::BI__builtin_altivec_stvebx:
2793 case PPC::BI__builtin_altivec_stvehx:
2794 case PPC::BI__builtin_altivec_stvewx:
2795 {
2796 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
2797 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
2798 Ops.pop_back();
2799
2800 switch (BuiltinID) {
2801 default: llvm_unreachable("Unsupported st intrinsic!");
2802 case PPC::BI__builtin_altivec_stvx:
2803 ID = Intrinsic::ppc_altivec_stvx;
2804 break;
2805 case PPC::BI__builtin_altivec_stvxl:
2806 ID = Intrinsic::ppc_altivec_stvxl;
2807 break;
2808 case PPC::BI__builtin_altivec_stvebx:
2809 ID = Intrinsic::ppc_altivec_stvebx;
2810 break;
2811 case PPC::BI__builtin_altivec_stvehx:
2812 ID = Intrinsic::ppc_altivec_stvehx;
2813 break;
2814 case PPC::BI__builtin_altivec_stvewx:
2815 ID = Intrinsic::ppc_altivec_stvewx;
2816 break;
2817 }
2818 llvm::Function *F = CGM.getIntrinsic(ID);
2819 return Builder.CreateCall(F, Ops, "");
2820 }
2821 }
2822 }
2823