1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CodeGenFunction.h"
15 #include "CGObjCRuntime.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TargetInfo.h"
22 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/Intrinsics.h"
24
25 using namespace clang;
26 using namespace CodeGen;
27 using namespace llvm;
28
29 /// getBuiltinLibFunction - Given a builtin id for a function like
30 /// "__builtin_fabsf", return a Function* for "fabsf".
31 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
32 unsigned BuiltinID) {
33 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
34
35 // Get the name, skip over the __builtin_ prefix (if necessary).
36 StringRef Name;
37 GlobalDecl D(FD);
38
39 // If the builtin has been declared explicitly with an assembler label,
40 // use the mangled name. This differs from the plain label on platforms
41 // that prefix labels.
42 if (FD->hasAttr<AsmLabelAttr>())
43 Name = getMangledName(D);
44 else
45 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;
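// For example (illustrative): for __builtin_fabsf, GetName returns
// "__builtin_fabsf", so skipping the 10-character "__builtin_" prefix
// leaves the library name "fabsf".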
46
47 llvm::FunctionType *Ty =
48 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
49
50 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
51 }
52
53 /// Emit the conversions required to turn the given value into an
54 /// integer of the given size.
55 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
56 QualType T, llvm::IntegerType *IntType) {
57 V = CGF.EmitToMemory(V, T);
58
59 if (V->getType()->isPointerTy())
60 return CGF.Builder.CreatePtrToInt(V, IntType);
61
62 assert(V->getType() == IntType);
63 return V;
64 }
65
66 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
67 QualType T, llvm::Type *ResultType) {
68 V = CGF.EmitFromMemory(V, T);
69
70 if (ResultType->isPointerTy())
71 return CGF.Builder.CreateIntToPtr(V, ResultType);
72
73 assert(V->getType() == ResultType);
74 return V;
75 }
76
77 /// Utility to insert an atomic instruction based on Intrinsic::ID
78 /// and the expression node.
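/// For example (a rough sketch), __sync_fetch_and_add_4(p, v) lowers to:
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
/// and the old value %old is the result.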
79 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
80 llvm::AtomicRMWInst::BinOp Kind,
81 const CallExpr *E) {
82 QualType T = E->getType();
83 assert(E->getArg(0)->getType()->isPointerType());
84 assert(CGF.getContext().hasSameUnqualifiedType(T,
85 E->getArg(0)->getType()->getPointeeType()));
86 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
87
88 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
89 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
90
91 llvm::IntegerType *IntType =
92 llvm::IntegerType::get(CGF.getLLVMContext(),
93 CGF.getContext().getTypeSize(T));
94 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
95
96 llvm::Value *Args[2];
97 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
98 Args[1] = CGF.EmitScalarExpr(E->getArg(1));
99 llvm::Type *ValueType = Args[1]->getType();
100 Args[1] = EmitToInt(CGF, Args[1], T, IntType);
101
102 llvm::Value *Result =
103 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
104 llvm::SequentiallyConsistent);
105 Result = EmitFromInt(CGF, Result, T, ValueType);
106 return RValue::get(Result);
107 }
108
109 /// Utility to insert an atomic instruction based on Intrinsic::ID and
110 /// the expression node, where the return value is the result of the
111 /// operation.
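/// For example (a rough sketch), __sync_add_and_fetch_4(p, v) lowers to:
///   %old = atomicrmw add i32* %p, i32 %v seq_cst
///   %new = add i32 %old, %v
/// and %new is the result.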
112 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
113 llvm::AtomicRMWInst::BinOp Kind,
114 const CallExpr *E,
115 Instruction::BinaryOps Op) {
116 QualType T = E->getType();
117 assert(E->getArg(0)->getType()->isPointerType());
118 assert(CGF.getContext().hasSameUnqualifiedType(T,
119 E->getArg(0)->getType()->getPointeeType()));
120 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
121
122 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
123 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
124
125 llvm::IntegerType *IntType =
126 llvm::IntegerType::get(CGF.getLLVMContext(),
127 CGF.getContext().getTypeSize(T));
128 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
129
130 llvm::Value *Args[2];
131 Args[1] = CGF.EmitScalarExpr(E->getArg(1));
132 llvm::Type *ValueType = Args[1]->getType();
133 Args[1] = EmitToInt(CGF, Args[1], T, IntType);
134 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
135
136 llvm::Value *Result =
137 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
138 llvm::SequentiallyConsistent);
139 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
140 Result = EmitFromInt(CGF, Result, T, ValueType);
141 return RValue::get(Result);
142 }
143
144 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
145 /// which must be a scalar floating point type.
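/// For a 'float' argument, for instance, this emits a call to "fabsf" with
/// the prototype float (float).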
146 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
147 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
148 assert(ValTyP && "isn't scalar fp type!");
149
150 StringRef FnName;
151 switch (ValTyP->getKind()) {
152 default: llvm_unreachable("Isn't a scalar fp type!");
153 case BuiltinType::Float: FnName = "fabsf"; break;
154 case BuiltinType::Double: FnName = "fabs"; break;
155 case BuiltinType::LongDouble: FnName = "fabsl"; break;
156 }
157
158 // The prototype is something that takes and returns whatever V's type is.
159 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
160 false);
161 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
162
163 return CGF.EmitNounwindRuntimeCall(Fn, V, "abs");
164 }
165
166 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
167 const CallExpr *E, llvm::Value *calleeValue) {
168 return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
169 ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
170 }
171
172 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
173 /// depending on IntrinsicID.
174 ///
175 /// \arg CGF The current codegen function.
176 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
177 /// \arg X The first argument to the llvm.*.with.overflow.*.
178 /// \arg Y The second argument to the llvm.*.with.overflow.*.
179 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
180 /// \returns The result (i.e. sum/product) returned by the intrinsic.
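/// For example (a rough sketch), with Intrinsic::uadd_with_overflow and i32
/// arguments this emits:
///   %pair = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
/// writing extractvalue 1 (the carry bit) to \p Carry and returning
/// extractvalue 0 (the sum).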
181 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
182 const llvm::Intrinsic::ID IntrinsicID,
183 llvm::Value *X, llvm::Value *Y,
184 llvm::Value *&Carry) {
185 // Make sure we have integers of the same width.
186 assert(X->getType() == Y->getType() &&
187 "Arguments must be the same type. (Did you forget to make sure both "
188 "arguments have the same integer width?)");
189
190 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
191 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
192 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
193 return CGF.Builder.CreateExtractValue(Tmp, 0);
194 }
195
196 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
197 unsigned BuiltinID, const CallExpr *E) {
198 // See if we can constant fold this builtin. If so, don't emit it at all.
199 Expr::EvalResult Result;
200 if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
201 !Result.hasSideEffects()) {
202 if (Result.Val.isInt())
203 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
204 Result.Val.getInt()));
205 if (Result.Val.isFloat())
206 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
207 Result.Val.getFloat()));
208 }
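// For example (illustrative), a call such as __builtin_popcount(0xF0)
// constant-folds to 4 here, and no intrinsic call is emitted for it.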
209
210 switch (BuiltinID) {
211 default: break; // Handle intrinsics and libm functions below.
212 case Builtin::BI__builtin___CFStringMakeConstantString:
213 case Builtin::BI__builtin___NSStringMakeConstantString:
214 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
215 case Builtin::BI__builtin_stdarg_start:
216 case Builtin::BI__builtin_va_start:
217 case Builtin::BI__builtin_va_end: {
218 Value *ArgValue = EmitVAListRef(E->getArg(0));
219 llvm::Type *DestType = Int8PtrTy;
220 if (ArgValue->getType() != DestType)
221 ArgValue = Builder.CreateBitCast(ArgValue, DestType,
222 ArgValue->getName().data());
223
224 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
225 Intrinsic::vaend : Intrinsic::vastart;
226 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
227 }
228 case Builtin::BI__builtin_va_copy: {
229 Value *DstPtr = EmitVAListRef(E->getArg(0));
230 Value *SrcPtr = EmitVAListRef(E->getArg(1));
231
232 llvm::Type *Type = Int8PtrTy;
233
234 DstPtr = Builder.CreateBitCast(DstPtr, Type);
235 SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
236 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
237 DstPtr, SrcPtr));
238 }
239 case Builtin::BI__builtin_abs:
240 case Builtin::BI__builtin_labs:
241 case Builtin::BI__builtin_llabs: {
242 Value *ArgValue = EmitScalarExpr(E->getArg(0));
243
244 Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
245 Value *CmpResult =
246 Builder.CreateICmpSGE(ArgValue,
247 llvm::Constant::getNullValue(ArgValue->getType()),
248 "abscond");
249 Value *Result =
250 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
251
252 return RValue::get(Result);
253 }
254
255 case Builtin::BI__builtin_conj:
256 case Builtin::BI__builtin_conjf:
257 case Builtin::BI__builtin_conjl: {
258 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
259 Value *Real = ComplexVal.first;
260 Value *Imag = ComplexVal.second;
261 Value *Zero =
262 Imag->getType()->isFPOrFPVectorTy()
263 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
264 : llvm::Constant::getNullValue(Imag->getType());
265
266 Imag = Builder.CreateFSub(Zero, Imag, "sub");
267 return RValue::getComplex(std::make_pair(Real, Imag));
268 }
269 case Builtin::BI__builtin_creal:
270 case Builtin::BI__builtin_crealf:
271 case Builtin::BI__builtin_creall:
272 case Builtin::BIcreal:
273 case Builtin::BIcrealf:
274 case Builtin::BIcreall: {
275 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
276 return RValue::get(ComplexVal.first);
277 }
278
279 case Builtin::BI__builtin_cimag:
280 case Builtin::BI__builtin_cimagf:
281 case Builtin::BI__builtin_cimagl:
282 case Builtin::BIcimag:
283 case Builtin::BIcimagf:
284 case Builtin::BIcimagl: {
285 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
286 return RValue::get(ComplexVal.second);
287 }
288
289 case Builtin::BI__builtin_ctzs:
290 case Builtin::BI__builtin_ctz:
291 case Builtin::BI__builtin_ctzl:
292 case Builtin::BI__builtin_ctzll: {
293 Value *ArgValue = EmitScalarExpr(E->getArg(0));
294
295 llvm::Type *ArgType = ArgValue->getType();
296 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
297
298 llvm::Type *ResultType = ConvertType(E->getType());
299 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
300 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
301 if (Result->getType() != ResultType)
302 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
303 "cast");
304 return RValue::get(Result);
305 }
306 case Builtin::BI__builtin_clzs:
307 case Builtin::BI__builtin_clz:
308 case Builtin::BI__builtin_clzl:
309 case Builtin::BI__builtin_clzll: {
310 Value *ArgValue = EmitScalarExpr(E->getArg(0));
311
312 llvm::Type *ArgType = ArgValue->getType();
313 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
314
315 llvm::Type *ResultType = ConvertType(E->getType());
316 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
317 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
318 if (Result->getType() != ResultType)
319 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
320 "cast");
321 return RValue::get(Result);
322 }
323 case Builtin::BI__builtin_ffs:
324 case Builtin::BI__builtin_ffsl:
325 case Builtin::BI__builtin_ffsll: {
326 // ffs(x) -> x ? cttz(x) + 1 : 0
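// For i32 this emits, roughly (illustrative):
//   %cttz   = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %tmp    = add i32 %cttz, 1
//   %iszero = icmp eq i32 %x, 0
//   %ffs    = select i1 %iszero, i32 0, i32 %tmp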
327 Value *ArgValue = EmitScalarExpr(E->getArg(0));
328
329 llvm::Type *ArgType = ArgValue->getType();
330 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
331
332 llvm::Type *ResultType = ConvertType(E->getType());
333 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
334 Builder.getTrue()),
335 llvm::ConstantInt::get(ArgType, 1));
336 Value *Zero = llvm::Constant::getNullValue(ArgType);
337 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
338 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
339 if (Result->getType() != ResultType)
340 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
341 "cast");
342 return RValue::get(Result);
343 }
344 case Builtin::BI__builtin_parity:
345 case Builtin::BI__builtin_parityl:
346 case Builtin::BI__builtin_parityll: {
347 // parity(x) -> ctpop(x) & 1
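// For i32 this emits, roughly (illustrative):
//   %ctpop  = call i32 @llvm.ctpop.i32(i32 %x)
//   %parity = and i32 %ctpop, 1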
348 Value *ArgValue = EmitScalarExpr(E->getArg(0));
349
350 llvm::Type *ArgType = ArgValue->getType();
351 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
352
353 llvm::Type *ResultType = ConvertType(E->getType());
354 Value *Tmp = Builder.CreateCall(F, ArgValue);
355 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
356 if (Result->getType() != ResultType)
357 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
358 "cast");
359 return RValue::get(Result);
360 }
361 case Builtin::BI__builtin_popcount:
362 case Builtin::BI__builtin_popcountl:
363 case Builtin::BI__builtin_popcountll: {
364 Value *ArgValue = EmitScalarExpr(E->getArg(0));
365
366 llvm::Type *ArgType = ArgValue->getType();
367 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
368
369 llvm::Type *ResultType = ConvertType(E->getType());
370 Value *Result = Builder.CreateCall(F, ArgValue);
371 if (Result->getType() != ResultType)
372 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
373 "cast");
374 return RValue::get(Result);
375 }
376 case Builtin::BI__builtin_expect: {
377 Value *ArgValue = EmitScalarExpr(E->getArg(0));
378 llvm::Type *ArgType = ArgValue->getType();
379
380 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
381 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
382
383 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
384 "expval");
385 return RValue::get(Result);
386 }
387 case Builtin::BI__builtin_bswap16:
388 case Builtin::BI__builtin_bswap32:
389 case Builtin::BI__builtin_bswap64: {
390 Value *ArgValue = EmitScalarExpr(E->getArg(0));
391 llvm::Type *ArgType = ArgValue->getType();
392 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
393 return RValue::get(Builder.CreateCall(F, ArgValue));
394 }
395 case Builtin::BI__builtin_object_size: {
396 // We rely on constant folding to deal with expressions with side effects.
397 assert(!E->getArg(0)->HasSideEffects(getContext()) &&
398 "should have been constant folded");
399
400 // We pass this builtin onto the optimizer so that it can
401 // figure out the object size in more complex cases.
402 llvm::Type *ResType = ConvertType(E->getType());
403
404 // LLVM only supports 0 and 2, make sure that we pass along that
405 // as a boolean.
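// For example (illustrative), on a 64-bit target __builtin_object_size(p, 2)
// becomes roughly:
//   %size = call i64 @llvm.objectsize.i64(i8* %p, i1 true)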
406 Value *Ty = EmitScalarExpr(E->getArg(1));
407 ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
408 assert(CI);
409 uint64_t val = CI->getZExtValue();
410 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
411
412 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
413 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
414 }
415 case Builtin::BI__builtin_prefetch: {
416 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
417 // FIXME: Technically these constants should be of type 'int', yes?
418 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
419 llvm::ConstantInt::get(Int32Ty, 0);
420 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
421 llvm::ConstantInt::get(Int32Ty, 3);
422 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
423 Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
424 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
425 }
426 case Builtin::BI__builtin_readcyclecounter: {
427 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
428 return RValue::get(Builder.CreateCall(F));
429 }
430 case Builtin::BI__builtin_trap: {
431 Value *F = CGM.getIntrinsic(Intrinsic::trap);
432 return RValue::get(Builder.CreateCall(F));
433 }
434 case Builtin::BI__debugbreak: {
435 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
436 return RValue::get(Builder.CreateCall(F));
437 }
438 case Builtin::BI__builtin_unreachable: {
439 if (SanOpts->Unreachable)
440 EmitCheck(Builder.getFalse(), "builtin_unreachable",
441 EmitCheckSourceLocation(E->getExprLoc()),
442 ArrayRef<llvm::Value *>(), CRK_Unrecoverable);
443 else
444 Builder.CreateUnreachable();
445
446 // We do need to preserve an insertion point.
447 EmitBlock(createBasicBlock("unreachable.cont"));
448
449 return RValue::get(0);
450 }
451
452 case Builtin::BI__builtin_powi:
453 case Builtin::BI__builtin_powif:
454 case Builtin::BI__builtin_powil: {
455 Value *Base = EmitScalarExpr(E->getArg(0));
456 Value *Exponent = EmitScalarExpr(E->getArg(1));
457 llvm::Type *ArgType = Base->getType();
458 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
459 return RValue::get(Builder.CreateCall2(F, Base, Exponent));
460 }
461
462 case Builtin::BI__builtin_isgreater:
463 case Builtin::BI__builtin_isgreaterequal:
464 case Builtin::BI__builtin_isless:
465 case Builtin::BI__builtin_islessequal:
466 case Builtin::BI__builtin_islessgreater:
467 case Builtin::BI__builtin_isunordered: {
468 // Ordered comparisons: we know the arguments to these are matching scalar
469 // floating point values.
470 Value *LHS = EmitScalarExpr(E->getArg(0));
471 Value *RHS = EmitScalarExpr(E->getArg(1));
472
473 switch (BuiltinID) {
474 default: llvm_unreachable("Unknown ordered comparison");
475 case Builtin::BI__builtin_isgreater:
476 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
477 break;
478 case Builtin::BI__builtin_isgreaterequal:
479 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
480 break;
481 case Builtin::BI__builtin_isless:
482 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
483 break;
484 case Builtin::BI__builtin_islessequal:
485 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
486 break;
487 case Builtin::BI__builtin_islessgreater:
488 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
489 break;
490 case Builtin::BI__builtin_isunordered:
491 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
492 break;
493 }
494 // ZExt bool to int type.
495 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
496 }
497 case Builtin::BI__builtin_isnan: {
498 Value *V = EmitScalarExpr(E->getArg(0));
499 V = Builder.CreateFCmpUNO(V, V, "cmp");
500 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
501 }
502
503 case Builtin::BI__builtin_isinf: {
504 // isinf(x) --> fabs(x) == infinity
505 Value *V = EmitScalarExpr(E->getArg(0));
506 V = EmitFAbs(*this, V, E->getArg(0)->getType());
507
508 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
509 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
510 }
511
512 // TODO: BI__builtin_isinf_sign
513 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
514
515 case Builtin::BI__builtin_isnormal: {
516 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
517 Value *V = EmitScalarExpr(E->getArg(0));
518 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
519
520 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
521 Value *IsLessThanInf =
522 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
523 APFloat Smallest = APFloat::getSmallestNormalized(
524 getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
525 Value *IsNormal =
526 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
527 "isnormal");
528 V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
529 V = Builder.CreateAnd(V, IsNormal, "and");
530 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
531 }
532
533 case Builtin::BI__builtin_isfinite: {
534 // isfinite(x) --> x == x && fabs(x) != infinity;
535 Value *V = EmitScalarExpr(E->getArg(0));
536 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
537
538 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
539 Value *IsNotInf =
540 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
541
542 V = Builder.CreateAnd(Eq, IsNotInf, "and");
543 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
544 }
545
546 case Builtin::BI__builtin_fpclassify: {
547 Value *V = EmitScalarExpr(E->getArg(5));
548 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
549
550 // Create Result
551 BasicBlock *Begin = Builder.GetInsertBlock();
552 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
553 Builder.SetInsertPoint(End);
554 PHINode *Result =
555 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
556 "fpclassify_result");
557
558 // if (V==0) return FP_ZERO
559 Builder.SetInsertPoint(Begin);
560 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
561 "iszero");
562 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
563 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
564 Builder.CreateCondBr(IsZero, End, NotZero);
565 Result->addIncoming(ZeroLiteral, Begin);
566
567 // if (V != V) return FP_NAN
568 Builder.SetInsertPoint(NotZero);
569 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
570 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
571 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
572 Builder.CreateCondBr(IsNan, End, NotNan);
573 Result->addIncoming(NanLiteral, NotZero);
574
575 // if (fabs(V) == infinity) return FP_INFINITY
576 Builder.SetInsertPoint(NotNan);
577 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
578 Value *IsInf =
579 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
580 "isinf");
581 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
582 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
583 Builder.CreateCondBr(IsInf, End, NotInf);
584 Result->addIncoming(InfLiteral, NotNan);
585
586 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
587 Builder.SetInsertPoint(NotInf);
588 APFloat Smallest = APFloat::getSmallestNormalized(
589 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
590 Value *IsNormal =
591 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
592 "isnormal");
593 Value *NormalResult =
594 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
595 EmitScalarExpr(E->getArg(3)));
596 Builder.CreateBr(End);
597 Result->addIncoming(NormalResult, NotInf);
598
599 // return Result
600 Builder.SetInsertPoint(End);
601 return RValue::get(Result);
602 }
603
604 case Builtin::BIalloca:
605 case Builtin::BI__builtin_alloca: {
606 Value *Size = EmitScalarExpr(E->getArg(0));
607 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
608 }
609 case Builtin::BIbzero:
610 case Builtin::BI__builtin_bzero: {
611 std::pair<llvm::Value*, unsigned> Dest =
612 EmitPointerWithAlignment(E->getArg(0));
613 Value *SizeVal = EmitScalarExpr(E->getArg(1));
614 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
615 Dest.second, false);
616 return RValue::get(Dest.first);
617 }
618 case Builtin::BImemcpy:
619 case Builtin::BI__builtin_memcpy: {
620 std::pair<llvm::Value*, unsigned> Dest =
621 EmitPointerWithAlignment(E->getArg(0));
622 std::pair<llvm::Value*, unsigned> Src =
623 EmitPointerWithAlignment(E->getArg(1));
624 Value *SizeVal = EmitScalarExpr(E->getArg(2));
625 unsigned Align = std::min(Dest.second, Src.second);
626 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
627 return RValue::get(Dest.first);
628 }
629
630 case Builtin::BI__builtin___memcpy_chk: {
631 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
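// For example (illustrative), __builtin___memcpy_chk(d, s, 16, 32) is emitted
// as a plain llvm.memcpy of 16 bytes; if the sizes are not provably safe
// constants, we fall through to the normal library-call path below.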
632 llvm::APSInt Size, DstSize;
633 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
634 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
635 break;
636 if (Size.ugt(DstSize))
637 break;
638 std::pair<llvm::Value*, unsigned> Dest =
639 EmitPointerWithAlignment(E->getArg(0));
640 std::pair<llvm::Value*, unsigned> Src =
641 EmitPointerWithAlignment(E->getArg(1));
642 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
643 unsigned Align = std::min(Dest.second, Src.second);
644 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
645 return RValue::get(Dest.first);
646 }
647
648 case Builtin::BI__builtin_objc_memmove_collectable: {
649 Value *Address = EmitScalarExpr(E->getArg(0));
650 Value *SrcAddr = EmitScalarExpr(E->getArg(1));
651 Value *SizeVal = EmitScalarExpr(E->getArg(2));
652 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
653 Address, SrcAddr, SizeVal);
654 return RValue::get(Address);
655 }
656
657 case Builtin::BI__builtin___memmove_chk: {
658 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
659 llvm::APSInt Size, DstSize;
660 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
661 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
662 break;
663 if (Size.ugt(DstSize))
664 break;
665 std::pair<llvm::Value*, unsigned> Dest =
666 EmitPointerWithAlignment(E->getArg(0));
667 std::pair<llvm::Value*, unsigned> Src =
668 EmitPointerWithAlignment(E->getArg(1));
669 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
670 unsigned Align = std::min(Dest.second, Src.second);
671 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
672 return RValue::get(Dest.first);
673 }
674
675 case Builtin::BImemmove:
676 case Builtin::BI__builtin_memmove: {
677 std::pair<llvm::Value*, unsigned> Dest =
678 EmitPointerWithAlignment(E->getArg(0));
679 std::pair<llvm::Value*, unsigned> Src =
680 EmitPointerWithAlignment(E->getArg(1));
681 Value *SizeVal = EmitScalarExpr(E->getArg(2));
682 unsigned Align = std::min(Dest.second, Src.second);
683 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
684 return RValue::get(Dest.first);
685 }
686 case Builtin::BImemset:
687 case Builtin::BI__builtin_memset: {
688 std::pair<llvm::Value*, unsigned> Dest =
689 EmitPointerWithAlignment(E->getArg(0));
690 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
691 Builder.getInt8Ty());
692 Value *SizeVal = EmitScalarExpr(E->getArg(2));
693 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
694 return RValue::get(Dest.first);
695 }
696 case Builtin::BI__builtin___memset_chk: {
697 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
698 llvm::APSInt Size, DstSize;
699 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
700 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
701 break;
702 if (Size.ugt(DstSize))
703 break;
704 std::pair<llvm::Value*, unsigned> Dest =
705 EmitPointerWithAlignment(E->getArg(0));
706 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
707 Builder.getInt8Ty());
708 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
709 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
710 return RValue::get(Dest.first);
711 }
712 case Builtin::BI__builtin_dwarf_cfa: {
713 // The offset in bytes from the first argument to the CFA.
714 //
715 // Why on earth is this in the frontend? Is there any reason at
716 // all that the backend can't reasonably determine this while
717 // lowering llvm.eh.dwarf.cfa()?
718 //
719 // TODO: If there's a satisfactory reason, add a target hook for
720 // this instead of hard-coding 0, which is correct for most targets.
721 int32_t Offset = 0;
722
723 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
724 return RValue::get(Builder.CreateCall(F,
725 llvm::ConstantInt::get(Int32Ty, Offset)));
726 }
727 case Builtin::BI__builtin_return_address: {
728 Value *Depth = EmitScalarExpr(E->getArg(0));
729 Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
730 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
731 return RValue::get(Builder.CreateCall(F, Depth));
732 }
733 case Builtin::BI__builtin_frame_address: {
734 Value *Depth = EmitScalarExpr(E->getArg(0));
735 Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
736 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
737 return RValue::get(Builder.CreateCall(F, Depth));
738 }
739 case Builtin::BI__builtin_extract_return_addr: {
740 Value *Address = EmitScalarExpr(E->getArg(0));
741 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
742 return RValue::get(Result);
743 }
744 case Builtin::BI__builtin_frob_return_addr: {
745 Value *Address = EmitScalarExpr(E->getArg(0));
746 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
747 return RValue::get(Result);
748 }
749 case Builtin::BI__builtin_dwarf_sp_column: {
750 llvm::IntegerType *Ty
751 = cast<llvm::IntegerType>(ConvertType(E->getType()));
752 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
753 if (Column == -1) {
754 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
755 return RValue::get(llvm::UndefValue::get(Ty));
756 }
757 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
758 }
759 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
760 Value *Address = EmitScalarExpr(E->getArg(0));
761 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
762 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
763 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
764 }
765 case Builtin::BI__builtin_eh_return: {
766 Value *Int = EmitScalarExpr(E->getArg(0));
767 Value *Ptr = EmitScalarExpr(E->getArg(1));
768
769 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
770 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
771 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
772 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
773 ? Intrinsic::eh_return_i32
774 : Intrinsic::eh_return_i64);
775 Builder.CreateCall2(F, Int, Ptr);
776 Builder.CreateUnreachable();
777
778 // We do need to preserve an insertion point.
779 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
780
781 return RValue::get(0);
782 }
783 case Builtin::BI__builtin_unwind_init: {
784 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
785 return RValue::get(Builder.CreateCall(F));
786 }
787 case Builtin::BI__builtin_extend_pointer: {
788 // Extends a pointer to the size of an _Unwind_Word, which is
789 // uint64_t on all platforms. Generally this gets poked into a
790 // register and eventually used as an address, so if the
791 // addressing registers are wider than pointers and the platform
792 // doesn't implicitly ignore high-order bits when doing
793 // addressing, we need to make sure we zext / sext based on
794 // the platform's expectations.
795 //
796 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
797
798 // Cast the pointer to intptr_t.
799 Value *Ptr = EmitScalarExpr(E->getArg(0));
800 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
801
802 // If that's 64 bits, we're done.
803 if (IntPtrTy->getBitWidth() == 64)
804 return RValue::get(Result);
805
806 // Otherwise, ask the codegen data what to do.
807 if (getTargetHooks().extendPointerWithSExt())
808 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
809 else
810 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
811 }
812 case Builtin::BI__builtin_setjmp: {
813 // Buffer is a void**.
814 Value *Buf = EmitScalarExpr(E->getArg(0));
815
816 // Store the frame pointer to the setjmp buffer.
817 Value *FrameAddr =
818 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
819 ConstantInt::get(Int32Ty, 0));
820 Builder.CreateStore(FrameAddr, Buf);
821
822 // Store the stack pointer to the setjmp buffer.
823 Value *StackAddr =
824 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
825 Value *StackSaveSlot =
826 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
827 Builder.CreateStore(StackAddr, StackSaveSlot);
828
829 // Call LLVM's EH setjmp, which is lightweight.
830 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
831 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
832 return RValue::get(Builder.CreateCall(F, Buf));
833 }
834 case Builtin::BI__builtin_longjmp: {
835 Value *Buf = EmitScalarExpr(E->getArg(0));
836 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
837
838 // Call LLVM's EH longjmp, which is lightweight.
839 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
840
841 // longjmp doesn't return; mark this as unreachable.
842 Builder.CreateUnreachable();
843
844 // We do need to preserve an insertion point.
845 EmitBlock(createBasicBlock("longjmp.cont"));
846
847 return RValue::get(0);
848 }
849 case Builtin::BI__sync_fetch_and_add:
850 case Builtin::BI__sync_fetch_and_sub:
851 case Builtin::BI__sync_fetch_and_or:
852 case Builtin::BI__sync_fetch_and_and:
853 case Builtin::BI__sync_fetch_and_xor:
854 case Builtin::BI__sync_add_and_fetch:
855 case Builtin::BI__sync_sub_and_fetch:
856 case Builtin::BI__sync_and_and_fetch:
857 case Builtin::BI__sync_or_and_fetch:
858 case Builtin::BI__sync_xor_and_fetch:
859 case Builtin::BI__sync_val_compare_and_swap:
860 case Builtin::BI__sync_bool_compare_and_swap:
861 case Builtin::BI__sync_lock_test_and_set:
862 case Builtin::BI__sync_lock_release:
863 case Builtin::BI__sync_swap:
864 llvm_unreachable("Shouldn't make it through sema");
865 case Builtin::BI__sync_fetch_and_add_1:
866 case Builtin::BI__sync_fetch_and_add_2:
867 case Builtin::BI__sync_fetch_and_add_4:
868 case Builtin::BI__sync_fetch_and_add_8:
869 case Builtin::BI__sync_fetch_and_add_16:
870 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
871 case Builtin::BI__sync_fetch_and_sub_1:
872 case Builtin::BI__sync_fetch_and_sub_2:
873 case Builtin::BI__sync_fetch_and_sub_4:
874 case Builtin::BI__sync_fetch_and_sub_8:
875 case Builtin::BI__sync_fetch_and_sub_16:
876 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
877 case Builtin::BI__sync_fetch_and_or_1:
878 case Builtin::BI__sync_fetch_and_or_2:
879 case Builtin::BI__sync_fetch_and_or_4:
880 case Builtin::BI__sync_fetch_and_or_8:
881 case Builtin::BI__sync_fetch_and_or_16:
882 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
883 case Builtin::BI__sync_fetch_and_and_1:
884 case Builtin::BI__sync_fetch_and_and_2:
885 case Builtin::BI__sync_fetch_and_and_4:
886 case Builtin::BI__sync_fetch_and_and_8:
887 case Builtin::BI__sync_fetch_and_and_16:
888 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
889 case Builtin::BI__sync_fetch_and_xor_1:
890 case Builtin::BI__sync_fetch_and_xor_2:
891 case Builtin::BI__sync_fetch_and_xor_4:
892 case Builtin::BI__sync_fetch_and_xor_8:
893 case Builtin::BI__sync_fetch_and_xor_16:
894 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
895
896 // Clang extensions: not overloaded yet.
897 case Builtin::BI__sync_fetch_and_min:
898 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
899 case Builtin::BI__sync_fetch_and_max:
900 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
901 case Builtin::BI__sync_fetch_and_umin:
902 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
903 case Builtin::BI__sync_fetch_and_umax:
904 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
905
906 case Builtin::BI__sync_add_and_fetch_1:
907 case Builtin::BI__sync_add_and_fetch_2:
908 case Builtin::BI__sync_add_and_fetch_4:
909 case Builtin::BI__sync_add_and_fetch_8:
910 case Builtin::BI__sync_add_and_fetch_16:
911 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
912 llvm::Instruction::Add);
913 case Builtin::BI__sync_sub_and_fetch_1:
914 case Builtin::BI__sync_sub_and_fetch_2:
915 case Builtin::BI__sync_sub_and_fetch_4:
916 case Builtin::BI__sync_sub_and_fetch_8:
917 case Builtin::BI__sync_sub_and_fetch_16:
918 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
919 llvm::Instruction::Sub);
920 case Builtin::BI__sync_and_and_fetch_1:
921 case Builtin::BI__sync_and_and_fetch_2:
922 case Builtin::BI__sync_and_and_fetch_4:
923 case Builtin::BI__sync_and_and_fetch_8:
924 case Builtin::BI__sync_and_and_fetch_16:
925 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
926 llvm::Instruction::And);
927 case Builtin::BI__sync_or_and_fetch_1:
928 case Builtin::BI__sync_or_and_fetch_2:
929 case Builtin::BI__sync_or_and_fetch_4:
930 case Builtin::BI__sync_or_and_fetch_8:
931 case Builtin::BI__sync_or_and_fetch_16:
932 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
933 llvm::Instruction::Or);
934 case Builtin::BI__sync_xor_and_fetch_1:
935 case Builtin::BI__sync_xor_and_fetch_2:
936 case Builtin::BI__sync_xor_and_fetch_4:
937 case Builtin::BI__sync_xor_and_fetch_8:
938 case Builtin::BI__sync_xor_and_fetch_16:
939 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
940 llvm::Instruction::Xor);
941
942 case Builtin::BI__sync_val_compare_and_swap_1:
943 case Builtin::BI__sync_val_compare_and_swap_2:
944 case Builtin::BI__sync_val_compare_and_swap_4:
945 case Builtin::BI__sync_val_compare_and_swap_8:
946 case Builtin::BI__sync_val_compare_and_swap_16: {
947 QualType T = E->getType();
948 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
949 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
950
951 llvm::IntegerType *IntType =
952 llvm::IntegerType::get(getLLVMContext(),
953 getContext().getTypeSize(T));
954 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
955
956 Value *Args[3];
957 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
958 Args[1] = EmitScalarExpr(E->getArg(1));
959 llvm::Type *ValueType = Args[1]->getType();
960 Args[1] = EmitToInt(*this, Args[1], T, IntType);
961 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
962
963 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
964 llvm::SequentiallyConsistent);
965 Result = EmitFromInt(*this, Result, T, ValueType);
966 return RValue::get(Result);
967 }
968
969 case Builtin::BI__sync_bool_compare_and_swap_1:
970 case Builtin::BI__sync_bool_compare_and_swap_2:
971 case Builtin::BI__sync_bool_compare_and_swap_4:
972 case Builtin::BI__sync_bool_compare_and_swap_8:
973 case Builtin::BI__sync_bool_compare_and_swap_16: {
974 QualType T = E->getArg(1)->getType();
975 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
976 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
977
978 llvm::IntegerType *IntType =
979 llvm::IntegerType::get(getLLVMContext(),
980 getContext().getTypeSize(T));
981 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
982
983 Value *Args[3];
984 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
985 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
986 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
987
988 Value *OldVal = Args[1];
989 Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
990 llvm::SequentiallyConsistent);
991 Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
992 // zext bool to int.
993 Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
994 return RValue::get(Result);
995 }
996
997 case Builtin::BI__sync_swap_1:
998 case Builtin::BI__sync_swap_2:
999 case Builtin::BI__sync_swap_4:
1000 case Builtin::BI__sync_swap_8:
1001 case Builtin::BI__sync_swap_16:
1002 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1003
1004 case Builtin::BI__sync_lock_test_and_set_1:
1005 case Builtin::BI__sync_lock_test_and_set_2:
1006 case Builtin::BI__sync_lock_test_and_set_4:
1007 case Builtin::BI__sync_lock_test_and_set_8:
1008 case Builtin::BI__sync_lock_test_and_set_16:
1009 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1010
1011 case Builtin::BI__sync_lock_release_1:
1012 case Builtin::BI__sync_lock_release_2:
1013 case Builtin::BI__sync_lock_release_4:
1014 case Builtin::BI__sync_lock_release_8:
1015 case Builtin::BI__sync_lock_release_16: {
1016 Value *Ptr = EmitScalarExpr(E->getArg(0));
1017 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1018 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1019 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1020 StoreSize.getQuantity() * 8);
1021 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1022 llvm::StoreInst *Store =
1023 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
1024 Store->setAlignment(StoreSize.getQuantity());
1025 Store->setAtomic(llvm::Release);
1026 return RValue::get(0);
1027 }
1028
1029 case Builtin::BI__sync_synchronize: {
1030 // We assume this is supposed to correspond to a C++0x-style
1031 // sequentially-consistent fence (i.e. this is only usable for
1032 // synchronization, not device I/O or anything like that). This intrinsic
1033 // is really badly designed in the sense that in theory, there isn't
1034 // any way to safely use it... but in practice, it mostly works
1035 // to use it with non-atomic loads and stores to get acquire/release
1036 // semantics.
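// Concretely, this lowers to a single instruction (illustrative):
//   fence seq_cst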
1037 Builder.CreateFence(llvm::SequentiallyConsistent);
1038 return RValue::get(0);
1039 }
1040
1041 case Builtin::BI__c11_atomic_is_lock_free:
1042 case Builtin::BI__atomic_is_lock_free: {
1043 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1044 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1045 // _Atomic(T) is always properly-aligned.
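// For the __c11 form on a 4-byte _Atomic(int), for instance, the emitted
// call is roughly (illustrative):
//   call i1 @__atomic_is_lock_free(i64 4, i8* null)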
1046 const char *LibCallName = "__atomic_is_lock_free";
1047 CallArgList Args;
1048 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1049 getContext().getSizeType());
1050 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1051 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1052 getContext().VoidPtrTy);
1053 else
1054 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1055 getContext().VoidPtrTy);
1056 const CGFunctionInfo &FuncInfo =
1057 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1058 FunctionType::ExtInfo(),
1059 RequiredArgs::All);
1060 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1061 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1062 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1063 }
1064
1065 case Builtin::BI__atomic_test_and_set: {
1066 // Look at the argument type to determine whether this is a volatile
1067 // operation. The parameter type is always volatile.
1068 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1069 bool Volatile =
1070 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1071
1072 Value *Ptr = EmitScalarExpr(E->getArg(0));
1073 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1074 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1075 Value *NewVal = Builder.getInt8(1);
1076 Value *Order = EmitScalarExpr(E->getArg(1));
1077 if (isa<llvm::ConstantInt>(Order)) {
1078 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1079 AtomicRMWInst *Result = 0;
1080 switch (ord) {
1081 case 0: // memory_order_relaxed
1082 default: // invalid order
1083 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1084 Ptr, NewVal,
1085 llvm::Monotonic);
1086 break;
1087 case 1: // memory_order_consume
1088 case 2: // memory_order_acquire
1089 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1090 Ptr, NewVal,
1091 llvm::Acquire);
1092 break;
1093 case 3: // memory_order_release
1094 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1095 Ptr, NewVal,
1096 llvm::Release);
1097 break;
1098 case 4: // memory_order_acq_rel
1099 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1100 Ptr, NewVal,
1101 llvm::AcquireRelease);
1102 break;
1103 case 5: // memory_order_seq_cst
1104 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1105 Ptr, NewVal,
1106 llvm::SequentiallyConsistent);
1107 break;
1108 }
1109 Result->setVolatile(Volatile);
1110 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1111 }
1112
1113 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1114
1115 llvm::BasicBlock *BBs[5] = {
1116 createBasicBlock("monotonic", CurFn),
1117 createBasicBlock("acquire", CurFn),
1118 createBasicBlock("release", CurFn),
1119 createBasicBlock("acqrel", CurFn),
1120 createBasicBlock("seqcst", CurFn)
1121 };
1122 llvm::AtomicOrdering Orders[5] = {
1123 llvm::Monotonic, llvm::Acquire, llvm::Release,
1124 llvm::AcquireRelease, llvm::SequentiallyConsistent
1125 };
1126
1127 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1128 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1129
1130 Builder.SetInsertPoint(ContBB);
1131 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1132
1133 for (unsigned i = 0; i < 5; ++i) {
1134 Builder.SetInsertPoint(BBs[i]);
1135 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1136 Ptr, NewVal, Orders[i]);
1137 RMW->setVolatile(Volatile);
1138 Result->addIncoming(RMW, BBs[i]);
1139 Builder.CreateBr(ContBB);
1140 }
1141
1142 SI->addCase(Builder.getInt32(0), BBs[0]);
1143 SI->addCase(Builder.getInt32(1), BBs[1]);
1144 SI->addCase(Builder.getInt32(2), BBs[1]);
1145 SI->addCase(Builder.getInt32(3), BBs[2]);
1146 SI->addCase(Builder.getInt32(4), BBs[3]);
1147 SI->addCase(Builder.getInt32(5), BBs[4]);
1148
1149 Builder.SetInsertPoint(ContBB);
1150 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1151 }
1152
1153 case Builtin::BI__atomic_clear: {
1154 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1155 bool Volatile =
1156 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1157
1158 Value *Ptr = EmitScalarExpr(E->getArg(0));
1159 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1160 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1161 Value *NewVal = Builder.getInt8(0);
1162 Value *Order = EmitScalarExpr(E->getArg(1));
1163 if (isa<llvm::ConstantInt>(Order)) {
1164 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1165 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1166 Store->setAlignment(1);
1167 switch (ord) {
1168 case 0: // memory_order_relaxed
1169 default: // invalid order
1170 Store->setOrdering(llvm::Monotonic);
1171 break;
1172 case 3: // memory_order_release
1173 Store->setOrdering(llvm::Release);
1174 break;
1175 case 5: // memory_order_seq_cst
1176 Store->setOrdering(llvm::SequentiallyConsistent);
1177 break;
1178 }
1179 return RValue::get(0);
1180 }
1181
1182 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1183
1184 llvm::BasicBlock *BBs[3] = {
1185 createBasicBlock("monotonic", CurFn),
1186 createBasicBlock("release", CurFn),
1187 createBasicBlock("seqcst", CurFn)
1188 };
1189 llvm::AtomicOrdering Orders[3] = {
1190 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1191 };
1192
1193 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1194 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1195
1196 for (unsigned i = 0; i < 3; ++i) {
1197 Builder.SetInsertPoint(BBs[i]);
1198 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1199 Store->setAlignment(1);
1200 Store->setOrdering(Orders[i]);
1201 Builder.CreateBr(ContBB);
1202 }
1203
1204 SI->addCase(Builder.getInt32(0), BBs[0]);
1205 SI->addCase(Builder.getInt32(3), BBs[1]);
1206 SI->addCase(Builder.getInt32(5), BBs[2]);
1207
1208 Builder.SetInsertPoint(ContBB);
1209 return RValue::get(0);
1210 }
1211
1212 case Builtin::BI__atomic_thread_fence:
1213 case Builtin::BI__atomic_signal_fence:
1214 case Builtin::BI__c11_atomic_thread_fence:
1215 case Builtin::BI__c11_atomic_signal_fence: {
1216 llvm::SynchronizationScope Scope;
1217 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1218 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1219 Scope = llvm::SingleThread;
1220 else
1221 Scope = llvm::CrossThread;
1222 Value *Order = EmitScalarExpr(E->getArg(0));
1223 if (isa<llvm::ConstantInt>(Order)) {
1224 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1225 switch (ord) {
1226 case 0: // memory_order_relaxed
1227 default: // invalid order
1228 break;
1229 case 1: // memory_order_consume
1230 case 2: // memory_order_acquire
1231 Builder.CreateFence(llvm::Acquire, Scope);
1232 break;
1233 case 3: // memory_order_release
1234 Builder.CreateFence(llvm::Release, Scope);
1235 break;
1236 case 4: // memory_order_acq_rel
1237 Builder.CreateFence(llvm::AcquireRelease, Scope);
1238 break;
1239 case 5: // memory_order_seq_cst
1240 Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1241 break;
1242 }
1243 return RValue::get(0);
1244 }
1245
1246 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1247 AcquireBB = createBasicBlock("acquire", CurFn);
1248 ReleaseBB = createBasicBlock("release", CurFn);
1249 AcqRelBB = createBasicBlock("acqrel", CurFn);
1250 SeqCstBB = createBasicBlock("seqcst", CurFn);
1251 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1252
1253 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1254 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1255
1256 Builder.SetInsertPoint(AcquireBB);
1257 Builder.CreateFence(llvm::Acquire, Scope);
1258 Builder.CreateBr(ContBB);
1259 SI->addCase(Builder.getInt32(1), AcquireBB);
1260 SI->addCase(Builder.getInt32(2), AcquireBB);
1261
1262 Builder.SetInsertPoint(ReleaseBB);
1263 Builder.CreateFence(llvm::Release, Scope);
1264 Builder.CreateBr(ContBB);
1265 SI->addCase(Builder.getInt32(3), ReleaseBB);
1266
1267 Builder.SetInsertPoint(AcqRelBB);
1268 Builder.CreateFence(llvm::AcquireRelease, Scope);
1269 Builder.CreateBr(ContBB);
1270 SI->addCase(Builder.getInt32(4), AcqRelBB);
1271
1272 Builder.SetInsertPoint(SeqCstBB);
1273 Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1274 Builder.CreateBr(ContBB);
1275 SI->addCase(Builder.getInt32(5), SeqCstBB);
1276
1277 Builder.SetInsertPoint(ContBB);
1278 return RValue::get(0);
1279 }
1280
1281 // Library functions with special handling.
1282 case Builtin::BIsqrt:
1283 case Builtin::BIsqrtf:
1284 case Builtin::BIsqrtl: {
1285 // TODO: there is currently no set of optimizer flags
1286 // sufficient for us to rewrite sqrt to @llvm.sqrt.
1287 // -fmath-errno=0 is not good enough; we need finiteness.
1288 // We could probably precondition the call with an ult
1289 // against 0, but is that worth the complexity?
1290 break;
1291 }
1292
1293 case Builtin::BIpow:
1294 case Builtin::BIpowf:
1295 case Builtin::BIpowl: {
1296 // Transform a call to pow* into a @llvm.pow.* intrinsic call.
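// For example (illustrative), a 'const'-attributed pow(x, y) on doubles
// becomes:
//   %r = call double @llvm.pow.f64(double %x, double %y)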
1297 if (!FD->hasAttr<ConstAttr>())
1298 break;
1299 Value *Base = EmitScalarExpr(E->getArg(0));
1300 Value *Exponent = EmitScalarExpr(E->getArg(1));
1301 llvm::Type *ArgType = Base->getType();
1302 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1303 return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1304 break;
1305 }
1306
1307 case Builtin::BIfma:
1308 case Builtin::BIfmaf:
1309 case Builtin::BIfmal:
1310 case Builtin::BI__builtin_fma:
1311 case Builtin::BI__builtin_fmaf:
1312 case Builtin::BI__builtin_fmal: {
1313 // Rewrite fma to intrinsic.
1314 Value *FirstArg = EmitScalarExpr(E->getArg(0));
1315 llvm::Type *ArgType = FirstArg->getType();
1316 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1317 return RValue::get(Builder.CreateCall3(F, FirstArg,
1318 EmitScalarExpr(E->getArg(1)),
1319 EmitScalarExpr(E->getArg(2))));
1320 }
1321
1322 case Builtin::BI__builtin_signbit:
1323 case Builtin::BI__builtin_signbitf:
1324 case Builtin::BI__builtin_signbitl: {
1325 LLVMContext &C = CGM.getLLVMContext();
1326
1327 Value *Arg = EmitScalarExpr(E->getArg(0));
1328 llvm::Type *ArgTy = Arg->getType();
1329 if (ArgTy->isPPC_FP128Ty())
1330 break; // FIXME: I'm not sure what the right implementation is here.
1331 int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1332 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1333 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1334 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1335 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1336 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1337 }
1338 case Builtin::BI__builtin_annotation: {
1339 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1340 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1341 AnnVal->getType());
1342
1343 // Get the annotation string, go through casts. Sema requires this to be a
1344 // non-wide string literal, potentially cast, so the cast<> is safe.
1345 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1346 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1347 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1348 }
1349 case Builtin::BI__builtin_addcb:
1350 case Builtin::BI__builtin_addcs:
1351 case Builtin::BI__builtin_addc:
1352 case Builtin::BI__builtin_addcl:
1353 case Builtin::BI__builtin_addcll:
1354 case Builtin::BI__builtin_subcb:
1355 case Builtin::BI__builtin_subcs:
1356 case Builtin::BI__builtin_subc:
1357 case Builtin::BI__builtin_subcl:
1358 case Builtin::BI__builtin_subcll: {
1359
1360 // We translate all of these builtins from expressions of the form:
1361 // int x = ..., y = ..., carryin = ..., carryout, result;
1362 // result = __builtin_addc(x, y, carryin, &carryout);
1363 //
1364 // to LLVM IR of the form:
1365 //
1366 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1367 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1368 // %carry1 = extractvalue {i32, i1} %tmp1, 1
1369 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1370 // i32 %carryin)
1371 // %result = extractvalue {i32, i1} %tmp2, 0
1372 // %carry2 = extractvalue {i32, i1} %tmp2, 1
1373 // %tmp3 = or i1 %carry1, %carry2
1374 // %tmp4 = zext i1 %tmp3 to i32
1375 // store i32 %tmp4, i32* %carryout
1376
1377 // Scalarize our inputs.
1378 llvm::Value *X = EmitScalarExpr(E->getArg(0));
1379 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1380 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1381 std::pair<llvm::Value*, unsigned> CarryOutPtr =
1382 EmitPointerWithAlignment(E->getArg(3));
1383
1384 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1385 llvm::Intrinsic::ID IntrinsicId;
1386 switch (BuiltinID) {
1387 default: llvm_unreachable("Unknown multiprecision builtin id.");
1388 case Builtin::BI__builtin_addcb:
1389 case Builtin::BI__builtin_addcs:
1390 case Builtin::BI__builtin_addc:
1391 case Builtin::BI__builtin_addcl:
1392 case Builtin::BI__builtin_addcll:
1393 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1394 break;
1395 case Builtin::BI__builtin_subcb:
1396 case Builtin::BI__builtin_subcs:
1397 case Builtin::BI__builtin_subc:
1398 case Builtin::BI__builtin_subcl:
1399 case Builtin::BI__builtin_subcll:
1400 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1401 break;
1402 }
1403
1404 // Construct our resulting LLVM IR expression.
1405 llvm::Value *Carry1;
1406 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1407 X, Y, Carry1);
1408 llvm::Value *Carry2;
1409 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1410 Sum1, Carryin, Carry2);
1411 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1412 X->getType());
1413 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
1414 CarryOutPtr.first);
1415 CarryOutStore->setAlignment(CarryOutPtr.second);
1416 return RValue::get(Sum2);
1417 }
1418 case Builtin::BI__builtin_uadd_overflow:
1419 case Builtin::BI__builtin_uaddl_overflow:
1420 case Builtin::BI__builtin_uaddll_overflow:
1421 case Builtin::BI__builtin_usub_overflow:
1422 case Builtin::BI__builtin_usubl_overflow:
1423 case Builtin::BI__builtin_usubll_overflow:
1424 case Builtin::BI__builtin_umul_overflow:
1425 case Builtin::BI__builtin_umull_overflow:
1426 case Builtin::BI__builtin_umulll_overflow:
1427 case Builtin::BI__builtin_sadd_overflow:
1428 case Builtin::BI__builtin_saddl_overflow:
1429 case Builtin::BI__builtin_saddll_overflow:
1430 case Builtin::BI__builtin_ssub_overflow:
1431 case Builtin::BI__builtin_ssubl_overflow:
1432 case Builtin::BI__builtin_ssubll_overflow:
1433 case Builtin::BI__builtin_smul_overflow:
1434 case Builtin::BI__builtin_smull_overflow:
1435 case Builtin::BI__builtin_smulll_overflow: {
1436
1437 // We translate all of these builtins directly to the relevant llvm IR node.
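//
// As an illustrative sketch (variable names invented here), a call such as
//   if (__builtin_sadd_overflow(x, y, &sum)) ...
// becomes roughly:
//   %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum  = extractvalue {i32, i1} %pair, 0   ; stored through the out-pointer
//   %ovf  = extractvalue {i32, i1} %pair, 1   ; the builtin's boolean result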
1438
1439 // Scalarize our inputs.
1440 llvm::Value *X = EmitScalarExpr(E->getArg(0));
1441 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1442 std::pair<llvm::Value *, unsigned> SumOutPtr =
1443 EmitPointerWithAlignment(E->getArg(2));
1444
1445 // Decide which of the overflow intrinsics we are lowering to:
1446 llvm::Intrinsic::ID IntrinsicId;
1447 switch (BuiltinID) {
1448 default: llvm_unreachable("Unknown security overflow builtin id.");
1449 case Builtin::BI__builtin_uadd_overflow:
1450 case Builtin::BI__builtin_uaddl_overflow:
1451 case Builtin::BI__builtin_uaddll_overflow:
1452 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1453 break;
1454 case Builtin::BI__builtin_usub_overflow:
1455 case Builtin::BI__builtin_usubl_overflow:
1456 case Builtin::BI__builtin_usubll_overflow:
1457 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1458 break;
1459 case Builtin::BI__builtin_umul_overflow:
1460 case Builtin::BI__builtin_umull_overflow:
1461 case Builtin::BI__builtin_umulll_overflow:
1462 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1463 break;
1464 case Builtin::BI__builtin_sadd_overflow:
1465 case Builtin::BI__builtin_saddl_overflow:
1466 case Builtin::BI__builtin_saddll_overflow:
1467 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1468 break;
1469 case Builtin::BI__builtin_ssub_overflow:
1470 case Builtin::BI__builtin_ssubl_overflow:
1471 case Builtin::BI__builtin_ssubll_overflow:
1472 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1473 break;
1474 case Builtin::BI__builtin_smul_overflow:
1475 case Builtin::BI__builtin_smull_overflow:
1476 case Builtin::BI__builtin_smulll_overflow:
1477 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1478 break;
1479 }
1480
1481
1482 llvm::Value *Carry;
1483 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1484 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first);
1485 SumOutStore->setAlignment(SumOutPtr.second);
1486
1487 return RValue::get(Carry);
1488 }
1489 case Builtin::BI__builtin_addressof:
1490 return RValue::get(EmitLValue(E->getArg(0)).getAddress());
1491 case Builtin::BI__noop:
1492 return RValue::get(0);
1493 }
1494
1495 // If this is an alias for a lib function (e.g. __builtin_sin), emit
1496 // the call using the normal call path, but using the unmangled
1497 // version of the function name.
1498 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1499 return emitLibraryCall(*this, FD, E,
1500 CGM.getBuiltinLibFunction(FD, BuiltinID));
1501
1502 // If this is a predefined lib function (e.g. malloc), emit the call
1503 // using exactly the normal call path.
1504 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1505 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1506
1507 // See if we have a target specific intrinsic.
1508 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1509 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1510 if (const char *Prefix =
1511 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()))
1512 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
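// Illustrative note (the actual mapping lives in the generated intrinsic
// tables, not here): for the "arm" prefix, a builtin whose name matches a
// GCCBuiltin entry in IntrinsicsARM.td resolves directly to the corresponding
// llvm.arm.* intrinsic; builtins with no such entry fall through to
// EmitTargetBuiltinExpr below.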
1513
1514 if (IntrinsicID != Intrinsic::not_intrinsic) {
1515 SmallVector<Value*, 16> Args;
1516
1517 // Find out if any arguments are required to be integer constant
1518 // expressions.
1519 unsigned ICEArguments = 0;
1520 ASTContext::GetBuiltinTypeError Error;
1521 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1522 assert(Error == ASTContext::GE_None && "Should not codegen an error");
1523
1524 Function *F = CGM.getIntrinsic(IntrinsicID);
1525 llvm::FunctionType *FTy = F->getFunctionType();
1526
1527 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1528 Value *ArgValue;
1529 // If this is a normal argument, just emit it as a scalar.
1530 if ((ICEArguments & (1 << i)) == 0) {
1531 ArgValue = EmitScalarExpr(E->getArg(i));
1532 } else {
1533 // If this is required to be a constant, constant fold it so that we
1534 // know that the generated intrinsic gets a ConstantInt.
1535 llvm::APSInt Result;
1536 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1537 assert(IsConst && "Constant arg isn't actually constant?");
1538 (void)IsConst;
1539 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1540 }
1541
1542 // If the intrinsic arg type is different from the builtin arg type
1543 // we need to do a bit cast.
1544 llvm::Type *PTy = FTy->getParamType(i);
1545 if (PTy != ArgValue->getType()) {
1546 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1547 "Must be able to losslessly bit cast to param");
1548 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1549 }
1550
1551 Args.push_back(ArgValue);
1552 }
1553
1554 Value *V = Builder.CreateCall(F, Args);
1555 QualType BuiltinRetType = E->getType();
1556
1557 llvm::Type *RetTy = VoidTy;
1558 if (!BuiltinRetType->isVoidType())
1559 RetTy = ConvertType(BuiltinRetType);
1560
1561 if (RetTy != V->getType()) {
1562 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1563 "Must be able to losslessly bit cast result type");
1564 V = Builder.CreateBitCast(V, RetTy);
1565 }
1566
1567 return RValue::get(V);
1568 }
1569
1570 // See if we have a target specific builtin that needs to be lowered.
1571 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1572 return RValue::get(V);
1573
1574 ErrorUnsupported(E, "builtin function");
1575
1576 // Unknown builtin, for now just dump it out and return undef.
1577 return GetUndefRValue(E->getType());
1578 }
1579
1580 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1581 const CallExpr *E) {
1582 switch (getTarget().getTriple().getArch()) {
1583 case llvm::Triple::aarch64:
1584 return EmitAArch64BuiltinExpr(BuiltinID, E);
1585 case llvm::Triple::arm:
1586 case llvm::Triple::thumb:
1587 return EmitARMBuiltinExpr(BuiltinID, E);
1588 case llvm::Triple::x86:
1589 case llvm::Triple::x86_64:
1590 return EmitX86BuiltinExpr(BuiltinID, E);
1591 case llvm::Triple::ppc:
1592 case llvm::Triple::ppc64:
1593 case llvm::Triple::ppc64le:
1594 return EmitPPCBuiltinExpr(BuiltinID, E);
1595 default:
1596 return 0;
1597 }
1598 }
1599
1600 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1601 NeonTypeFlags TypeFlags) {
1602 int IsQuad = TypeFlags.isQuad();
1603 switch (TypeFlags.getEltType()) {
1604 case NeonTypeFlags::Int8:
1605 case NeonTypeFlags::Poly8:
1606 return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
1607 case NeonTypeFlags::Int16:
1608 case NeonTypeFlags::Poly16:
1609 case NeonTypeFlags::Float16:
1610 return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
1611 case NeonTypeFlags::Int32:
1612 return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
1613 case NeonTypeFlags::Int64:
1614 return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
1615 case NeonTypeFlags::Float32:
1616 return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
1617 case NeonTypeFlags::Float64:
1618 return llvm::VectorType::get(CGF->DoubleTy, 1 << IsQuad);
1619 }
1620 llvm_unreachable("Invalid NeonTypeFlags element type!");
1621 }
1622
1623 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1624 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1625 Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1626 return Builder.CreateShuffleVector(V, V, SV, "lane");
1627 }
1628
1629 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1630 const char *name,
1631 unsigned shift, bool rightshift) {
1632 unsigned j = 0;
1633 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1634 ai != ae; ++ai, ++j)
1635 if (shift > 0 && shift == j)
1636 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1637 else
1638 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1639
1640 return Builder.CreateCall(F, Ops, name);
1641 }
1642
1643 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1644 bool neg) {
1645 int SV = cast<ConstantInt>(V)->getSExtValue();
1646
1647 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1648 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1649 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1650 }
1651
1652 /// EmitPointerWithAlignment - Given an expression with a pointer type, emit
1653 /// its value and return it together with the alignment of the type it points
1654 /// to. Skips over implicit casts.
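///
/// For example (an illustrative caller, not from this file): when a NEON load
/// builtin is passed a decayed array such as "int32_t buf[4]", the
/// CK_ArrayToPointerDecay case below returns the array's address together with
/// the alignment recorded for the array lvalue (or the type's natural
/// alignment as a fallback) rather than a conservative alignment of 1.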
1655 std::pair<llvm::Value*, unsigned>
1656 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
1657 assert(Addr->getType()->isPointerType());
1658 Addr = Addr->IgnoreParens();
1659 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
1660 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
1661 ICE->getSubExpr()->getType()->isPointerType()) {
1662 std::pair<llvm::Value*, unsigned> Ptr =
1663 EmitPointerWithAlignment(ICE->getSubExpr());
1664 Ptr.first = Builder.CreateBitCast(Ptr.first,
1665 ConvertType(Addr->getType()));
1666 return Ptr;
1667 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
1668 LValue LV = EmitLValue(ICE->getSubExpr());
1669 unsigned Align = LV.getAlignment().getQuantity();
1670 if (!Align) {
1671 // FIXME: Once LValues are fixed to always set alignment,
1672 // zap this code.
1673 QualType PtTy = ICE->getSubExpr()->getType();
1674 if (!PtTy->isIncompleteType())
1675 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1676 else
1677 Align = 1;
1678 }
1679 return std::make_pair(LV.getAddress(), Align);
1680 }
1681 }
1682 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
1683 if (UO->getOpcode() == UO_AddrOf) {
1684 LValue LV = EmitLValue(UO->getSubExpr());
1685 unsigned Align = LV.getAlignment().getQuantity();
1686 if (!Align) {
1687 // FIXME: Once LValues are fixed to always set alignment,
1688 // zap this code.
1689 QualType PtTy = UO->getSubExpr()->getType();
1690 if (!PtTy->isIncompleteType())
1691 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1692 else
1693 Align = 1;
1694 }
1695 return std::make_pair(LV.getAddress(), Align);
1696 }
1697 }
1698
1699 unsigned Align = 1;
1700 QualType PtTy = Addr->getType()->getPointeeType();
1701 if (!PtTy->isIncompleteType())
1702 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1703
1704 return std::make_pair(EmitScalarExpr(Addr), Align);
1705 }
1706
1707 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1708 const CallExpr *E) {
1709 if (BuiltinID == AArch64::BI__clear_cache) {
1710 assert(E->getNumArgs() == 2 &&
1711 "Variadic __clear_cache slipped through on AArch64");
1712
1713 const FunctionDecl *FD = E->getDirectCallee();
1714 SmallVector<Value *, 2> Ops;
1715 for (unsigned i = 0; i < E->getNumArgs(); i++)
1716 Ops.push_back(EmitScalarExpr(E->getArg(i)));
1717 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1718 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1719 StringRef Name = FD->getName();
1720 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1721 }
1722
1723 SmallVector<Value *, 4> Ops;
1724 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
1725 Ops.push_back(EmitScalarExpr(E->getArg(i)));
1726 }
1727
1728 // Get the last argument, which specifies the vector type.
1729 llvm::APSInt Result;
1730 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
1731 if (!Arg->isIntegerConstantExpr(Result, getContext()))
1732 return 0;
1733
1734 // Determine the type of this overloaded NEON intrinsic.
1735 NeonTypeFlags Type(Result.getZExtValue());
1736 bool usgn = Type.isUnsigned();
1737
1738 llvm::VectorType *VTy = GetNeonType(this, Type);
1739 llvm::Type *Ty = VTy;
1740 if (!Ty)
1741 return 0;
1742
1743 unsigned Int;
1744 switch (BuiltinID) {
1745 default:
1746 return 0;
1747
1748 // AArch64 builtins mapping to legacy ARM v7 builtins.
1749 // FIXME: the mapped builtins listed correspond to what has been tested
1750 // in aarch64-neon-intrinsics.c so far.
1751 case AArch64::BI__builtin_neon_vmul_v:
1752 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
1753 case AArch64::BI__builtin_neon_vmulq_v:
1754 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
1755 case AArch64::BI__builtin_neon_vabd_v:
1756 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
1757 case AArch64::BI__builtin_neon_vabdq_v:
1758 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
1759 case AArch64::BI__builtin_neon_vfma_v:
1760 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
1761 case AArch64::BI__builtin_neon_vfmaq_v:
1762 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
1763 case AArch64::BI__builtin_neon_vbsl_v:
1764 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
1765 case AArch64::BI__builtin_neon_vbslq_v:
1766 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
1767 case AArch64::BI__builtin_neon_vrsqrts_v:
1768 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
1769 case AArch64::BI__builtin_neon_vrsqrtsq_v:
1770 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
1771 case AArch64::BI__builtin_neon_vrecps_v:
1772 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
1773 case AArch64::BI__builtin_neon_vrecpsq_v:
1774 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
1775 case AArch64::BI__builtin_neon_vcage_v:
1776 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
1777 case AArch64::BI__builtin_neon_vcale_v:
1778 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
1779 case AArch64::BI__builtin_neon_vcaleq_v:
1780 std::swap(Ops[0], Ops[1]);
1781 case AArch64::BI__builtin_neon_vcageq_v: {
1782 Function *F;
1783 if (VTy->getElementType()->isIntegerTy(64))
1784 F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
1785 else
1786 F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
1787 return EmitNeonCall(F, Ops, "vcage");
1788 }
1789 case AArch64::BI__builtin_neon_vcalt_v:
1790 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
1791 case AArch64::BI__builtin_neon_vcagt_v:
1792 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
1793 case AArch64::BI__builtin_neon_vcaltq_v:
1794 std::swap(Ops[0], Ops[1]);
1795 case AArch64::BI__builtin_neon_vcagtq_v: {
1796 Function *F;
1797 if (VTy->getElementType()->isIntegerTy(64))
1798 F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
1799 else
1800 F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
1801 return EmitNeonCall(F, Ops, "vcagt");
1802 }
1803 case AArch64::BI__builtin_neon_vtst_v:
1804 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
1805 case AArch64::BI__builtin_neon_vtstq_v:
1806 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
1807 case AArch64::BI__builtin_neon_vhadd_v:
1808 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
1809 case AArch64::BI__builtin_neon_vhaddq_v:
1810 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
1811 case AArch64::BI__builtin_neon_vhsub_v:
1812 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
1813 case AArch64::BI__builtin_neon_vhsubq_v:
1814 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
1815 case AArch64::BI__builtin_neon_vrhadd_v:
1816 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
1817 case AArch64::BI__builtin_neon_vrhaddq_v:
1818 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
1819 case AArch64::BI__builtin_neon_vqadd_v:
1820 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
1821 case AArch64::BI__builtin_neon_vqaddq_v:
1822 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
1823 case AArch64::BI__builtin_neon_vqsub_v:
1824 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
1825 case AArch64::BI__builtin_neon_vqsubq_v:
1826 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
1827 case AArch64::BI__builtin_neon_vshl_v:
1828 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
1829 case AArch64::BI__builtin_neon_vshlq_v:
1830 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
1831 case AArch64::BI__builtin_neon_vqshl_v:
1832 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
1833 case AArch64::BI__builtin_neon_vqshlq_v:
1834 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
1835 case AArch64::BI__builtin_neon_vrshl_v:
1836 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
1837 case AArch64::BI__builtin_neon_vrshlq_v:
1838 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
1839 case AArch64::BI__builtin_neon_vqrshl_v:
1840 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
1841 case AArch64::BI__builtin_neon_vqrshlq_v:
1842 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
1843 case AArch64::BI__builtin_neon_vmax_v:
1844 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
1845 case AArch64::BI__builtin_neon_vmaxq_v:
1846 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
1847 case AArch64::BI__builtin_neon_vmin_v:
1848 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
1849 case AArch64::BI__builtin_neon_vminq_v:
1850 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
1851 case AArch64::BI__builtin_neon_vpmax_v:
1852 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
1853 case AArch64::BI__builtin_neon_vpmin_v:
1854 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
1855 case AArch64::BI__builtin_neon_vpadd_v:
1856 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
1857 case AArch64::BI__builtin_neon_vqdmulh_v:
1858 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
1859 case AArch64::BI__builtin_neon_vqdmulhq_v:
1860 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
1861 case AArch64::BI__builtin_neon_vqrdmulh_v:
1862 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
1863 case AArch64::BI__builtin_neon_vqrdmulhq_v:
1864 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
1865
1866 // AArch64-only builtins
1867 case AArch64::BI__builtin_neon_vfms_v:
1868 case AArch64::BI__builtin_neon_vfmsq_v: {
1869 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
1870 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1871 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1872 Ops[1] = Builder.CreateFNeg(Ops[1]);
1873 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1874
1875 // LLVM's fma intrinsic puts the accumulator in the last position, but the
1876 // AArch64 intrinsic has it first.
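// Illustratively: vfmsq_f32(a, b, c) is emitted as
//   call <4 x float> @llvm.fma.v4f32(<4 x float> %negb, <4 x float> %c,
//                                    <4 x float> %a)
// where %negb is the negation of b created just above (value names invented
// here for clarity).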
1877 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
1878 }
1879 case AArch64::BI__builtin_neon_vmaxnm_v:
1880 case AArch64::BI__builtin_neon_vmaxnmq_v: {
1881 Int = Intrinsic::aarch64_neon_vmaxnm;
1882 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
1883 }
1884 case AArch64::BI__builtin_neon_vminnm_v:
1885 case AArch64::BI__builtin_neon_vminnmq_v: {
1886 Int = Intrinsic::aarch64_neon_vminnm;
1887 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
1888 }
1889 case AArch64::BI__builtin_neon_vpmaxnm_v:
1890 case AArch64::BI__builtin_neon_vpmaxnmq_v: {
1891 Int = Intrinsic::aarch64_neon_vpmaxnm;
1892 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
1893 }
1894 case AArch64::BI__builtin_neon_vpminnm_v:
1895 case AArch64::BI__builtin_neon_vpminnmq_v: {
1896 Int = Intrinsic::aarch64_neon_vpminnm;
1897 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
1898 }
1899 case AArch64::BI__builtin_neon_vpmaxq_v: {
1900 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1901 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
1902 }
1903 case AArch64::BI__builtin_neon_vpminq_v: {
1904 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1905 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
1906 }
1907 case AArch64::BI__builtin_neon_vpaddq_v: {
1908 Int = Intrinsic::arm_neon_vpadd;
1909 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
1910 }
1911 case AArch64::BI__builtin_neon_vmulx_v:
1912 case AArch64::BI__builtin_neon_vmulxq_v: {
1913 Int = Intrinsic::aarch64_neon_vmulx;
1914 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
1915 }
1916 }
1917 }
1918
1919 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1920 const CallExpr *E) {
1921 if (BuiltinID == ARM::BI__clear_cache) {
1922 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
1923 const FunctionDecl *FD = E->getDirectCallee();
1924 SmallVector<Value*, 2> Ops;
1925 for (unsigned i = 0; i < 2; i++)
1926 Ops.push_back(EmitScalarExpr(E->getArg(i)));
1927 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1928 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1929 StringRef Name = FD->getName();
1930 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1931 }
1932
1933 if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
1934 (BuiltinID == ARM::BI__builtin_arm_ldrex &&
1935 getContext().getTypeSize(E->getType()) == 64)) {
1936 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1937
1938 Value *LdPtr = EmitScalarExpr(E->getArg(0));
1939 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
1940 "ldrexd");
1941
1942 Value *Val0 = Builder.CreateExtractValue(Val, 1);
1943 Value *Val1 = Builder.CreateExtractValue(Val, 0);
1944 Val0 = Builder.CreateZExt(Val0, Int64Ty);
1945 Val1 = Builder.CreateZExt(Val1, Int64Ty);
1946
1947 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1948 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1949 Val = Builder.CreateOr(Val, Val1);
1950 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
1951 }
1952
1953 if (BuiltinID == ARM::BI__builtin_arm_ldrex) {
1954 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
1955
1956 QualType Ty = E->getType();
1957 llvm::Type *RealResTy = ConvertType(Ty);
1958 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
1959 getContext().getTypeSize(Ty));
1960 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
1961
1962 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType());
1963 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
1964
1965 if (RealResTy->isPointerTy())
1966 return Builder.CreateIntToPtr(Val, RealResTy);
1967 else {
1968 Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
1969 return Builder.CreateBitCast(Val, RealResTy);
1970 }
1971 }
1972
1973 if (BuiltinID == ARM::BI__builtin_arm_strexd ||
1974 (BuiltinID == ARM::BI__builtin_arm_strex &&
1975 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
1976 Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
1977 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
1978
1979 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1980 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()),
1981 One);
1982 Value *Val = EmitScalarExpr(E->getArg(0));
1983 Builder.CreateStore(Val, Tmp);
1984
1985 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
1986 Val = Builder.CreateLoad(LdPtr);
1987
1988 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
1989 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
1990 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
1991 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
1992 }
1993
1994 if (BuiltinID == ARM::BI__builtin_arm_strex) {
1995 Value *StoreVal = EmitScalarExpr(E->getArg(0));
1996 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
1997
1998 QualType Ty = E->getArg(0)->getType();
1999 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
2000 getContext().getTypeSize(Ty));
2001 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
2002
2003 if (StoreVal->getType()->isPointerTy())
2004 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2005 else {
2006 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
2007 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2008 }
2009
2010 Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType());
2011 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
2012 }
2013
2014 if (BuiltinID == ARM::BI__builtin_arm_clrex) {
2015 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2016 return Builder.CreateCall(F);
2017 }
2018
2019 SmallVector<Value*, 4> Ops;
2020 llvm::Value *Align = 0;
2021 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
2022 if (i == 0) {
2023 switch (BuiltinID) {
2024 case ARM::BI__builtin_neon_vld1_v:
2025 case ARM::BI__builtin_neon_vld1q_v:
2026 case ARM::BI__builtin_neon_vld1q_lane_v:
2027 case ARM::BI__builtin_neon_vld1_lane_v:
2028 case ARM::BI__builtin_neon_vld1_dup_v:
2029 case ARM::BI__builtin_neon_vld1q_dup_v:
2030 case ARM::BI__builtin_neon_vst1_v:
2031 case ARM::BI__builtin_neon_vst1q_v:
2032 case ARM::BI__builtin_neon_vst1q_lane_v:
2033 case ARM::BI__builtin_neon_vst1_lane_v:
2034 case ARM::BI__builtin_neon_vst2_v:
2035 case ARM::BI__builtin_neon_vst2q_v:
2036 case ARM::BI__builtin_neon_vst2_lane_v:
2037 case ARM::BI__builtin_neon_vst2q_lane_v:
2038 case ARM::BI__builtin_neon_vst3_v:
2039 case ARM::BI__builtin_neon_vst3q_v:
2040 case ARM::BI__builtin_neon_vst3_lane_v:
2041 case ARM::BI__builtin_neon_vst3q_lane_v:
2042 case ARM::BI__builtin_neon_vst4_v:
2043 case ARM::BI__builtin_neon_vst4q_v:
2044 case ARM::BI__builtin_neon_vst4_lane_v:
2045 case ARM::BI__builtin_neon_vst4q_lane_v:
2046 // Get the alignment for the argument in addition to the value;
2047 // we'll use it later.
2048 std::pair<llvm::Value*, unsigned> Src =
2049 EmitPointerWithAlignment(E->getArg(0));
2050 Ops.push_back(Src.first);
2051 Align = Builder.getInt32(Src.second);
2052 continue;
2053 }
2054 }
2055 if (i == 1) {
2056 switch (BuiltinID) {
2057 case ARM::BI__builtin_neon_vld2_v:
2058 case ARM::BI__builtin_neon_vld2q_v:
2059 case ARM::BI__builtin_neon_vld3_v:
2060 case ARM::BI__builtin_neon_vld3q_v:
2061 case ARM::BI__builtin_neon_vld4_v:
2062 case ARM::BI__builtin_neon_vld4q_v:
2063 case ARM::BI__builtin_neon_vld2_lane_v:
2064 case ARM::BI__builtin_neon_vld2q_lane_v:
2065 case ARM::BI__builtin_neon_vld3_lane_v:
2066 case ARM::BI__builtin_neon_vld3q_lane_v:
2067 case ARM::BI__builtin_neon_vld4_lane_v:
2068 case ARM::BI__builtin_neon_vld4q_lane_v:
2069 case ARM::BI__builtin_neon_vld2_dup_v:
2070 case ARM::BI__builtin_neon_vld3_dup_v:
2071 case ARM::BI__builtin_neon_vld4_dup_v:
2072 // Get the alignment for the argument in addition to the value;
2073 // we'll use it later.
2074 std::pair<llvm::Value*, unsigned> Src =
2075 EmitPointerWithAlignment(E->getArg(1));
2076 Ops.push_back(Src.first);
2077 Align = Builder.getInt32(Src.second);
2078 continue;
2079 }
2080 }
2081 Ops.push_back(EmitScalarExpr(E->getArg(i)));
2082 }
2083
2084 // vget_lane and vset_lane are not overloaded and do not have an extra
2085 // argument that specifies the vector type.
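// For example (illustrative): vgetq_lane_s32(v, 1) becomes a plain
//   extractelement <4 x i32> %v, i32 1
// and vsetq_lane_s32(x, v, 1) becomes the corresponding insertelement.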
2086 switch (BuiltinID) {
2087 default: break;
2088 case ARM::BI__builtin_neon_vget_lane_i8:
2089 case ARM::BI__builtin_neon_vget_lane_i16:
2090 case ARM::BI__builtin_neon_vget_lane_i32:
2091 case ARM::BI__builtin_neon_vget_lane_i64:
2092 case ARM::BI__builtin_neon_vget_lane_f32:
2093 case ARM::BI__builtin_neon_vgetq_lane_i8:
2094 case ARM::BI__builtin_neon_vgetq_lane_i16:
2095 case ARM::BI__builtin_neon_vgetq_lane_i32:
2096 case ARM::BI__builtin_neon_vgetq_lane_i64:
2097 case ARM::BI__builtin_neon_vgetq_lane_f32:
2098 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
2099 "vget_lane");
2100 case ARM::BI__builtin_neon_vset_lane_i8:
2101 case ARM::BI__builtin_neon_vset_lane_i16:
2102 case ARM::BI__builtin_neon_vset_lane_i32:
2103 case ARM::BI__builtin_neon_vset_lane_i64:
2104 case ARM::BI__builtin_neon_vset_lane_f32:
2105 case ARM::BI__builtin_neon_vsetq_lane_i8:
2106 case ARM::BI__builtin_neon_vsetq_lane_i16:
2107 case ARM::BI__builtin_neon_vsetq_lane_i32:
2108 case ARM::BI__builtin_neon_vsetq_lane_i64:
2109 case ARM::BI__builtin_neon_vsetq_lane_f32:
2110 Ops.push_back(EmitScalarExpr(E->getArg(2)));
2111 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
2112 }
2113
2114 // Get the last argument, which specifies the vector type.
2115 llvm::APSInt Result;
2116 const Expr *Arg = E->getArg(E->getNumArgs()-1);
2117 if (!Arg->isIntegerConstantExpr(Result, getContext()))
2118 return 0;
2119
2120 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
2121 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
2122 // Determine the overloaded type of this builtin.
2123 llvm::Type *Ty;
2124 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
2125 Ty = FloatTy;
2126 else
2127 Ty = DoubleTy;
2128
2129 // Determine whether this is an unsigned conversion or not.
2130 bool usgn = Result.getZExtValue() == 1;
2131 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2132
2133 // Call the appropriate intrinsic.
2134 Function *F = CGM.getIntrinsic(Int, Ty);
2135 return Builder.CreateCall(F, Ops, "vcvtr");
2136 }
2137
2138 // Determine the type of this overloaded NEON intrinsic.
2139 NeonTypeFlags Type(Result.getZExtValue());
2140 bool usgn = Type.isUnsigned();
2141 bool quad = Type.isQuad();
2142 bool rightShift = false;
2143
2144 llvm::VectorType *VTy = GetNeonType(this, Type);
2145 llvm::Type *Ty = VTy;
2146 if (!Ty)
2147 return 0;
2148
2149 unsigned Int;
2150 switch (BuiltinID) {
2151 default: return 0;
2152 case ARM::BI__builtin_neon_vbsl_v:
2153 case ARM::BI__builtin_neon_vbslq_v:
2154 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
2155 Ops, "vbsl");
2156 case ARM::BI__builtin_neon_vabd_v:
2157 case ARM::BI__builtin_neon_vabdq_v:
2158 Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
2159 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
2160 case ARM::BI__builtin_neon_vabs_v:
2161 case ARM::BI__builtin_neon_vabsq_v:
2162 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
2163 Ops, "vabs");
2164 case ARM::BI__builtin_neon_vaddhn_v:
2165 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
2166 Ops, "vaddhn");
2167 case ARM::BI__builtin_neon_vcale_v:
2168 std::swap(Ops[0], Ops[1]);
2169 case ARM::BI__builtin_neon_vcage_v: {
2170 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
2171 return EmitNeonCall(F, Ops, "vcage");
2172 }
2173 case ARM::BI__builtin_neon_vcaleq_v:
2174 std::swap(Ops[0], Ops[1]);
2175 case ARM::BI__builtin_neon_vcageq_v: {
2176 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
2177 return EmitNeonCall(F, Ops, "vcage");
2178 }
2179 case ARM::BI__builtin_neon_vcalt_v:
2180 std::swap(Ops[0], Ops[1]);
2181 case ARM::BI__builtin_neon_vcagt_v: {
2182 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
2183 return EmitNeonCall(F, Ops, "vcagt");
2184 }
2185 case ARM::BI__builtin_neon_vcaltq_v:
2186 std::swap(Ops[0], Ops[1]);
2187 case ARM::BI__builtin_neon_vcagtq_v: {
2188 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
2189 return EmitNeonCall(F, Ops, "vcagt");
2190 }
2191 case ARM::BI__builtin_neon_vcls_v:
2192 case ARM::BI__builtin_neon_vclsq_v: {
2193 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
2194 return EmitNeonCall(F, Ops, "vcls");
2195 }
2196 case ARM::BI__builtin_neon_vclz_v:
2197 case ARM::BI__builtin_neon_vclzq_v: {
2198 // Generate the target-independent intrinsic; we also need a second argument
2199 // indicating whether clz of zero is undefined; on ARM it isn't.
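// For example (illustrative): vclz_s32 on a <2 x i32> becomes
//   call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// where the trailing "i1 false" records that clz(0) is well defined.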
2200 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
2201 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
2202 return EmitNeonCall(F, Ops, "vclz");
2203 }
2204 case ARM::BI__builtin_neon_vcnt_v:
2205 case ARM::BI__builtin_neon_vcntq_v: {
2206 // generate target-independent intrinsic
2207 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
2208 return EmitNeonCall(F, Ops, "vctpop");
2209 }
2210 case ARM::BI__builtin_neon_vcvt_f16_v: {
2211 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
2212 "unexpected vcvt_f16_v builtin");
2213 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
2214 return EmitNeonCall(F, Ops, "vcvt");
2215 }
2216 case ARM::BI__builtin_neon_vcvt_f32_f16: {
2217 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
2218 "unexpected vcvt_f32_f16 builtin");
2219 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
2220 return EmitNeonCall(F, Ops, "vcvt");
2221 }
2222 case ARM::BI__builtin_neon_vcvt_f32_v:
2223 case ARM::BI__builtin_neon_vcvtq_f32_v:
2224 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2225 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
2226 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
2227 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
2228 case ARM::BI__builtin_neon_vcvt_s32_v:
2229 case ARM::BI__builtin_neon_vcvt_u32_v:
2230 case ARM::BI__builtin_neon_vcvtq_s32_v:
2231 case ARM::BI__builtin_neon_vcvtq_u32_v: {
2232 llvm::Type *FloatTy =
2233 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
2234 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
2235 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
2236 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
2237 }
2238 case ARM::BI__builtin_neon_vcvt_n_f32_v:
2239 case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
2240 llvm::Type *FloatTy =
2241 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
2242 llvm::Type *Tys[2] = { FloatTy, Ty };
2243 Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
2244 : Intrinsic::arm_neon_vcvtfxs2fp;
2245 Function *F = CGM.getIntrinsic(Int, Tys);
2246 return EmitNeonCall(F, Ops, "vcvt_n");
2247 }
2248 case ARM::BI__builtin_neon_vcvt_n_s32_v:
2249 case ARM::BI__builtin_neon_vcvt_n_u32_v:
2250 case ARM::BI__builtin_neon_vcvtq_n_s32_v:
2251 case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
2252 llvm::Type *FloatTy =
2253 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
2254 llvm::Type *Tys[2] = { Ty, FloatTy };
2255 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
2256 : Intrinsic::arm_neon_vcvtfp2fxs;
2257 Function *F = CGM.getIntrinsic(Int, Tys);
2258 return EmitNeonCall(F, Ops, "vcvt_n");
2259 }
2260 case ARM::BI__builtin_neon_vext_v:
2261 case ARM::BI__builtin_neon_vextq_v: {
2262 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
2263 SmallVector<Constant*, 16> Indices;
2264 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2265 Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
2266
2267 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2268 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2269 Value *SV = llvm::ConstantVector::get(Indices);
2270 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
2271 }
2272 case ARM::BI__builtin_neon_vhadd_v:
2273 case ARM::BI__builtin_neon_vhaddq_v:
2274 Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
2275 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
2276 case ARM::BI__builtin_neon_vhsub_v:
2277 case ARM::BI__builtin_neon_vhsubq_v:
2278 Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
2279 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
2280 case ARM::BI__builtin_neon_vld1_v:
2281 case ARM::BI__builtin_neon_vld1q_v:
2282 Ops.push_back(Align);
2283 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
2284 Ops, "vld1");
2285 case ARM::BI__builtin_neon_vld1q_lane_v:
2286 // Handle 64-bit integer elements as a special case. Use shuffles of
2287 // one-element vectors to avoid poor code for i64 in the backend.
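// Sketch of the sequence built below (illustrative, shown for Lane == 1;
// pointer and alignment operands are schematic):
//   %keep = shufflevector <2 x i64> %v, <2 x i64> %v, <1 x i32> <i32 0>
//   %ld   = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %p, i32 %align)
//   %res  = shufflevector <1 x i64> %keep, <1 x i64> %ld,
//                         <2 x i32> <i32 0, i32 1>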
2288 if (VTy->getElementType()->isIntegerTy(64)) {
2289 // Extract the other lane.
2290 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2291 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
2292 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
2293 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2294 // Load the value as a one-element vector.
2295 Ty = llvm::VectorType::get(VTy->getElementType(), 1);
2296 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
2297 Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
2298 // Combine them.
2299 SmallVector<Constant*, 2> Indices;
2300 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
2301 Indices.push_back(ConstantInt::get(Int32Ty, Lane));
2302 SV = llvm::ConstantVector::get(Indices);
2303 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
2304 }
2305 // fall through
2306 case ARM::BI__builtin_neon_vld1_lane_v: {
2307 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2308 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
2309 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2310 LoadInst *Ld = Builder.CreateLoad(Ops[0]);
2311 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
2312 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
2313 }
2314 case ARM::BI__builtin_neon_vld1_dup_v:
2315 case ARM::BI__builtin_neon_vld1q_dup_v: {
2316 Value *V = UndefValue::get(Ty);
2317 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
2318 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2319 LoadInst *Ld = Builder.CreateLoad(Ops[0]);
2320 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
2321 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
2322 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
2323 return EmitNeonSplat(Ops[0], CI);
2324 }
2325 case ARM::BI__builtin_neon_vld2_v:
2326 case ARM::BI__builtin_neon_vld2q_v: {
2327 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
2328 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
2329 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2330 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2331 return Builder.CreateStore(Ops[1], Ops[0]);
2332 }
2333 case ARM::BI__builtin_neon_vld3_v:
2334 case ARM::BI__builtin_neon_vld3q_v: {
2335 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
2336 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
2337 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2338 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2339 return Builder.CreateStore(Ops[1], Ops[0]);
2340 }
2341 case ARM::BI__builtin_neon_vld4_v:
2342 case ARM::BI__builtin_neon_vld4q_v: {
2343 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
2344 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
2345 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2346 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2347 return Builder.CreateStore(Ops[1], Ops[0]);
2348 }
2349 case ARM::BI__builtin_neon_vld2_lane_v:
2350 case ARM::BI__builtin_neon_vld2q_lane_v: {
2351 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
2352 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2353 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2354 Ops.push_back(Align);
2355 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
2356 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2357 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2358 return Builder.CreateStore(Ops[1], Ops[0]);
2359 }
2360 case ARM::BI__builtin_neon_vld3_lane_v:
2361 case ARM::BI__builtin_neon_vld3q_lane_v: {
2362 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
2363 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2364 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2365 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
2366 Ops.push_back(Align);
2367 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
2368 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2369 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2370 return Builder.CreateStore(Ops[1], Ops[0]);
2371 }
2372 case ARM::BI__builtin_neon_vld4_lane_v:
2373 case ARM::BI__builtin_neon_vld4q_lane_v: {
2374 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
2375 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2376 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
2377 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
2378 Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
2379 Ops.push_back(Align);
2380 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
2381 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2382 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2383 return Builder.CreateStore(Ops[1], Ops[0]);
2384 }
2385 case ARM::BI__builtin_neon_vld2_dup_v:
2386 case ARM::BI__builtin_neon_vld3_dup_v:
2387 case ARM::BI__builtin_neon_vld4_dup_v: {
2388 // Handle 64-bit elements as a special-case. There is no "dup" needed.
2389 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
2390 switch (BuiltinID) {
2391 case ARM::BI__builtin_neon_vld2_dup_v:
2392 Int = Intrinsic::arm_neon_vld2;
2393 break;
2394 case ARM::BI__builtin_neon_vld3_dup_v:
2395 Int = Intrinsic::arm_neon_vld3;
2396 break;
2397 case ARM::BI__builtin_neon_vld4_dup_v:
2398 Int = Intrinsic::arm_neon_vld4;
2399 break;
2400 default: llvm_unreachable("unknown vld_dup intrinsic?");
2401 }
2402 Function *F = CGM.getIntrinsic(Int, Ty);
2403 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
2404 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2405 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2406 return Builder.CreateStore(Ops[1], Ops[0]);
2407 }
2408 switch (BuiltinID) {
2409 case ARM::BI__builtin_neon_vld2_dup_v:
2410 Int = Intrinsic::arm_neon_vld2lane;
2411 break;
2412 case ARM::BI__builtin_neon_vld3_dup_v:
2413 Int = Intrinsic::arm_neon_vld3lane;
2414 break;
2415 case ARM::BI__builtin_neon_vld4_dup_v:
2416 Int = Intrinsic::arm_neon_vld4lane;
2417 break;
2418 default: llvm_unreachable("unknown vld_dup intrinsic?");
2419 }
2420 Function *F = CGM.getIntrinsic(Int, Ty);
2421 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
2422
2423 SmallVector<Value*, 6> Args;
2424 Args.push_back(Ops[1]);
2425 Args.append(STy->getNumElements(), UndefValue::get(Ty));
2426
2427 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
2428 Args.push_back(CI);
2429 Args.push_back(Align);
2430
2431 Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
2432 // splat lane 0 to all elts in each vector of the result.
2433 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2434 Value *Val = Builder.CreateExtractValue(Ops[1], i);
2435 Value *Elt = Builder.CreateBitCast(Val, Ty);
2436 Elt = EmitNeonSplat(Elt, CI);
2437 Elt = Builder.CreateBitCast(Elt, Val->getType());
2438 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
2439 }
2440 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2441 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2442 return Builder.CreateStore(Ops[1], Ops[0]);
2443 }
2444 case ARM::BI__builtin_neon_vmax_v:
2445 case ARM::BI__builtin_neon_vmaxq_v:
2446 Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
2447 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
2448 case ARM::BI__builtin_neon_vmin_v:
2449 case ARM::BI__builtin_neon_vminq_v:
2450 Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
2451 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
2452 case ARM::BI__builtin_neon_vmovl_v: {
2453 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
2454 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
2455 if (usgn)
2456 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
2457 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
2458 }
2459 case ARM::BI__builtin_neon_vmovn_v: {
2460 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
2461 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
2462 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
2463 }
2464 case ARM::BI__builtin_neon_vmul_v:
2465 case ARM::BI__builtin_neon_vmulq_v:
2466 assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
2467 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
2468 Ops, "vmul");
2469 case ARM::BI__builtin_neon_vmull_v:
2470 Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2471 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2472 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
2473 case ARM::BI__builtin_neon_vfma_v:
2474 case ARM::BI__builtin_neon_vfmaq_v: {
2475 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2476 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2477 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2478 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2479
2480 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2481 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
2482 }
2483 case ARM::BI__builtin_neon_vpadal_v:
2484 case ARM::BI__builtin_neon_vpadalq_v: {
2485 Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
2486 // The source operand type has twice as many elements of half the size.
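// For example (illustrative): for vpadal_s8 the result/accumulator type Ty is
// <4 x i16> and the pairwise source operand type NarrowTy is <8 x i8>.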
2487 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2488 llvm::Type *EltTy =
2489 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2490 llvm::Type *NarrowTy =
2491 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
2492 llvm::Type *Tys[2] = { Ty, NarrowTy };
2493 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
2494 }
2495 case ARM::BI__builtin_neon_vpadd_v:
2496 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
2497 Ops, "vpadd");
2498 case ARM::BI__builtin_neon_vpaddl_v:
2499 case ARM::BI__builtin_neon_vpaddlq_v: {
2500 Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
2501 // The source operand type has twice as many elements of half the size.
2502 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2503 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
2504 llvm::Type *NarrowTy =
2505 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
2506 llvm::Type *Tys[2] = { Ty, NarrowTy };
2507 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
2508 }
2509 case ARM::BI__builtin_neon_vpmax_v:
2510 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
2511 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
2512 case ARM::BI__builtin_neon_vpmin_v:
2513 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
2514 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
2515 case ARM::BI__builtin_neon_vqabs_v:
2516 case ARM::BI__builtin_neon_vqabsq_v:
2517 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
2518 Ops, "vqabs");
2519 case ARM::BI__builtin_neon_vqadd_v:
2520 case ARM::BI__builtin_neon_vqaddq_v:
2521 Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
2522 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
2523 case ARM::BI__builtin_neon_vqdmlal_v:
2524 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
2525 Ops, "vqdmlal");
2526 case ARM::BI__builtin_neon_vqdmlsl_v:
2527 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
2528 Ops, "vqdmlsl");
2529 case ARM::BI__builtin_neon_vqdmulh_v:
2530 case ARM::BI__builtin_neon_vqdmulhq_v:
2531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
2532 Ops, "vqdmulh");
2533 case ARM::BI__builtin_neon_vqdmull_v:
2534 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
2535 Ops, "vqdmull");
2536 case ARM::BI__builtin_neon_vqmovn_v:
2537 Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
2538 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
2539 case ARM::BI__builtin_neon_vqmovun_v:
2540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
2541 Ops, "vqdmull");
2542 case ARM::BI__builtin_neon_vqneg_v:
2543 case ARM::BI__builtin_neon_vqnegq_v:
2544 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
2545 Ops, "vqneg");
2546 case ARM::BI__builtin_neon_vqrdmulh_v:
2547 case ARM::BI__builtin_neon_vqrdmulhq_v:
2548 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
2549 Ops, "vqrdmulh");
2550 case ARM::BI__builtin_neon_vqrshl_v:
2551 case ARM::BI__builtin_neon_vqrshlq_v:
2552 Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
2553 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
2554 case ARM::BI__builtin_neon_vqrshrn_n_v:
2555 Int =
2556 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2557 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
2558 1, true);
2559 case ARM::BI__builtin_neon_vqrshrun_n_v:
2560 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2561 Ops, "vqrshrun_n", 1, true);
2562 case ARM::BI__builtin_neon_vqshl_v:
2563 case ARM::BI__builtin_neon_vqshlq_v:
2564 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
2565 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
2566 case ARM::BI__builtin_neon_vqshl_n_v:
2567 case ARM::BI__builtin_neon_vqshlq_n_v:
2568 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
2569 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
2570 1, false);
2571 case ARM::BI__builtin_neon_vqshlu_n_v:
2572 case ARM::BI__builtin_neon_vqshluq_n_v:
2573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
2574 Ops, "vqshlu", 1, false);
2575 case ARM::BI__builtin_neon_vqshrn_n_v:
2576 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2577 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
2578 1, true);
2579 case ARM::BI__builtin_neon_vqshrun_n_v:
2580 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2581 Ops, "vqshrun_n", 1, true);
2582 case ARM::BI__builtin_neon_vqsub_v:
2583 case ARM::BI__builtin_neon_vqsubq_v:
2584 Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
2585 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
2586 case ARM::BI__builtin_neon_vraddhn_v:
2587 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
2588 Ops, "vraddhn");
2589 case ARM::BI__builtin_neon_vrecpe_v:
2590 case ARM::BI__builtin_neon_vrecpeq_v:
2591 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2592 Ops, "vrecpe");
2593 case ARM::BI__builtin_neon_vrecps_v:
2594 case ARM::BI__builtin_neon_vrecpsq_v:
2595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
2596 Ops, "vrecps");
2597 case ARM::BI__builtin_neon_vrhadd_v:
2598 case ARM::BI__builtin_neon_vrhaddq_v:
2599 Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
2600 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
2601 case ARM::BI__builtin_neon_vrshl_v:
2602 case ARM::BI__builtin_neon_vrshlq_v:
2603 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2604 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
2605 case ARM::BI__builtin_neon_vrshrn_n_v:
2606 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2607 Ops, "vrshrn_n", 1, true);
2608 case ARM::BI__builtin_neon_vrshr_n_v:
2609 case ARM::BI__builtin_neon_vrshrq_n_v:
2610 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2611 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
2612 case ARM::BI__builtin_neon_vrsqrte_v:
2613 case ARM::BI__builtin_neon_vrsqrteq_v:
2614 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
2615 Ops, "vrsqrte");
2616 case ARM::BI__builtin_neon_vrsqrts_v:
2617 case ARM::BI__builtin_neon_vrsqrtsq_v:
2618 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
2619 Ops, "vrsqrts");
2620 case ARM::BI__builtin_neon_vrsra_n_v:
2621 case ARM::BI__builtin_neon_vrsraq_n_v:
2622 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2623 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2624 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2625 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2626 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
2627 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2628 case ARM::BI__builtin_neon_vrsubhn_v:
2629 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
2630 Ops, "vrsubhn");
2631 case ARM::BI__builtin_neon_vshl_v:
2632 case ARM::BI__builtin_neon_vshlq_v:
2633 Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
2634 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
2635 case ARM::BI__builtin_neon_vshll_n_v:
2636 Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
2637 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
2638 case ARM::BI__builtin_neon_vshl_n_v:
2639 case ARM::BI__builtin_neon_vshlq_n_v:
2640 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2641 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2642 "vshl_n");
2643 case ARM::BI__builtin_neon_vshrn_n_v:
2644 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
2645 Ops, "vshrn_n", 1, true);
2646 case ARM::BI__builtin_neon_vshr_n_v:
2647 case ARM::BI__builtin_neon_vshrq_n_v:
2648 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2649 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2650 if (usgn)
2651 return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
2652 else
2653 return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
2654 case ARM::BI__builtin_neon_vsri_n_v:
2655 case ARM::BI__builtin_neon_vsriq_n_v:
2656 rightShift = true;
2657 case ARM::BI__builtin_neon_vsli_n_v:
2658 case ARM::BI__builtin_neon_vsliq_n_v:
2659 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2660 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2661 Ops, "vsli_n");
2662 case ARM::BI__builtin_neon_vsra_n_v:
2663 case ARM::BI__builtin_neon_vsraq_n_v:
2664 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2665 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2666 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
2667 if (usgn)
2668 Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
2669 else
2670 Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
2671 return Builder.CreateAdd(Ops[0], Ops[1]);
2672 case ARM::BI__builtin_neon_vst1_v:
2673 case ARM::BI__builtin_neon_vst1q_v:
2674 Ops.push_back(Align);
2675 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
2676 Ops, "");
2677 case ARM::BI__builtin_neon_vst1q_lane_v:
2678 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2679 // a one-element vector and avoid poor code for i64 in the backend.
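// Sketch (illustrative): the lane is first pulled out as a one-element
// vector, e.g.
//   %lane = shufflevector <2 x i64> %v, <2 x i64> %v, <1 x i32> <i32 1>
// and then stored with llvm.arm.neon.vst1 rather than as a scalar i64 store.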
2680 if (VTy->getElementType()->isIntegerTy(64)) {
2681 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2682 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
2683 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2684 Ops[2] = Align;
2685 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2686 Ops[1]->getType()), Ops);
2687 }
2688 // fall through
2689 case ARM::BI__builtin_neon_vst1_lane_v: {
2690 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2691 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2692 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2693 StoreInst *St = Builder.CreateStore(Ops[1],
2694 Builder.CreateBitCast(Ops[0], Ty));
2695 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
2696 return St;
2697 }
2698 case ARM::BI__builtin_neon_vst2_v:
2699 case ARM::BI__builtin_neon_vst2q_v:
2700 Ops.push_back(Align);
2701 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
2702 Ops, "");
2703 case ARM::BI__builtin_neon_vst2_lane_v:
2704 case ARM::BI__builtin_neon_vst2q_lane_v:
2705 Ops.push_back(Align);
2706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
2707 Ops, "");
2708 case ARM::BI__builtin_neon_vst3_v:
2709 case ARM::BI__builtin_neon_vst3q_v:
2710 Ops.push_back(Align);
2711 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
2712 Ops, "");
2713 case ARM::BI__builtin_neon_vst3_lane_v:
2714 case ARM::BI__builtin_neon_vst3q_lane_v:
2715 Ops.push_back(Align);
2716 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
2717 Ops, "");
2718 case ARM::BI__builtin_neon_vst4_v:
2719 case ARM::BI__builtin_neon_vst4q_v:
2720 Ops.push_back(Align);
2721 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
2722 Ops, "");
2723 case ARM::BI__builtin_neon_vst4_lane_v:
2724 case ARM::BI__builtin_neon_vst4q_lane_v:
2725 Ops.push_back(Align);
2726 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
2727 Ops, "");
2728 case ARM::BI__builtin_neon_vsubhn_v:
2729 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
2730 Ops, "vsubhn");
2731 case ARM::BI__builtin_neon_vtbl1_v:
2732 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2733 Ops, "vtbl1");
2734 case ARM::BI__builtin_neon_vtbl2_v:
2735 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2736 Ops, "vtbl2");
2737 case ARM::BI__builtin_neon_vtbl3_v:
2738 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2739 Ops, "vtbl3");
2740 case ARM::BI__builtin_neon_vtbl4_v:
2741 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2742 Ops, "vtbl4");
2743 case ARM::BI__builtin_neon_vtbx1_v:
2744 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2745 Ops, "vtbx1");
2746 case ARM::BI__builtin_neon_vtbx2_v:
2747 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2748 Ops, "vtbx2");
2749 case ARM::BI__builtin_neon_vtbx3_v:
2750 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2751 Ops, "vtbx3");
2752 case ARM::BI__builtin_neon_vtbx4_v:
2753 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2754 Ops, "vtbx4");
2755 case ARM::BI__builtin_neon_vtst_v:
2756 case ARM::BI__builtin_neon_vtstq_v: {
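// vtst sets each result lane to all ones if (a & b) is nonzero in that
// lane and to zero otherwise. There is no intrinsic for this, so emit
// and + icmp ne 0 + sext back to the element width.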
2757 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2758 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2759 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2760 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2761 ConstantAggregateZero::get(Ty));
2762 return Builder.CreateSExt(Ops[0], Ty, "vtst");
2763 }
2764 case ARM::BI__builtin_neon_vtrn_v:
2765 case ARM::BI__builtin_neon_vtrnq_v: {
2766 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2767 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2768 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2769 Value *SV = 0;
2770
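// Each half of the transpose is a single shufflevector. Illustratively,
// for <4 x i16> inputs a = {a0,a1,a2,a3} and b = {b0,b1,b2,b3}:
//   vi == 0: mask <0,4,2,6> -> {a0,b0,a2,b2}
//   vi == 1: mask <1,5,3,7> -> {a1,b1,a3,b3}
// and each half is stored to consecutive slots of the result pointer.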
2771 for (unsigned vi = 0; vi != 2; ++vi) {
2772 SmallVector<Constant*, 16> Indices;
2773 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2774 Indices.push_back(Builder.getInt32(i+vi));
2775 Indices.push_back(Builder.getInt32(i+e+vi));
2776 }
2777 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2778 SV = llvm::ConstantVector::get(Indices);
2779 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
2780 SV = Builder.CreateStore(SV, Addr);
2781 }
2782 return SV;
2783 }
2784 case ARM::BI__builtin_neon_vuzp_v:
2785 case ARM::BI__builtin_neon_vuzpq_v: {
2786 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2787 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2788 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2789 Value *SV = 0;
2790
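// Unzip is two shufflevectors over the concatenated inputs.
// Illustratively, for <4 x i16> a and b:
//   vi == 0: mask <0,2,4,6> -> the even elements {a0,a2,b0,b2}
//   vi == 1: mask <1,3,5,7> -> the odd elements  {a1,a3,b1,b3}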
2791 for (unsigned vi = 0; vi != 2; ++vi) {
2792 SmallVector<Constant*, 16> Indices;
2793 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2794 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
2795
2796 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2797 SV = llvm::ConstantVector::get(Indices);
2798 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
2799 SV = Builder.CreateStore(SV, Addr);
2800 }
2801 return SV;
2802 }
2803 case ARM::BI__builtin_neon_vzip_v:
2804 case ARM::BI__builtin_neon_vzipq_v: {
2805 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2806 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2807 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2808 Value *SV = 0;
2809
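// Zip interleaves the two inputs with two shufflevectors.
// Illustratively, for <4 x i16> a and b:
//   vi == 0: mask <0,4,1,5> -> {a0,b0,a1,b1}
//   vi == 1: mask <2,6,3,7> -> {a2,b2,a3,b3}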
2810 for (unsigned vi = 0; vi != 2; ++vi) {
2811 SmallVector<Constant*, 16> Indices;
2812 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2813 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
2814 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
2815 }
2816 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2817 SV = llvm::ConstantVector::get(Indices);
2818 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
2819 SV = Builder.CreateStore(SV, Addr);
2820 }
2821 return SV;
2822 }
2823 }
2824 }
2825
2826 llvm::Value *CodeGenFunction::
2827 BuildVector(ArrayRef<llvm::Value*> Ops) {
2828 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
2829 "Not a power-of-two sized vector!");
2830 bool AllConstants = true;
2831 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
2832 AllConstants &= isa<Constant>(Ops[i]);
2833
2834 // If this is a constant vector, create a ConstantVector.
2835 if (AllConstants) {
2836 SmallVector<llvm::Constant*, 16> CstOps;
2837 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2838 CstOps.push_back(cast<Constant>(Ops[i]));
2839 return llvm::ConstantVector::get(CstOps);
2840 }
2841
2842 // Otherwise, insertelement the values to build the vector.
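// Illustratively, for two scalar i32 operands x and y this emits:
//   %v0 = insertelement <2 x i32> undef, i32 %x, i32 0
//   %v1 = insertelement <2 x i32> %v0, i32 %y, i32 1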
2843 Value *Result =
2844 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
2845
2846 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2847 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
2848
2849 return Result;
2850 }
2851
2852 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
2853 const CallExpr *E) {
2854 SmallVector<Value*, 4> Ops;
2855
2856 // Find out if any arguments are required to be integer constant expressions.
2857 unsigned ICEArguments = 0;
2858 ASTContext::GetBuiltinTypeError Error;
2859 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
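// ICEArguments is a bitmask: bit i is set when argument i of this builtin
// must be an integer constant expression, per its Builtins.def signature.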
2860 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2861
2862 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
2863 // If this is a normal argument, just emit it as a scalar.
2864 if ((ICEArguments & (1 << i)) == 0) {
2865 Ops.push_back(EmitScalarExpr(E->getArg(i)));
2866 continue;
2867 }
2868
2869 // If this is required to be a constant, constant fold it so that we know
2870 // that the generated intrinsic gets a ConstantInt.
2871 llvm::APSInt Result;
2872 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
2873 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
2874 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
2875 }
2876
2877 switch (BuiltinID) {
2878 default: return 0;
2879 case X86::BI__builtin_ia32_vec_init_v8qi:
2880 case X86::BI__builtin_ia32_vec_init_v4hi:
2881 case X86::BI__builtin_ia32_vec_init_v2si:
2882 return Builder.CreateBitCast(BuildVector(Ops),
2883 llvm::Type::getX86_MMXTy(getLLVMContext()));
2884 case X86::BI__builtin_ia32_vec_ext_v2si:
2885 return Builder.CreateExtractElement(Ops[0],
2886 llvm::ConstantInt::get(Ops[1]->getType(), 0));
2887 case X86::BI__builtin_ia32_ldmxcsr: {
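// llvm.x86.sse.ldmxcsr takes a pointer to the 32-bit control/status word,
// so spill the operand to a stack temporary and pass its address.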
2888 llvm::Type *PtrTy = Int8PtrTy;
2889 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2890 Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2891 Builder.CreateStore(Ops[0], Tmp);
2892 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2893 Builder.CreateBitCast(Tmp, PtrTy));
2894 }
2895 case X86::BI__builtin_ia32_stmxcsr: {
2896 llvm::Type *PtrTy = Int8PtrTy;
2897 Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2898 Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2899 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2900 Builder.CreateBitCast(Tmp, PtrTy));
2901 return Builder.CreateLoad(Tmp, "stmxcsr");
2902 }
2903 case X86::BI__builtin_ia32_storehps:
2904 case X86::BI__builtin_ia32_storelps: {
2905 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2906 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2907
2908 // Cast the value to a <2 x i64> vector.
2909 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2910
2911 // Extract element 0 (storelps) or element 1 (storehps).
2912 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2913 llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2914 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2915
2916 // Cast the pointer to i64* and store the extracted element.
2917 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2918 return Builder.CreateStore(Ops[1], Ops[0]);
2919 }
2920 case X86::BI__builtin_ia32_palignr: {
2921 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2922
2923 // If palignr is shifting the pair of input vectors by 8 bytes or less,
2924 // emit a shuffle instruction.
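// Illustratively, for shiftVal == 3 the mask is <3,4,...,10>, selecting
// bytes 3..10 of the concatenation with Ops[1] as the low half and Ops[0]
// as the high half, which matches the PALIGNR semantics.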
2925 if (shiftVal <= 8) {
2926 SmallVector<llvm::Constant*, 8> Indices;
2927 for (unsigned i = 0; i != 8; ++i)
2928 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2929
2930 Value* SV = llvm::ConstantVector::get(Indices);
2931 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2932 }
2933
2934 // If palignr is shifting the pair of input vectors more than 8 but less
2935 // than 16 bytes, emit a logical right shift of the destination.
2936 if (shiftVal < 16) {
2937 // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2938 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2939
2940 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2941 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2942
2943 // Emit the shift as a call to the MMX psrl.q intrinsic.
2944 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2945 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2946 }
2947
2948 // If palignr is shifting the pair of vectors by 16 bytes or more, emit zero.
2949 return llvm::Constant::getNullValue(ConvertType(E->getType()));
2950 }
2951 case X86::BI__builtin_ia32_palignr128: {
2952 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2953
2954 // If palignr is shifting the pair of input vectors by 16 bytes or less,
2955 // emit a shuffle instruction.
2956 if (shiftVal <= 16) {
2957 SmallVector<llvm::Constant*, 16> Indices;
2958 for (unsigned i = 0; i != 16; ++i)
2959 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2960
2961 Value* SV = llvm::ConstantVector::get(Indices);
2962 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2963 }
2964
2965 // If palignr is shifting the pair of input vectors more than 16 but less
2966 // than 32 bytes, emit a logical right shift of the destination.
2967 if (shiftVal < 32) {
2968 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2969
2970 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2971 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2972
2973 // Emit the byte shift as a call to the SSE2 psrl.dq intrinsic (count in bits).
2974 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2975 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2976 }
2977
2978 // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
2979 return llvm::Constant::getNullValue(ConvertType(E->getType()));
2980 }
2981 case X86::BI__builtin_ia32_palignr256: {
2982 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2983
2984 // If palignr is shifting the pair of input vectors by 16 bytes or less,
2985 // emit a shuffle instruction.
2986 if (shiftVal <= 16) {
2987 SmallVector<llvm::Constant*, 32> Indices;
2988 // 256-bit palignr operates on the two 128-bit lanes independently, so build the shuffle mask lane by lane.
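// Illustratively, within lane 0 an index that would run past the end of
// Ops[1]'s lane (>= 16 in the concatenated mask) is bumped by 16 so it
// selects the matching byte of Ops[0]'s lane 0, the second shuffle operand,
// mirroring VPALIGNR's per-lane behavior.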
2989 for (unsigned l = 0; l != 2; ++l) {
2990 unsigned LaneStart = l * 16;
2991 unsigned LaneEnd = (l+1) * 16;
2992 for (unsigned i = 0; i != 16; ++i) {
2993 unsigned Idx = shiftVal + i + LaneStart;
2994 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
2995 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
2996 }
2997 }
2998
2999 Value* SV = llvm::ConstantVector::get(Indices);
3000 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
3001 }
3002
3003 // If palignr is shifting the pair of input vectors more than 16 but less
3004 // than 32 bytes, emit a logical right shift of the destination.
3005 if (shiftVal < 32) {
3006 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
3007
3008 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
3009 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
3010
3011 // Emit the byte shift as a call to the AVX2 psrl.dq intrinsic (count in bits).
3012 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
3013 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
3014 }
3015
3016 // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
3017 return llvm::Constant::getNullValue(ConvertType(E->getType()));
3018 }
3019 case X86::BI__builtin_ia32_movntps:
3020 case X86::BI__builtin_ia32_movntps256:
3021 case X86::BI__builtin_ia32_movntpd:
3022 case X86::BI__builtin_ia32_movntpd256:
3023 case X86::BI__builtin_ia32_movntdq:
3024 case X86::BI__builtin_ia32_movntdq256:
3025 case X86::BI__builtin_ia32_movnti: {
3026 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
3027 Builder.getInt32(1));
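// Tag the store with !nontemporal metadata so the backend can select a
// streaming store (movntps/movntdq/movnti) instead of a normal store.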
3028
3029 // Convert the type of the pointer to a pointer to the stored type.
3030 Value *BC = Builder.CreateBitCast(Ops[0],
3031 llvm::PointerType::getUnqual(Ops[1]->getType()),
3032 "cast");
3033 StoreInst *SI = Builder.CreateStore(Ops[1], BC);
3034 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
3035 SI->setAlignment(16);
3036 return SI;
3037 }
3038 // 3DNow!
3039 case X86::BI__builtin_ia32_pswapdsf:
3040 case X86::BI__builtin_ia32_pswapdsi: {
3041 const char *name = 0;
3042 Intrinsic::ID ID = Intrinsic::not_intrinsic;
3043 switch(BuiltinID) {
3044 default: llvm_unreachable("Unsupported intrinsic!");
3045 case X86::BI__builtin_ia32_pswapdsf:
3046 case X86::BI__builtin_ia32_pswapdsi:
3047 name = "pswapd";
3048 ID = Intrinsic::x86_3dnowa_pswapd;
3049 break;
3050 }
3051 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
3052 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
3053 llvm::Function *F = CGM.getIntrinsic(ID);
3054 return Builder.CreateCall(F, Ops, name);
3055 }
3056 case X86::BI__builtin_ia32_rdrand16_step:
3057 case X86::BI__builtin_ia32_rdrand32_step:
3058 case X86::BI__builtin_ia32_rdrand64_step:
3059 case X86::BI__builtin_ia32_rdseed16_step:
3060 case X86::BI__builtin_ia32_rdseed32_step:
3061 case X86::BI__builtin_ia32_rdseed64_step: {
3062 Intrinsic::ID ID;
3063 switch (BuiltinID) {
3064 default: llvm_unreachable("Unsupported intrinsic!");
3065 case X86::BI__builtin_ia32_rdrand16_step:
3066 ID = Intrinsic::x86_rdrand_16;
3067 break;
3068 case X86::BI__builtin_ia32_rdrand32_step:
3069 ID = Intrinsic::x86_rdrand_32;
3070 break;
3071 case X86::BI__builtin_ia32_rdrand64_step:
3072 ID = Intrinsic::x86_rdrand_64;
3073 break;
3074 case X86::BI__builtin_ia32_rdseed16_step:
3075 ID = Intrinsic::x86_rdseed_16;
3076 break;
3077 case X86::BI__builtin_ia32_rdseed32_step:
3078 ID = Intrinsic::x86_rdseed_32;
3079 break;
3080 case X86::BI__builtin_ia32_rdseed64_step:
3081 ID = Intrinsic::x86_rdseed_64;
3082 break;
3083 }
3084
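// The rdrand/rdseed intrinsics return { random value, i32 success flag }.
// Store the value through the pointer operand and return the flag, which
// is what the *_step builtins report to the caller.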
3085 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
3086 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
3087 return Builder.CreateExtractValue(Call, 1);
3088 }
3089 }
3090 }
3091
3092
3093 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
3094 const CallExpr *E) {
3095 SmallVector<Value*, 4> Ops;
3096
3097 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
3098 Ops.push_back(EmitScalarExpr(E->getArg(i)));
3099
3100 Intrinsic::ID ID = Intrinsic::not_intrinsic;
3101
3102 switch (BuiltinID) {
3103 default: return 0;
3104
3105 // vec_ld, vec_lvsl, vec_lvsr
3106 case PPC::BI__builtin_altivec_lvx:
3107 case PPC::BI__builtin_altivec_lvxl:
3108 case PPC::BI__builtin_altivec_lvebx:
3109 case PPC::BI__builtin_altivec_lvehx:
3110 case PPC::BI__builtin_altivec_lvewx:
3111 case PPC::BI__builtin_altivec_lvsl:
3112 case PPC::BI__builtin_altivec_lvsr:
3113 {
3114 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
3115
3116 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
3117 Ops.pop_back();
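// The builtin takes (offset, pointer); the Altivec intrinsic takes a single
// address, so form ptr + offset with a byte GEP and drop the extra operand.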
3118
3119 switch (BuiltinID) {
3120 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
3121 case PPC::BI__builtin_altivec_lvx:
3122 ID = Intrinsic::ppc_altivec_lvx;
3123 break;
3124 case PPC::BI__builtin_altivec_lvxl:
3125 ID = Intrinsic::ppc_altivec_lvxl;
3126 break;
3127 case PPC::BI__builtin_altivec_lvebx:
3128 ID = Intrinsic::ppc_altivec_lvebx;
3129 break;
3130 case PPC::BI__builtin_altivec_lvehx:
3131 ID = Intrinsic::ppc_altivec_lvehx;
3132 break;
3133 case PPC::BI__builtin_altivec_lvewx:
3134 ID = Intrinsic::ppc_altivec_lvewx;
3135 break;
3136 case PPC::BI__builtin_altivec_lvsl:
3137 ID = Intrinsic::ppc_altivec_lvsl;
3138 break;
3139 case PPC::BI__builtin_altivec_lvsr:
3140 ID = Intrinsic::ppc_altivec_lvsr;
3141 break;
3142 }
3143 llvm::Function *F = CGM.getIntrinsic(ID);
3144 return Builder.CreateCall(F, Ops, "");
3145 }
3146
3147 // vec_st
3148 case PPC::BI__builtin_altivec_stvx:
3149 case PPC::BI__builtin_altivec_stvxl:
3150 case PPC::BI__builtin_altivec_stvebx:
3151 case PPC::BI__builtin_altivec_stvehx:
3152 case PPC::BI__builtin_altivec_stvewx:
3153 {
3154 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
3155 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
3156 Ops.pop_back();
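// As with the loads above, fold the (offset, pointer) pair into one byte
// address and drop the trailing pointer operand before calling the
// store intrinsic.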
3157
3158 switch (BuiltinID) {
3159 default: llvm_unreachable("Unsupported st intrinsic!");
3160 case PPC::BI__builtin_altivec_stvx:
3161 ID = Intrinsic::ppc_altivec_stvx;
3162 break;
3163 case PPC::BI__builtin_altivec_stvxl:
3164 ID = Intrinsic::ppc_altivec_stvxl;
3165 break;
3166 case PPC::BI__builtin_altivec_stvebx:
3167 ID = Intrinsic::ppc_altivec_stvebx;
3168 break;
3169 case PPC::BI__builtin_altivec_stvehx:
3170 ID = Intrinsic::ppc_altivec_stvehx;
3171 break;
3172 case PPC::BI__builtin_altivec_stvewx:
3173 ID = Intrinsic::ppc_altivec_stvewx;
3174 break;
3175 }
3176 llvm::Function *F = CGM.getIntrinsic(ID);
3177 return Builder.CreateCall(F, Ops, "");
3178 }
3179 }
3180 }
3181