1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Reactor.hpp"
16
17 #include "x86.hpp"
18 #include "CPUID.hpp"
19 #include "Thread.hpp"
20 #include "ExecutableMemory.hpp"
21 #include "MutexLock.hpp"
22
23 #undef min
24 #undef max
25
26 #if REACTOR_LLVM_VERSION < 7
27 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/Constants.h"
29 #include "llvm/Function.h"
30 #include "llvm/GlobalVariable.h"
31 #include "llvm/Intrinsics.h"
32 #include "llvm/LLVMContext.h"
33 #include "llvm/Module.h"
34 #include "llvm/PassManager.h"
35 #include "llvm/Support/IRBuilder.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Target/TargetData.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include "../lib/ExecutionEngine/JIT/JIT.h"
41
42 #include "LLVMRoutine.hpp"
43 #include "LLVMRoutineManager.hpp"
44
45 #define ARGS(...) __VA_ARGS__
46 #else
47 #include "llvm/Analysis/LoopPass.h"
48 #include "llvm/ExecutionEngine/ExecutionEngine.h"
49 #include "llvm/ExecutionEngine/JITSymbol.h"
50 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
51 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
52 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
53 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
54 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
55 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
56 #include "llvm/IR/Constants.h"
57 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/GlobalVariable.h"
60 #include "llvm/IR/IRBuilder.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/LegacyPassManager.h"
64 #include "llvm/IR/Mangler.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/TargetSelect.h"
68 #include "llvm/Target/TargetOptions.h"
69 #include "llvm/Transforms/InstCombine/InstCombine.h"
70 #include "llvm/Transforms/Scalar.h"
71 #include "llvm/Transforms/Scalar/GVN.h"
72
73 #include "LLVMRoutine.hpp"
74
75 #define ARGS(...) {__VA_ARGS__}
76 #define CreateCall2 CreateCall
77 #define CreateCall3 CreateCall
78
79 #include <unordered_map>
80 #endif
81
82 #include <numeric>
83 #include <fstream>
84
85 #if defined(__i386__) || defined(__x86_64__)
86 #include <xmmintrin.h>
87 #endif
88
89 #include <math.h>
90
91 #if defined(__x86_64__) && defined(_WIN32)
// Stub definition of the X86CompilationCallback symbol. It is only compiled
// for 64-bit x86 Windows builds and is not expected to be reached: calling it
// trips the assertion below.
// NOTE(review): presumably present only to satisfy a link-time reference from
// the LLVM JIT — confirm against the LLVM version in use.
extern "C" void X86CompilationCallback()
{
    assert(false);   // UNIMPLEMENTED
}
96 #endif
97
98 #if REACTOR_LLVM_VERSION < 7
// Declaration of an LLVM global flag so that Nucleus::Nucleus() can clear it
// on pre-LLVM-7 builds.
namespace llvm
{
    extern bool JITEmitDebugInfo;
}
103 #endif
104
// Forward declaration: the file-local ::reactorJIT pointer is declared before
// the class itself is defined.
namespace rr
{
    class LLVMReactorJIT;
}
109
110 namespace
111 {
// File-local code generation state shared by every function in this file.
// reactorJIT, builder and context are created on first use by
// Nucleus::Nucleus(); module and function describe the routine currently
// being built and are reset to nullptr by LLVMReactorJIT::endSession().
rr::LLVMReactorJIT *reactorJIT = nullptr;
llvm::IRBuilder<> *builder = nullptr;
llvm::LLVMContext *context = nullptr;
llvm::Module *module = nullptr;
llvm::Function *function = nullptr;

// Locked by the Nucleus constructor and unlocked by its destructor, so only
// one Nucleus (and therefore one routine under construction) exists at a time.
rr::MutexLock codegenMutex;
119
120 #if REACTOR_LLVM_VERSION >= 7
lowerPAVG(llvm::Value * x,llvm::Value * y)121 llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
122 {
123 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
124
125 llvm::VectorType *extTy =
126 llvm::VectorType::getExtendedElementVectorType(ty);
127 x = ::builder->CreateZExt(x, extTy);
128 y = ::builder->CreateZExt(y, extTy);
129
130 // (x + y + 1) >> 1
131 llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
132 llvm::Value *res = ::builder->CreateAdd(x, y);
133 res = ::builder->CreateAdd(res, one);
134 res = ::builder->CreateLShr(res, one);
135 return ::builder->CreateTrunc(res, ty);
136 }
137
lowerPMINMAX(llvm::Value * x,llvm::Value * y,llvm::ICmpInst::Predicate pred)138 llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
139 llvm::ICmpInst::Predicate pred)
140 {
141 return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
142 }
143
lowerPCMP(llvm::ICmpInst::Predicate pred,llvm::Value * x,llvm::Value * y,llvm::Type * dstTy)144 llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
145 llvm::Value *y, llvm::Type *dstTy)
146 {
147 return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
148 }
149
150 #if defined(__i386__) || defined(__x86_64__)
lowerPMOV(llvm::Value * op,llvm::Type * dstType,bool sext)151 llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
152 {
153 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
154 llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
155
156 llvm::Value *undef = llvm::UndefValue::get(srcTy);
157 llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
158 std::iota(mask.begin(), mask.end(), 0);
159 llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
160
161 return sext ? ::builder->CreateSExt(v, dstTy)
162 : ::builder->CreateZExt(v, dstTy);
163 }
164
lowerPABS(llvm::Value * v)165 llvm::Value *lowerPABS(llvm::Value *v)
166 {
167 llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
168 llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
169 llvm::Value *neg = ::builder->CreateNeg(v);
170 return ::builder->CreateSelect(cmp, v, neg);
171 }
172 #endif // defined(__i386__) || defined(__x86_64__)
173
174 #if !defined(__i386__) && !defined(__x86_64__)
lowerPFMINMAX(llvm::Value * x,llvm::Value * y,llvm::FCmpInst::Predicate pred)175 llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
176 llvm::FCmpInst::Predicate pred)
177 {
178 return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
179 }
180
lowerRound(llvm::Value * x)181 llvm::Value *lowerRound(llvm::Value *x)
182 {
183 llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
184 ::module, llvm::Intrinsic::nearbyint, {x->getType()});
185 return ::builder->CreateCall(nearbyint, ARGS(x));
186 }
187
lowerRoundInt(llvm::Value * x,llvm::Type * ty)188 llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
189 {
190 return ::builder->CreateFPToSI(lowerRound(x), ty);
191 }
192
lowerFloor(llvm::Value * x)193 llvm::Value *lowerFloor(llvm::Value *x)
194 {
195 llvm::Function *floor = llvm::Intrinsic::getDeclaration(
196 ::module, llvm::Intrinsic::floor, {x->getType()});
197 return ::builder->CreateCall(floor, ARGS(x));
198 }
199
lowerTrunc(llvm::Value * x)200 llvm::Value *lowerTrunc(llvm::Value *x)
201 {
202 llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
203 ::module, llvm::Intrinsic::trunc, {x->getType()});
204 return ::builder->CreateCall(trunc, ARGS(x));
205 }
206
207 // Packed add/sub saturatation
lowerPSAT(llvm::Value * x,llvm::Value * y,bool isAdd,bool isSigned)208 llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
209 {
210 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
211 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
212
213 unsigned numBits = ty->getScalarSizeInBits();
214
215 llvm::Value *max, *min, *extX, *extY;
216 if (isSigned)
217 {
218 max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
219 min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
220 extX = ::builder->CreateSExt(x, extTy);
221 extY = ::builder->CreateSExt(y, extTy);
222 }
223 else
224 {
225 assert(numBits <= 64);
226 uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
227 max = llvm::ConstantInt::get(extTy, maxVal, false);
228 min = llvm::ConstantInt::get(extTy, 0, false);
229 extX = ::builder->CreateZExt(x, extTy);
230 extY = ::builder->CreateZExt(y, extTy);
231 }
232
233 llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
234 : ::builder->CreateSub(extX, extY);
235
236 res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
237 res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
238
239 return ::builder->CreateTrunc(res, ty);
240 }
241
lowerPUADDSAT(llvm::Value * x,llvm::Value * y)242 llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
243 {
244 return lowerPSAT(x, y, true, false);
245 }
246
lowerPSADDSAT(llvm::Value * x,llvm::Value * y)247 llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
248 {
249 return lowerPSAT(x, y, true, true);
250 }
251
lowerPUSUBSAT(llvm::Value * x,llvm::Value * y)252 llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
253 {
254 return lowerPSAT(x, y, false, false);
255 }
256
lowerPSSUBSAT(llvm::Value * x,llvm::Value * y)257 llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
258 {
259 return lowerPSAT(x, y, false, true);
260 }
261
lowerSQRT(llvm::Value * x)262 llvm::Value *lowerSQRT(llvm::Value *x)
263 {
264 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
265 ::module, llvm::Intrinsic::sqrt, {x->getType()});
266 return ::builder->CreateCall(sqrt, ARGS(x));
267 }
268
lowerRCP(llvm::Value * x)269 llvm::Value *lowerRCP(llvm::Value *x)
270 {
271 llvm::Type *ty = x->getType();
272 llvm::Constant *one;
273 if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
274 {
275 one = llvm::ConstantVector::getSplat(
276 vectorTy->getNumElements(),
277 llvm::ConstantFP::get(vectorTy->getElementType(), 1));
278 }
279 else
280 {
281 one = llvm::ConstantFP::get(ty, 1);
282 }
283 return ::builder->CreateFDiv(one, x);
284 }
285
lowerRSQRT(llvm::Value * x)286 llvm::Value *lowerRSQRT(llvm::Value *x)
287 {
288 return lowerRCP(lowerSQRT(x));
289 }
290
lowerVectorShl(llvm::Value * x,uint64_t scalarY)291 llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
292 {
293 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
294 llvm::Value *y = llvm::ConstantVector::getSplat(
295 ty->getNumElements(),
296 llvm::ConstantInt::get(ty->getElementType(), scalarY));
297 return ::builder->CreateShl(x, y);
298 }
299
lowerVectorAShr(llvm::Value * x,uint64_t scalarY)300 llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
301 {
302 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
303 llvm::Value *y = llvm::ConstantVector::getSplat(
304 ty->getNumElements(),
305 llvm::ConstantInt::get(ty->getElementType(), scalarY));
306 return ::builder->CreateAShr(x, y);
307 }
308
lowerVectorLShr(llvm::Value * x,uint64_t scalarY)309 llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
310 {
311 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
312 llvm::Value *y = llvm::ConstantVector::getSplat(
313 ty->getNumElements(),
314 llvm::ConstantInt::get(ty->getElementType(), scalarY));
315 return ::builder->CreateLShr(x, y);
316 }
317
lowerMulAdd(llvm::Value * x,llvm::Value * y)318 llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
319 {
320 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
321 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
322
323 llvm::Value *extX = ::builder->CreateSExt(x, extTy);
324 llvm::Value *extY = ::builder->CreateSExt(y, extTy);
325 llvm::Value *mult = ::builder->CreateMul(extX, extY);
326
327 llvm::Value *undef = llvm::UndefValue::get(extTy);
328
329 llvm::SmallVector<uint32_t, 16> evenIdx;
330 llvm::SmallVector<uint32_t, 16> oddIdx;
331 for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
332 {
333 evenIdx.push_back(i);
334 oddIdx.push_back(i + 1);
335 }
336
337 llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
338 llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
339 return ::builder->CreateAdd(lhs, rhs);
340 }
341
lowerMulHigh(llvm::Value * x,llvm::Value * y,bool sext)342 llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
343 {
344 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
345 llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
346
347 llvm::Value *extX, *extY;
348 if (sext)
349 {
350 extX = ::builder->CreateSExt(x, extTy);
351 extY = ::builder->CreateSExt(y, extTy);
352 }
353 else
354 {
355 extX = ::builder->CreateZExt(x, extTy);
356 extY = ::builder->CreateZExt(y, extTy);
357 }
358
359 llvm::Value *mult = ::builder->CreateMul(extX, extY);
360
361 llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
362 llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getIntegerBitWidth());
363 return ::builder->CreateTrunc(mulh, ty);
364 }
365
lowerPack(llvm::Value * x,llvm::Value * y,bool isSigned)366 llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
367 {
368 llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
369 llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
370
371 llvm::IntegerType *dstElemTy =
372 llvm::cast<llvm::IntegerType>(dstTy->getElementType());
373
374 uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
375 assert(truncNumBits < 64 && "shift 64 must be handled separately");
376 llvm::Constant *max, *min;
377 if (isSigned)
378 {
379 max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
380 min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
381 }
382 else
383 {
384 max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
385 min = llvm::ConstantInt::get(srcTy, 0, false);
386 }
387
388 x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
389 x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
390 y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
391 y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
392
393 x = ::builder->CreateTrunc(x, dstTy);
394 y = ::builder->CreateTrunc(y, dstTy);
395
396 llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
397 std::iota(index.begin(), index.end(), 0);
398
399 return ::builder->CreateShuffleVector(x, y, index);
400 }
401
lowerSignMask(llvm::Value * x,llvm::Type * retTy)402 llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
403 {
404 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
405 llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
406 llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
407
408 llvm::Value *ret = ::builder->CreateZExt(
409 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
410 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
411 {
412 llvm::Value *elem = ::builder->CreateZExt(
413 ::builder->CreateExtractElement(cmp, i), retTy);
414 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
415 }
416 return ret;
417 }
418
lowerFPSignMask(llvm::Value * x,llvm::Type * retTy)419 llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
420 {
421 llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
422 llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
423 llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
424
425 llvm::Value *ret = ::builder->CreateZExt(
426 ::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
427 for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
428 {
429 llvm::Value *elem = ::builder->CreateZExt(
430 ::builder->CreateExtractElement(cmp, i), retTy);
431 ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
432 }
433 return ret;
434 }
435 #endif // !defined(__i386__) && !defined(__x86_64__)
436 #endif // REACTOR_LLVM_VERSION >= 7
437 }
438
439 namespace rr
440 {
441 #if REACTOR_LLVM_VERSION < 7
442 class LLVMReactorJIT
443 {
444 private:
445 std::string arch;
446 llvm::SmallVector<std::string, 16> mattrs;
447 llvm::ExecutionEngine *executionEngine;
448 LLVMRoutineManager *routineManager;
449
450 public:
LLVMReactorJIT(const std::string & arch_,const llvm::SmallVectorImpl<std::string> & mattrs_)451 LLVMReactorJIT(const std::string &arch_,
452 const llvm::SmallVectorImpl<std::string> &mattrs_) :
453 arch(arch_),
454 mattrs(mattrs_.begin(), mattrs_.end()),
455 executionEngine(nullptr),
456 routineManager(nullptr)
457 {
458 }
459
startSession()460 void startSession()
461 {
462 std::string error;
463
464 ::module = new llvm::Module("", *::context);
465
466 routineManager = new LLVMRoutineManager();
467
468 llvm::TargetMachine *targetMachine =
469 llvm::EngineBuilder::selectTarget(
470 ::module, arch, "", mattrs, llvm::Reloc::Default,
471 llvm::CodeModel::JITDefault, &error);
472
473 executionEngine = llvm::JIT::createJIT(
474 ::module, &error, routineManager, llvm::CodeGenOpt::Aggressive,
475 true, targetMachine);
476 }
477
endSession()478 void endSession()
479 {
480 delete executionEngine;
481 executionEngine = nullptr;
482 routineManager = nullptr;
483
484 ::function = nullptr;
485 ::module = nullptr;
486 }
487
acquireRoutine(llvm::Function * func)488 LLVMRoutine *acquireRoutine(llvm::Function *func)
489 {
490 void *entry = executionEngine->getPointerToFunction(::function);
491 return routineManager->acquireRoutine(entry);
492 }
493
optimize(llvm::Module * module)494 void optimize(llvm::Module *module)
495 {
496 static llvm::PassManager *passManager = nullptr;
497
498 if(!passManager)
499 {
500 passManager = new llvm::PassManager();
501
502 passManager->add(new llvm::TargetData(*executionEngine->getTargetData()));
503 passManager->add(llvm::createScalarReplAggregatesPass());
504
505 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
506 {
507 switch(optimization[pass])
508 {
509 case Disabled: break;
510 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
511 case LICM: passManager->add(llvm::createLICMPass()); break;
512 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
513 case GVN: passManager->add(llvm::createGVNPass()); break;
514 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
515 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
516 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
517 case SCCP: passManager->add(llvm::createSCCPPass()); break;
518 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
519 default:
520 assert(false);
521 }
522 }
523 }
524
525 passManager->run(*::module);
526 }
527 };
528 #else
// Name-to-address table for the C library functions that JIT-compiled code
// may reference: floorf, nearbyintf and truncf.
class ExternalFunctionSymbolResolver
{
private:
    using FunctionMap = std::unordered_map<std::string, void *>;
    FunctionMap func_;

public:
    // Fills the table with the supported functions.
    ExternalFunctionSymbolResolver() :
        func_{
            {"floorf", reinterpret_cast<void*>(floorf)},
            {"nearbyintf", reinterpret_cast<void*>(nearbyintf)},
            {"truncf", reinterpret_cast<void*>(truncf)},
        }
    {
    }

    // Returns the address registered under `name`, or nullptr when the name
    // is not in the table.
    void *findSymbol(const std::string &name) const
    {
        const auto entry = func_.find(name);
        if(entry == func_.end())
        {
            return nullptr;
        }

        return entry->second;
    }
};
549
550 class LLVMReactorJIT
551 {
552 private:
553 using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
554 using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
555
556 llvm::orc::ExecutionSession session;
557 ExternalFunctionSymbolResolver externalSymbolResolver;
558 std::shared_ptr<llvm::orc::SymbolResolver> resolver;
559 std::unique_ptr<llvm::TargetMachine> targetMachine;
560 const llvm::DataLayout dataLayout;
561 ObjLayer objLayer;
562 CompileLayer compileLayer;
563 size_t emittedFunctionsNum;
564
565 public:
566 LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
567 const llvm::TargetOptions &targetOpts):
568 resolver(createLegacyLookupResolver(
569 session,
570 [this](const std::string &name) {
571 void *func = externalSymbolResolver.findSymbol(name);
572 if (func != nullptr)
573 {
574 return llvm::JITSymbol(
575 reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
576 }
577
578 return objLayer.findSymbol(name, true);
579 },
580 [](llvm::Error err) {
581 if (err)
582 {
583 // TODO: Log the symbol resolution errors.
584 return;
585 }
586 })),
587 targetMachine(llvm::EngineBuilder()
588 .setMArch(arch)
589 .setMAttrs(mattrs)
590 .setTargetOptions(targetOpts)
591 .selectTarget()),
592 dataLayout(targetMachine->createDataLayout()),
593 objLayer(
594 session,
595 [this](llvm::orc::VModuleKey) {
596 return ObjLayer::Resources{
597 std::make_shared<llvm::SectionMemoryManager>(),
598 resolver};
599 }),
600 compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
601 emittedFunctionsNum(0)
602 {
603 }
604
605 void startSession()
606 {
607 ::module = new llvm::Module("", *::context);
608 }
609
610 void endSession()
611 {
612 ::function = nullptr;
613 ::module = nullptr;
614 }
615
616 LLVMRoutine *acquireRoutine(llvm::Function *func)
617 {
618 std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
619 func->setName(name);
620 func->setLinkage(llvm::GlobalValue::ExternalLinkage);
621 func->setDoesNotThrow();
622
623 std::unique_ptr<llvm::Module> mod(::module);
624 ::module = nullptr;
625 mod->setDataLayout(dataLayout);
626
627 auto moduleKey = session.allocateVModule();
628 llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
629
630 std::string mangledName;
631 {
632 llvm::raw_string_ostream mangledNameStream(mangledName);
633 llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
634 }
635
636 llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
637
638 llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
639 if(!expectAddr)
640 {
641 return nullptr;
642 }
643
644 void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
645 return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
646 }
647
648 void optimize(llvm::Module *module)
649 {
650 std::unique_ptr<llvm::legacy::PassManager> passManager(
651 new llvm::legacy::PassManager());
652
653 passManager->add(llvm::createSROAPass());
654
655 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
656 {
657 switch(optimization[pass])
658 {
659 case Disabled: break;
660 case CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
661 case LICM: passManager->add(llvm::createLICMPass()); break;
662 case AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
663 case GVN: passManager->add(llvm::createGVNPass()); break;
664 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
665 case Reassociate: passManager->add(llvm::createReassociatePass()); break;
666 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
667 case SCCP: passManager->add(llvm::createSCCPPass()); break;
668 case ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
669 default:
670 assert(false);
671 }
672 }
673
674 passManager->run(*::module);
675 }
676
677 private:
678 void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
679 {
680 llvm::cantFail(compileLayer.removeModule(moduleKey));
681 }
682
683 static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
684 {
685 jit->releaseRoutineModule(moduleKey);
686 }
687 };
688 #endif
689
// Ordered list of optimization passes applied by LLVMReactorJIT::optimize().
// The list is read front to back and ends at the first Disabled entry (or
// after 10 entries).
Optimization optimization[10] = {InstructionCombining, Disabled};
691
// Vector types shorter than 128 bits. They have no llvm::Type of their own:
// T(EmulatedType) disguises the enumerator value as a Type pointer, and
// T(Type*) maps each one to the 128-bit vector type that implements it.
enum EmulatedType
{
    Type_v2i32,   // 2 elements, 8 bytes; implemented with Int4
    Type_v4i16,   // 4 elements, 8 bytes; implemented with Short8
    Type_v2i16,   // 2 elements, 4 bytes; implemented with Short8
    Type_v8i8,    // 8 elements, 8 bytes; implemented with Byte16
    Type_v4i8,    // 4 elements, 4 bytes; implemented with Byte16
    Type_v2f32,   // 2 elements, 8 bytes; implemented with Float4
    EmulatedTypeCount   // Number of emulated types; not a type itself
};
702
T(Type * t)703 llvm::Type *T(Type *t)
704 {
705 uintptr_t type = reinterpret_cast<uintptr_t>(t);
706 if(type < EmulatedTypeCount)
707 {
708 // Use 128-bit vectors to implement logically shorter ones.
709 switch(type)
710 {
711 case Type_v2i32: return T(Int4::getType());
712 case Type_v4i16: return T(Short8::getType());
713 case Type_v2i16: return T(Short8::getType());
714 case Type_v8i8: return T(Byte16::getType());
715 case Type_v4i8: return T(Byte16::getType());
716 case Type_v2f32: return T(Float4::getType());
717 default: assert(false);
718 }
719 }
720
721 return reinterpret_cast<llvm::Type*>(t);
722 }
723
T(llvm::Type * t)724 inline Type *T(llvm::Type *t)
725 {
726 return reinterpret_cast<Type*>(t);
727 }
728
T(EmulatedType t)729 Type *T(EmulatedType t)
730 {
731 return reinterpret_cast<Type*>(t);
732 }
733
V(Value * t)734 inline llvm::Value *V(Value *t)
735 {
736 return reinterpret_cast<llvm::Value*>(t);
737 }
738
V(llvm::Value * t)739 inline Value *V(llvm::Value *t)
740 {
741 return reinterpret_cast<Value*>(t);
742 }
743
T(std::vector<Type * > & t)744 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
745 {
746 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
747 }
748
B(BasicBlock * t)749 inline llvm::BasicBlock *B(BasicBlock *t)
750 {
751 return reinterpret_cast<llvm::BasicBlock*>(t);
752 }
753
B(llvm::BasicBlock * t)754 inline BasicBlock *B(llvm::BasicBlock *t)
755 {
756 return reinterpret_cast<BasicBlock*>(t);
757 }
758
typeSize(Type * type)759 static size_t typeSize(Type *type)
760 {
761 uintptr_t t = reinterpret_cast<uintptr_t>(type);
762 if(t < EmulatedTypeCount)
763 {
764 switch(t)
765 {
766 case Type_v2i32: return 8;
767 case Type_v4i16: return 8;
768 case Type_v2i16: return 4;
769 case Type_v8i8: return 8;
770 case Type_v4i8: return 4;
771 case Type_v2f32: return 8;
772 default: assert(false);
773 }
774 }
775
776 return T(type)->getPrimitiveSizeInBits() / 8;
777 }
778
elementCount(Type * type)779 static unsigned int elementCount(Type *type)
780 {
781 uintptr_t t = reinterpret_cast<uintptr_t>(type);
782 if(t < EmulatedTypeCount)
783 {
784 switch(t)
785 {
786 case Type_v2i32: return 2;
787 case Type_v4i16: return 4;
788 case Type_v2i16: return 2;
789 case Type_v8i8: return 8;
790 case Type_v4i8: return 4;
791 case Type_v2f32: return 2;
792 default: assert(false);
793 }
794 }
795
796 return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
797 }
798
// Begins the construction of a new routine. Takes the code generation mutex
// (released again by the destructor), creates the LLVM context, the JIT and
// the IR builder on first use, and starts a new JIT session.
Nucleus::Nucleus()
{
    ::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe

    llvm::InitializeNativeTarget();

#if REACTOR_LLVM_VERSION >= 7
    llvm::InitializeNativeTargetAsmPrinter();
    llvm::InitializeNativeTargetAsmParser();
#endif

    // The context is created once and reused by every later Nucleus.
    if(!::context)
    {
        ::context = new llvm::LLVMContext();
    }

    // Architecture name of the host, which is also the JIT target.
    #if defined(__x86_64__)
        static const char arch[] = "x86-64";
    #elif defined(__i386__)
        static const char arch[] = "x86";
    #elif defined(__aarch64__)
        static const char arch[] = "arm64";
    #elif defined(__arm__)
        static const char arch[] = "arm";
    #elif defined(__mips__)
        #if defined(__mips64)
            static const char arch[] = "mips64el";
        #else
            static const char arch[] = "mipsel";
        #endif
    #else
    #error "unknown architecture"
    #endif

    // Target feature strings. On x86 every feature is explicitly switched on
    // or off according to what CPUID reports.
    llvm::SmallVector<std::string, 1> mattrs;
#if defined(__i386__) || defined(__x86_64__)
    mattrs.push_back(CPUID::supportsMMX()    ? "+mmx"    : "-mmx");
    mattrs.push_back(CPUID::supportsCMOV()   ? "+cmov"   : "-cmov");
    mattrs.push_back(CPUID::supportsSSE()    ? "+sse"    : "-sse");
    mattrs.push_back(CPUID::supportsSSE2()   ? "+sse2"   : "-sse2");
    mattrs.push_back(CPUID::supportsSSE3()   ? "+sse3"   : "-sse3");
    mattrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3"  : "-ssse3");
#if REACTOR_LLVM_VERSION < 7
    // The SSE4.1 feature is spelled differently by the two LLVM versions.
    mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41"  : "-sse41");
#else
    mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
#endif
#elif defined(__arm__)
#if __ARM_ARCH >= 8
    mattrs.push_back("+armv8-a");
#else
    // armv7-a requires compiler-rt routines; otherwise, compiled kernel
    // might fail to link.
#endif
#endif

    // Floating-point and debug-info options. Note that unsafe FP math is
    // enabled for LLVM < 7 but disabled for LLVM >= 7.
#if REACTOR_LLVM_VERSION < 7
    llvm::JITEmitDebugInfo = false;
    llvm::UnsafeFPMath = true;
    // llvm::NoInfsFPMath = true;
    // llvm::NoNaNsFPMath = true;
#else
    llvm::TargetOptions targetOpts;
    targetOpts.UnsafeFPMath = false;
    // targetOpts.NoInfsFPMath = true;
    // targetOpts.NoNaNsFPMath = true;
#endif

    // The JIT is created once and reused by every later Nucleus.
    if(!::reactorJIT)
    {
#if REACTOR_LLVM_VERSION < 7
        ::reactorJIT = new LLVMReactorJIT(arch, mattrs);
#else
        ::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
#endif
    }

    ::reactorJIT->startSession();

    // The builder is created once and reused by every later Nucleus.
    if(!::builder)
    {
        ::builder = new llvm::IRBuilder<>(*::context);
    }
}
883
// Ends the JIT session started by the constructor and then releases the code
// generation mutex, allowing another Nucleus to be constructed.
Nucleus::~Nucleus()
{
    ::reactorJIT->endSession();

    // Unlock last, so the session is fully torn down before another thread
    // can start one.
    ::codegenMutex.unlock();
}
890
acquireRoutine(const char * name,bool runOptimizations)891 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
892 {
893 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
894 {
895 llvm::Type *type = ::function->getReturnType();
896
897 if(type->isVoidTy())
898 {
899 createRetVoid();
900 }
901 else
902 {
903 createRet(V(llvm::UndefValue::get(type)));
904 }
905 }
906
907 if(false)
908 {
909 #if REACTOR_LLVM_VERSION < 7
910 std::string error;
911 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error);
912 #else
913 std::error_code error;
914 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
915 #endif
916
917 ::module->print(file, 0);
918 }
919
920 if(runOptimizations)
921 {
922 optimize();
923 }
924
925 if(false)
926 {
927 #if REACTOR_LLVM_VERSION < 7
928 std::string error;
929 llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error);
930 #else
931 std::error_code error;
932 llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
933 #endif
934
935 ::module->print(file, 0);
936 }
937
938 LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
939
940 return routine;
941 }
942
optimize()943 void Nucleus::optimize()
944 {
945 ::reactorJIT->optimize(::module);
946 }
947
allocateStackVariable(Type * type,int arraySize)948 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
949 {
950 // Need to allocate it in the entry block for mem2reg to work
951 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
952
953 llvm::Instruction *declaration;
954
955 if(arraySize)
956 {
957 #if REACTOR_LLVM_VERSION < 7
958 declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize)));
959 #else
960 declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
961 #endif
962 }
963 else
964 {
965 #if REACTOR_LLVM_VERSION < 7
966 declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr);
967 #else
968 declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
969 #endif
970 }
971
972 entryBlock.getInstList().push_front(declaration);
973
974 return V(declaration);
975 }
976
createBasicBlock()977 BasicBlock *Nucleus::createBasicBlock()
978 {
979 return B(llvm::BasicBlock::Create(*::context, "", ::function));
980 }
981
getInsertBlock()982 BasicBlock *Nucleus::getInsertBlock()
983 {
984 return B(::builder->GetInsertBlock());
985 }
986
setInsertBlock(BasicBlock * basicBlock)987 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
988 {
989 // assert(::builder->GetInsertBlock()->back().isTerminator());
990 ::builder->SetInsertPoint(B(basicBlock));
991 }
992
createFunction(Type * ReturnType,std::vector<Type * > & Params)993 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
994 {
995 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
996 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
997 ::function->setCallingConv(llvm::CallingConv::C);
998
999 #if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7
1000 // FIXME(capn):
1001 // On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1002 // having a trap which allows the OS to grow the stack. For functions with a stack frame
1003 // larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1004 // page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1005 // the stack and ensure all pages have been committed. This is currently broken in LLVM
1006 // JIT, but we can prevent emitting the stack probe call:
1007 ::function->addFnAttr("stack-probe-size", "1048576");
1008 #endif
1009
1010 ::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
1011 }
1012
getArgument(unsigned int index)1013 Value *Nucleus::getArgument(unsigned int index)
1014 {
1015 llvm::Function::arg_iterator args = ::function->arg_begin();
1016
1017 while(index)
1018 {
1019 args++;
1020 index--;
1021 }
1022
1023 return V(&*args);
1024 }
1025
createRetVoid()1026 void Nucleus::createRetVoid()
1027 {
1028 ::builder->CreateRetVoid();
1029 }
1030
createRet(Value * v)1031 void Nucleus::createRet(Value *v)
1032 {
1033 ::builder->CreateRet(V(v));
1034 }
1035
createBr(BasicBlock * dest)1036 void Nucleus::createBr(BasicBlock *dest)
1037 {
1038 ::builder->CreateBr(B(dest));
1039 }
1040
createCondBr(Value * cond,BasicBlock * ifTrue,BasicBlock * ifFalse)1041 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1042 {
1043 ::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
1044 }
1045
createAdd(Value * lhs,Value * rhs)1046 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1047 {
1048 return V(::builder->CreateAdd(V(lhs), V(rhs)));
1049 }
1050
createSub(Value * lhs,Value * rhs)1051 Value *Nucleus::createSub(Value *lhs, Value *rhs)
1052 {
1053 return V(::builder->CreateSub(V(lhs), V(rhs)));
1054 }
1055
createMul(Value * lhs,Value * rhs)1056 Value *Nucleus::createMul(Value *lhs, Value *rhs)
1057 {
1058 return V(::builder->CreateMul(V(lhs), V(rhs)));
1059 }
1060
createUDiv(Value * lhs,Value * rhs)1061 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1062 {
1063 return V(::builder->CreateUDiv(V(lhs), V(rhs)));
1064 }
1065
createSDiv(Value * lhs,Value * rhs)1066 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1067 {
1068 return V(::builder->CreateSDiv(V(lhs), V(rhs)));
1069 }
1070
createFAdd(Value * lhs,Value * rhs)1071 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1072 {
1073 return V(::builder->CreateFAdd(V(lhs), V(rhs)));
1074 }
1075
createFSub(Value * lhs,Value * rhs)1076 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1077 {
1078 return V(::builder->CreateFSub(V(lhs), V(rhs)));
1079 }
1080
createFMul(Value * lhs,Value * rhs)1081 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1082 {
1083 return V(::builder->CreateFMul(V(lhs), V(rhs)));
1084 }
1085
createFDiv(Value * lhs,Value * rhs)1086 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1087 {
1088 return V(::builder->CreateFDiv(V(lhs), V(rhs)));
1089 }
1090
createURem(Value * lhs,Value * rhs)1091 Value *Nucleus::createURem(Value *lhs, Value *rhs)
1092 {
1093 return V(::builder->CreateURem(V(lhs), V(rhs)));
1094 }
1095
createSRem(Value * lhs,Value * rhs)1096 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1097 {
1098 return V(::builder->CreateSRem(V(lhs), V(rhs)));
1099 }
1100
createFRem(Value * lhs,Value * rhs)1101 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1102 {
1103 return V(::builder->CreateFRem(V(lhs), V(rhs)));
1104 }
1105
createShl(Value * lhs,Value * rhs)1106 Value *Nucleus::createShl(Value *lhs, Value *rhs)
1107 {
1108 return V(::builder->CreateShl(V(lhs), V(rhs)));
1109 }
1110
createLShr(Value * lhs,Value * rhs)1111 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1112 {
1113 return V(::builder->CreateLShr(V(lhs), V(rhs)));
1114 }
1115
createAShr(Value * lhs,Value * rhs)1116 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1117 {
1118 return V(::builder->CreateAShr(V(lhs), V(rhs)));
1119 }
1120
createAnd(Value * lhs,Value * rhs)1121 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1122 {
1123 return V(::builder->CreateAnd(V(lhs), V(rhs)));
1124 }
1125
createOr(Value * lhs,Value * rhs)1126 Value *Nucleus::createOr(Value *lhs, Value *rhs)
1127 {
1128 return V(::builder->CreateOr(V(lhs), V(rhs)));
1129 }
1130
createXor(Value * lhs,Value * rhs)1131 Value *Nucleus::createXor(Value *lhs, Value *rhs)
1132 {
1133 return V(::builder->CreateXor(V(lhs), V(rhs)));
1134 }
1135
createNeg(Value * v)1136 Value *Nucleus::createNeg(Value *v)
1137 {
1138 return V(::builder->CreateNeg(V(v)));
1139 }
1140
createFNeg(Value * v)1141 Value *Nucleus::createFNeg(Value *v)
1142 {
1143 return V(::builder->CreateFNeg(V(v)));
1144 }
1145
createNot(Value * v)1146 Value *Nucleus::createNot(Value *v)
1147 {
1148 return V(::builder->CreateNot(V(v)));
1149 }
1150
createLoad(Value * ptr,Type * type,bool isVolatile,unsigned int alignment)1151 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
1152 {
1153 uintptr_t t = reinterpret_cast<uintptr_t>(type);
1154 if(t < EmulatedTypeCount)
1155 {
1156 switch(t)
1157 {
1158 case Type_v2i32:
1159 case Type_v4i16:
1160 case Type_v8i8:
1161 case Type_v2f32:
1162 return createBitCast(
1163 createInsertElement(
1164 V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
1165 createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment),
1166 0),
1167 type);
1168 case Type_v2i16:
1169 case Type_v4i8:
1170 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
1171 {
1172 Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
1173 Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
1174 i = createZExt(i, Long::getType());
1175 Value *v = createInsertElement(u, i, 0);
1176 return createBitCast(v, type);
1177 }
1178 break;
1179 default:
1180 assert(false);
1181 }
1182 }
1183
1184 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1185 return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment)));
1186 }
1187
createStore(Value * value,Value * ptr,Type * type,bool isVolatile,unsigned int alignment)1188 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
1189 {
1190 uintptr_t t = reinterpret_cast<uintptr_t>(type);
1191 if(t < EmulatedTypeCount)
1192 {
1193 switch(t)
1194 {
1195 case Type_v2i32:
1196 case Type_v4i16:
1197 case Type_v8i8:
1198 case Type_v2f32:
1199 createStore(
1200 createExtractElement(
1201 createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
1202 createBitCast(ptr, Pointer<Long>::getType()),
1203 Long::getType(), isVolatile, alignment);
1204 return value;
1205 case Type_v2i16:
1206 case Type_v4i8:
1207 if(alignment != 0) // Not a local variable (all vectors are 128-bit).
1208 {
1209 createStore(
1210 createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
1211 createBitCast(ptr, Pointer<Int>::getType()),
1212 Int::getType(), isVolatile, alignment);
1213 return value;
1214 }
1215 break;
1216 default:
1217 assert(false);
1218 }
1219 }
1220
1221 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1222 ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
1223 return value;
1224 }
1225
createGEP(Value * ptr,Type * type,Value * index,bool unsignedIndex)1226 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1227 {
1228 if(sizeof(void*) == 8)
1229 {
1230 if(unsignedIndex)
1231 {
1232 index = createZExt(index, Long::getType());
1233 }
1234 else
1235 {
1236 index = createSExt(index, Long::getType());
1237 }
1238
1239 index = createMul(index, createConstantLong((int64_t)typeSize(type)));
1240 }
1241 else
1242 {
1243 index = createMul(index, createConstantInt((int)typeSize(type)));
1244 }
1245
1246 assert(V(ptr)->getType()->getContainedType(0) == T(type));
1247 return createBitCast(
1248 V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1249 T(llvm::PointerType::get(T(type), 0)));
1250 }
1251
createAtomicAdd(Value * ptr,Value * value)1252 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
1253 {
1254 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
1255 }
1256
createTrunc(Value * v,Type * destType)1257 Value *Nucleus::createTrunc(Value *v, Type *destType)
1258 {
1259 return V(::builder->CreateTrunc(V(v), T(destType)));
1260 }
1261
createZExt(Value * v,Type * destType)1262 Value *Nucleus::createZExt(Value *v, Type *destType)
1263 {
1264 return V(::builder->CreateZExt(V(v), T(destType)));
1265 }
1266
createSExt(Value * v,Type * destType)1267 Value *Nucleus::createSExt(Value *v, Type *destType)
1268 {
1269 return V(::builder->CreateSExt(V(v), T(destType)));
1270 }
1271
createFPToSI(Value * v,Type * destType)1272 Value *Nucleus::createFPToSI(Value *v, Type *destType)
1273 {
1274 return V(::builder->CreateFPToSI(V(v), T(destType)));
1275 }
1276
createSIToFP(Value * v,Type * destType)1277 Value *Nucleus::createSIToFP(Value *v, Type *destType)
1278 {
1279 return V(::builder->CreateSIToFP(V(v), T(destType)));
1280 }
1281
createFPTrunc(Value * v,Type * destType)1282 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1283 {
1284 return V(::builder->CreateFPTrunc(V(v), T(destType)));
1285 }
1286
createFPExt(Value * v,Type * destType)1287 Value *Nucleus::createFPExt(Value *v, Type *destType)
1288 {
1289 return V(::builder->CreateFPExt(V(v), T(destType)));
1290 }
1291
createBitCast(Value * v,Type * destType)1292 Value *Nucleus::createBitCast(Value *v, Type *destType)
1293 {
1294 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1295 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1296 // reading back as the destination type.
1297 if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
1298 {
1299 Value *readAddress = allocateStackVariable(destType);
1300 Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1301 createStore(v, writeAddress, T(V(v)->getType()));
1302 return createLoad(readAddress, destType);
1303 }
1304 else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
1305 {
1306 Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1307 createStore(v, writeAddress, T(V(v)->getType()));
1308 Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1309 return createLoad(readAddress, destType);
1310 }
1311
1312 return V(::builder->CreateBitCast(V(v), T(destType)));
1313 }
1314
createICmpEQ(Value * lhs,Value * rhs)1315 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1316 {
1317 return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
1318 }
1319
createICmpNE(Value * lhs,Value * rhs)1320 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1321 {
1322 return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
1323 }
1324
createICmpUGT(Value * lhs,Value * rhs)1325 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1326 {
1327 return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
1328 }
1329
createICmpUGE(Value * lhs,Value * rhs)1330 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1331 {
1332 return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
1333 }
1334
createICmpULT(Value * lhs,Value * rhs)1335 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1336 {
1337 return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
1338 }
1339
createICmpULE(Value * lhs,Value * rhs)1340 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1341 {
1342 return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
1343 }
1344
createICmpSGT(Value * lhs,Value * rhs)1345 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1346 {
1347 return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
1348 }
1349
createICmpSGE(Value * lhs,Value * rhs)1350 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1351 {
1352 return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
1353 }
1354
createICmpSLT(Value * lhs,Value * rhs)1355 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1356 {
1357 return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
1358 }
1359
createICmpSLE(Value * lhs,Value * rhs)1360 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1361 {
1362 return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
1363 }
1364
createFCmpOEQ(Value * lhs,Value * rhs)1365 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1366 {
1367 return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
1368 }
1369
createFCmpOGT(Value * lhs,Value * rhs)1370 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1371 {
1372 return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
1373 }
1374
createFCmpOGE(Value * lhs,Value * rhs)1375 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1376 {
1377 return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
1378 }
1379
createFCmpOLT(Value * lhs,Value * rhs)1380 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1381 {
1382 return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
1383 }
1384
createFCmpOLE(Value * lhs,Value * rhs)1385 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1386 {
1387 return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
1388 }
1389
createFCmpONE(Value * lhs,Value * rhs)1390 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1391 {
1392 return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
1393 }
1394
createFCmpORD(Value * lhs,Value * rhs)1395 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1396 {
1397 return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
1398 }
1399
createFCmpUNO(Value * lhs,Value * rhs)1400 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1401 {
1402 return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
1403 }
1404
createFCmpUEQ(Value * lhs,Value * rhs)1405 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1406 {
1407 return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
1408 }
1409
createFCmpUGT(Value * lhs,Value * rhs)1410 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1411 {
1412 return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
1413 }
1414
createFCmpUGE(Value * lhs,Value * rhs)1415 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1416 {
1417 return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
1418 }
1419
createFCmpULT(Value * lhs,Value * rhs)1420 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1421 {
1422 return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
1423 }
1424
createFCmpULE(Value * lhs,Value * rhs)1425 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1426 {
1427 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
1428 }
1429
createFCmpUNE(Value * lhs,Value * rhs)1430 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1431 {
1432 return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
1433 }
1434
createExtractElement(Value * vector,Type * type,int index)1435 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1436 {
1437 assert(V(vector)->getType()->getContainedType(0) == T(type));
1438 return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
1439 }
1440
createInsertElement(Value * vector,Value * element,int index)1441 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1442 {
1443 return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
1444 }
1445
createShuffleVector(Value * v1,Value * v2,const int * select)1446 Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
1447 {
1448 int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
1449 const int maxSize = 16;
1450 llvm::Constant *swizzle[maxSize];
1451 assert(size <= maxSize);
1452
1453 for(int i = 0; i < size; i++)
1454 {
1455 swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
1456 }
1457
1458 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1459
1460 return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
1461 }
1462
createSelect(Value * c,Value * ifTrue,Value * ifFalse)1463 Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
1464 {
1465 return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
1466 }
1467
createSwitch(Value * control,BasicBlock * defaultBranch,unsigned numCases)1468 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1469 {
1470 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
1471 }
1472
addSwitchCase(SwitchCases * switchCases,int label,BasicBlock * branch)1473 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1474 {
1475 llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1476 sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
1477 }
1478
createUnreachable()1479 void Nucleus::createUnreachable()
1480 {
1481 ::builder->CreateUnreachable();
1482 }
1483
createSwizzle4(Value * val,unsigned char select)1484 static Value *createSwizzle4(Value *val, unsigned char select)
1485 {
1486 int swizzle[4] =
1487 {
1488 (select >> 0) & 0x03,
1489 (select >> 2) & 0x03,
1490 (select >> 4) & 0x03,
1491 (select >> 6) & 0x03,
1492 };
1493
1494 return Nucleus::createShuffleVector(val, val, swizzle);
1495 }
1496
createMask4(Value * lhs,Value * rhs,unsigned char select)1497 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1498 {
1499 bool mask[4] = {false, false, false, false};
1500
1501 mask[(select >> 0) & 0x03] = true;
1502 mask[(select >> 2) & 0x03] = true;
1503 mask[(select >> 4) & 0x03] = true;
1504 mask[(select >> 6) & 0x03] = true;
1505
1506 int swizzle[4] =
1507 {
1508 mask[0] ? 4 : 0,
1509 mask[1] ? 5 : 1,
1510 mask[2] ? 6 : 2,
1511 mask[3] ? 7 : 3,
1512 };
1513
1514 return Nucleus::createShuffleVector(lhs, rhs, swizzle);
1515 }
1516
getPointerType(Type * ElementType)1517 Type *Nucleus::getPointerType(Type *ElementType)
1518 {
1519 return T(llvm::PointerType::get(T(ElementType), 0));
1520 }
1521
createNullValue(Type * Ty)1522 Value *Nucleus::createNullValue(Type *Ty)
1523 {
1524 return V(llvm::Constant::getNullValue(T(Ty)));
1525 }
1526
createConstantLong(int64_t i)1527 Value *Nucleus::createConstantLong(int64_t i)
1528 {
1529 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
1530 }
1531
createConstantInt(int i)1532 Value *Nucleus::createConstantInt(int i)
1533 {
1534 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
1535 }
1536
createConstantInt(unsigned int i)1537 Value *Nucleus::createConstantInt(unsigned int i)
1538 {
1539 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
1540 }
1541
createConstantBool(bool b)1542 Value *Nucleus::createConstantBool(bool b)
1543 {
1544 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
1545 }
1546
createConstantByte(signed char i)1547 Value *Nucleus::createConstantByte(signed char i)
1548 {
1549 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
1550 }
1551
createConstantByte(unsigned char i)1552 Value *Nucleus::createConstantByte(unsigned char i)
1553 {
1554 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
1555 }
1556
createConstantShort(short i)1557 Value *Nucleus::createConstantShort(short i)
1558 {
1559 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
1560 }
1561
createConstantShort(unsigned short i)1562 Value *Nucleus::createConstantShort(unsigned short i)
1563 {
1564 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
1565 }
1566
createConstantFloat(float x)1567 Value *Nucleus::createConstantFloat(float x)
1568 {
1569 return V(llvm::ConstantFP::get(T(Float::getType()), x));
1570 }
1571
createNullPointer(Type * Ty)1572 Value *Nucleus::createNullPointer(Type *Ty)
1573 {
1574 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
1575 }
1576
createConstantVector(const int64_t * constants,Type * type)1577 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1578 {
1579 assert(llvm::isa<llvm::VectorType>(T(type)));
1580 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1581 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1582 assert(numElements <= 16 && numConstants <= numElements);
1583 llvm::Constant *constantVector[16];
1584
1585 for(int i = 0; i < numElements; i++)
1586 {
1587 constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
1588 }
1589
1590 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
1591 }
1592
createConstantVector(const double * constants,Type * type)1593 Value *Nucleus::createConstantVector(const double *constants, Type *type)
1594 {
1595 assert(llvm::isa<llvm::VectorType>(T(type)));
1596 const int numConstants = elementCount(type); // Number of provided constants for the (emulated) type.
1597 const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements(); // Number of elements of the underlying vector type.
1598 assert(numElements <= 8 && numConstants <= numElements);
1599 llvm::Constant *constantVector[8];
1600
1601 for(int i = 0; i < numElements; i++)
1602 {
1603 constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
1604 }
1605
1606 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
1607 }
1608
getType()1609 Type *Void::getType()
1610 {
1611 return T(llvm::Type::getVoidTy(*::context));
1612 }
1613
Bool(Argument<Bool> argument)1614 Bool::Bool(Argument<Bool> argument)
1615 {
1616 storeValue(argument.value);
1617 }
1618
Bool(bool x)1619 Bool::Bool(bool x)
1620 {
1621 storeValue(Nucleus::createConstantBool(x));
1622 }
1623
Bool(RValue<Bool> rhs)1624 Bool::Bool(RValue<Bool> rhs)
1625 {
1626 storeValue(rhs.value);
1627 }
1628
// Copy-initializes the variable: loads rhs and stores the loaded value.
Bool::Bool(const Bool &rhs)
{
	storeValue(rhs.loadValue());
}
1634
Bool(const Reference<Bool> & rhs)1635 Bool::Bool(const Reference<Bool> &rhs)
1636 {
1637 Value *value = rhs.loadValue();
1638 storeValue(value);
1639 }
1640
operator =(RValue<Bool> rhs)1641 RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1642 {
1643 storeValue(rhs.value);
1644
1645 return rhs;
1646 }
1647
operator =(const Bool & rhs)1648 RValue<Bool> Bool::operator=(const Bool &rhs)
1649 {
1650 Value *value = rhs.loadValue();
1651 storeValue(value);
1652
1653 return RValue<Bool>(value);
1654 }
1655
operator =(const Reference<Bool> & rhs)1656 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1657 {
1658 Value *value = rhs.loadValue();
1659 storeValue(value);
1660
1661 return RValue<Bool>(value);
1662 }
1663
operator !(RValue<Bool> val)1664 RValue<Bool> operator!(RValue<Bool> val)
1665 {
1666 return RValue<Bool>(Nucleus::createNot(val.value));
1667 }
1668
operator &&(RValue<Bool> lhs,RValue<Bool> rhs)1669 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1670 {
1671 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1672 }
1673
operator ||(RValue<Bool> lhs,RValue<Bool> rhs)1674 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1675 {
1676 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1677 }
1678
getType()1679 Type *Bool::getType()
1680 {
1681 return T(llvm::Type::getInt1Ty(*::context));
1682 }
1683
Byte(Argument<Byte> argument)1684 Byte::Byte(Argument<Byte> argument)
1685 {
1686 storeValue(argument.value);
1687 }
1688
Byte(RValue<Int> cast)1689 Byte::Byte(RValue<Int> cast)
1690 {
1691 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1692
1693 storeValue(integer);
1694 }
1695
Byte(RValue<UInt> cast)1696 Byte::Byte(RValue<UInt> cast)
1697 {
1698 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1699
1700 storeValue(integer);
1701 }
1702
Byte(RValue<UShort> cast)1703 Byte::Byte(RValue<UShort> cast)
1704 {
1705 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1706
1707 storeValue(integer);
1708 }
1709
Byte(int x)1710 Byte::Byte(int x)
1711 {
1712 storeValue(Nucleus::createConstantByte((unsigned char)x));
1713 }
1714
Byte(unsigned char x)1715 Byte::Byte(unsigned char x)
1716 {
1717 storeValue(Nucleus::createConstantByte(x));
1718 }
1719
Byte(RValue<Byte> rhs)1720 Byte::Byte(RValue<Byte> rhs)
1721 {
1722 storeValue(rhs.value);
1723 }
1724
// Copy-initializes the variable: loads rhs and stores the loaded value.
Byte::Byte(const Byte &rhs)
{
	storeValue(rhs.loadValue());
}
1730
Byte(const Reference<Byte> & rhs)1731 Byte::Byte(const Reference<Byte> &rhs)
1732 {
1733 Value *value = rhs.loadValue();
1734 storeValue(value);
1735 }
1736
operator =(RValue<Byte> rhs)1737 RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1738 {
1739 storeValue(rhs.value);
1740
1741 return rhs;
1742 }
1743
operator =(const Byte & rhs)1744 RValue<Byte> Byte::operator=(const Byte &rhs)
1745 {
1746 Value *value = rhs.loadValue();
1747 storeValue(value);
1748
1749 return RValue<Byte>(value);
1750 }
1751
operator =(const Reference<Byte> & rhs)1752 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1753 {
1754 Value *value = rhs.loadValue();
1755 storeValue(value);
1756
1757 return RValue<Byte>(value);
1758 }
1759
operator +(RValue<Byte> lhs,RValue<Byte> rhs)1760 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1761 {
1762 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1763 }
1764
operator -(RValue<Byte> lhs,RValue<Byte> rhs)1765 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1766 {
1767 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1768 }
1769
operator *(RValue<Byte> lhs,RValue<Byte> rhs)1770 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1771 {
1772 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1773 }
1774
operator /(RValue<Byte> lhs,RValue<Byte> rhs)1775 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1776 {
1777 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1778 }
1779
operator %(RValue<Byte> lhs,RValue<Byte> rhs)1780 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1781 {
1782 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1783 }
1784
operator &(RValue<Byte> lhs,RValue<Byte> rhs)1785 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1786 {
1787 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1788 }
1789
operator |(RValue<Byte> lhs,RValue<Byte> rhs)1790 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1791 {
1792 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1793 }
1794
operator ^(RValue<Byte> lhs,RValue<Byte> rhs)1795 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1796 {
1797 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1798 }
1799
operator <<(RValue<Byte> lhs,RValue<Byte> rhs)1800 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1801 {
1802 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1803 }
1804
operator >>(RValue<Byte> lhs,RValue<Byte> rhs)1805 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1806 {
1807 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1808 }
1809
operator +=(Byte & lhs,RValue<Byte> rhs)1810 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1811 {
1812 return lhs = lhs + rhs;
1813 }
1814
operator -=(Byte & lhs,RValue<Byte> rhs)1815 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1816 {
1817 return lhs = lhs - rhs;
1818 }
1819
operator *=(Byte & lhs,RValue<Byte> rhs)1820 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1821 {
1822 return lhs = lhs * rhs;
1823 }
1824
operator /=(Byte & lhs,RValue<Byte> rhs)1825 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1826 {
1827 return lhs = lhs / rhs;
1828 }
1829
operator %=(Byte & lhs,RValue<Byte> rhs)1830 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1831 {
1832 return lhs = lhs % rhs;
1833 }
1834
operator &=(Byte & lhs,RValue<Byte> rhs)1835 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1836 {
1837 return lhs = lhs & rhs;
1838 }
1839
operator |=(Byte & lhs,RValue<Byte> rhs)1840 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1841 {
1842 return lhs = lhs | rhs;
1843 }
1844
operator ^=(Byte & lhs,RValue<Byte> rhs)1845 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1846 {
1847 return lhs = lhs ^ rhs;
1848 }
1849
operator <<=(Byte & lhs,RValue<Byte> rhs)1850 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1851 {
1852 return lhs = lhs << rhs;
1853 }
1854
operator >>=(Byte & lhs,RValue<Byte> rhs)1855 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1856 {
1857 return lhs = lhs >> rhs;
1858 }
1859
operator +(RValue<Byte> val)1860 RValue<Byte> operator+(RValue<Byte> val)
1861 {
1862 return val;
1863 }
1864
operator -(RValue<Byte> val)1865 RValue<Byte> operator-(RValue<Byte> val)
1866 {
1867 return RValue<Byte>(Nucleus::createNeg(val.value));
1868 }
1869
operator ~(RValue<Byte> val)1870 RValue<Byte> operator~(RValue<Byte> val)
1871 {
1872 return RValue<Byte>(Nucleus::createNot(val.value));
1873 }
1874
operator ++(Byte & val,int)1875 RValue<Byte> operator++(Byte &val, int) // Post-increment
1876 {
1877 RValue<Byte> res = val;
1878
1879 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
1880 val.storeValue(inc);
1881
1882 return res;
1883 }
1884
operator ++(Byte & val)1885 const Byte &operator++(Byte &val) // Pre-increment
1886 {
1887 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1888 val.storeValue(inc);
1889
1890 return val;
1891 }
1892
operator --(Byte & val,int)1893 RValue<Byte> operator--(Byte &val, int) // Post-decrement
1894 {
1895 RValue<Byte> res = val;
1896
1897 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
1898 val.storeValue(inc);
1899
1900 return res;
1901 }
1902
operator --(Byte & val)1903 const Byte &operator--(Byte &val) // Pre-decrement
1904 {
1905 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1906 val.storeValue(inc);
1907
1908 return val;
1909 }
1910
operator <(RValue<Byte> lhs,RValue<Byte> rhs)1911 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1912 {
1913 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1914 }
1915
operator <=(RValue<Byte> lhs,RValue<Byte> rhs)1916 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1917 {
1918 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1919 }
1920
operator >(RValue<Byte> lhs,RValue<Byte> rhs)1921 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1922 {
1923 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1924 }
1925
operator >=(RValue<Byte> lhs,RValue<Byte> rhs)1926 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1927 {
1928 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1929 }
1930
operator !=(RValue<Byte> lhs,RValue<Byte> rhs)1931 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1932 {
1933 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1934 }
1935
operator ==(RValue<Byte> lhs,RValue<Byte> rhs)1936 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1937 {
1938 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1939 }
1940
getType()1941 Type *Byte::getType()
1942 {
1943 return T(llvm::Type::getInt8Ty(*::context));
1944 }
1945
SByte(Argument<SByte> argument)1946 SByte::SByte(Argument<SByte> argument)
1947 {
1948 storeValue(argument.value);
1949 }
1950
SByte(RValue<Int> cast)1951 SByte::SByte(RValue<Int> cast)
1952 {
1953 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1954
1955 storeValue(integer);
1956 }
1957
SByte(RValue<Short> cast)1958 SByte::SByte(RValue<Short> cast)
1959 {
1960 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1961
1962 storeValue(integer);
1963 }
1964
SByte(signed char x)1965 SByte::SByte(signed char x)
1966 {
1967 storeValue(Nucleus::createConstantByte(x));
1968 }
1969
SByte(RValue<SByte> rhs)1970 SByte::SByte(RValue<SByte> rhs)
1971 {
1972 storeValue(rhs.value);
1973 }
1974
// Copy constructor: loads the current value of rhs and stores it into this variable.
SByte::SByte(const SByte &rhs)
{
	storeValue(rhs.loadValue());
}
1980
SByte(const Reference<SByte> & rhs)1981 SByte::SByte(const Reference<SByte> &rhs)
1982 {
1983 Value *value = rhs.loadValue();
1984 storeValue(value);
1985 }
1986
operator =(RValue<SByte> rhs)1987 RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1988 {
1989 storeValue(rhs.value);
1990
1991 return rhs;
1992 }
1993
operator =(const SByte & rhs)1994 RValue<SByte> SByte::operator=(const SByte &rhs)
1995 {
1996 Value *value = rhs.loadValue();
1997 storeValue(value);
1998
1999 return RValue<SByte>(value);
2000 }
2001
operator =(const Reference<SByte> & rhs)2002 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
2003 {
2004 Value *value = rhs.loadValue();
2005 storeValue(value);
2006
2007 return RValue<SByte>(value);
2008 }
2009
operator +(RValue<SByte> lhs,RValue<SByte> rhs)2010 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
2011 {
2012 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
2013 }
2014
operator -(RValue<SByte> lhs,RValue<SByte> rhs)2015 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
2016 {
2017 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
2018 }
2019
operator *(RValue<SByte> lhs,RValue<SByte> rhs)2020 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
2021 {
2022 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
2023 }
2024
operator /(RValue<SByte> lhs,RValue<SByte> rhs)2025 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
2026 {
2027 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
2028 }
2029
operator %(RValue<SByte> lhs,RValue<SByte> rhs)2030 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
2031 {
2032 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
2033 }
2034
operator &(RValue<SByte> lhs,RValue<SByte> rhs)2035 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
2036 {
2037 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
2038 }
2039
operator |(RValue<SByte> lhs,RValue<SByte> rhs)2040 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
2041 {
2042 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
2043 }
2044
operator ^(RValue<SByte> lhs,RValue<SByte> rhs)2045 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
2046 {
2047 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
2048 }
2049
operator <<(RValue<SByte> lhs,RValue<SByte> rhs)2050 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
2051 {
2052 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
2053 }
2054
operator >>(RValue<SByte> lhs,RValue<SByte> rhs)2055 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
2056 {
2057 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
2058 }
2059
operator +=(SByte & lhs,RValue<SByte> rhs)2060 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
2061 {
2062 return lhs = lhs + rhs;
2063 }
2064
operator -=(SByte & lhs,RValue<SByte> rhs)2065 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
2066 {
2067 return lhs = lhs - rhs;
2068 }
2069
operator *=(SByte & lhs,RValue<SByte> rhs)2070 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
2071 {
2072 return lhs = lhs * rhs;
2073 }
2074
operator /=(SByte & lhs,RValue<SByte> rhs)2075 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
2076 {
2077 return lhs = lhs / rhs;
2078 }
2079
operator %=(SByte & lhs,RValue<SByte> rhs)2080 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
2081 {
2082 return lhs = lhs % rhs;
2083 }
2084
operator &=(SByte & lhs,RValue<SByte> rhs)2085 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
2086 {
2087 return lhs = lhs & rhs;
2088 }
2089
operator |=(SByte & lhs,RValue<SByte> rhs)2090 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
2091 {
2092 return lhs = lhs | rhs;
2093 }
2094
operator ^=(SByte & lhs,RValue<SByte> rhs)2095 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
2096 {
2097 return lhs = lhs ^ rhs;
2098 }
2099
operator <<=(SByte & lhs,RValue<SByte> rhs)2100 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
2101 {
2102 return lhs = lhs << rhs;
2103 }
2104
operator >>=(SByte & lhs,RValue<SByte> rhs)2105 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
2106 {
2107 return lhs = lhs >> rhs;
2108 }
2109
operator +(RValue<SByte> val)2110 RValue<SByte> operator+(RValue<SByte> val)
2111 {
2112 return val;
2113 }
2114
operator -(RValue<SByte> val)2115 RValue<SByte> operator-(RValue<SByte> val)
2116 {
2117 return RValue<SByte>(Nucleus::createNeg(val.value));
2118 }
2119
operator ~(RValue<SByte> val)2120 RValue<SByte> operator~(RValue<SByte> val)
2121 {
2122 return RValue<SByte>(Nucleus::createNot(val.value));
2123 }
2124
operator ++(SByte & val,int)2125 RValue<SByte> operator++(SByte &val, int) // Post-increment
2126 {
2127 RValue<SByte> res = val;
2128
2129 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
2130 val.storeValue(inc);
2131
2132 return res;
2133 }
2134
operator ++(SByte & val)2135 const SByte &operator++(SByte &val) // Pre-increment
2136 {
2137 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1));
2138 val.storeValue(inc);
2139
2140 return val;
2141 }
2142
operator --(SByte & val,int)2143 RValue<SByte> operator--(SByte &val, int) // Post-decrement
2144 {
2145 RValue<SByte> res = val;
2146
2147 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
2148 val.storeValue(inc);
2149
2150 return res;
2151 }
2152
operator --(SByte & val)2153 const SByte &operator--(SByte &val) // Pre-decrement
2154 {
2155 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1));
2156 val.storeValue(inc);
2157
2158 return val;
2159 }
2160
operator <(RValue<SByte> lhs,RValue<SByte> rhs)2161 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
2162 {
2163 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2164 }
2165
operator <=(RValue<SByte> lhs,RValue<SByte> rhs)2166 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
2167 {
2168 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2169 }
2170
operator >(RValue<SByte> lhs,RValue<SByte> rhs)2171 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
2172 {
2173 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2174 }
2175
operator >=(RValue<SByte> lhs,RValue<SByte> rhs)2176 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
2177 {
2178 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2179 }
2180
operator !=(RValue<SByte> lhs,RValue<SByte> rhs)2181 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
2182 {
2183 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2184 }
2185
operator ==(RValue<SByte> lhs,RValue<SByte> rhs)2186 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
2187 {
2188 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2189 }
2190
getType()2191 Type *SByte::getType()
2192 {
2193 return T(llvm::Type::getInt8Ty(*::context));
2194 }
2195
Short(Argument<Short> argument)2196 Short::Short(Argument<Short> argument)
2197 {
2198 storeValue(argument.value);
2199 }
2200
Short(RValue<Int> cast)2201 Short::Short(RValue<Int> cast)
2202 {
2203 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
2204
2205 storeValue(integer);
2206 }
2207
Short(short x)2208 Short::Short(short x)
2209 {
2210 storeValue(Nucleus::createConstantShort(x));
2211 }
2212
Short(RValue<Short> rhs)2213 Short::Short(RValue<Short> rhs)
2214 {
2215 storeValue(rhs.value);
2216 }
2217
// Copy constructor: loads the current value of rhs and stores it into this variable.
Short::Short(const Short &rhs)
{
	storeValue(rhs.loadValue());
}
2223
Short(const Reference<Short> & rhs)2224 Short::Short(const Reference<Short> &rhs)
2225 {
2226 Value *value = rhs.loadValue();
2227 storeValue(value);
2228 }
2229
operator =(RValue<Short> rhs)2230 RValue<Short> Short::operator=(RValue<Short> rhs)
2231 {
2232 storeValue(rhs.value);
2233
2234 return rhs;
2235 }
2236
operator =(const Short & rhs)2237 RValue<Short> Short::operator=(const Short &rhs)
2238 {
2239 Value *value = rhs.loadValue();
2240 storeValue(value);
2241
2242 return RValue<Short>(value);
2243 }
2244
operator =(const Reference<Short> & rhs)2245 RValue<Short> Short::operator=(const Reference<Short> &rhs)
2246 {
2247 Value *value = rhs.loadValue();
2248 storeValue(value);
2249
2250 return RValue<Short>(value);
2251 }
2252
operator +(RValue<Short> lhs,RValue<Short> rhs)2253 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2254 {
2255 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2256 }
2257
operator -(RValue<Short> lhs,RValue<Short> rhs)2258 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2259 {
2260 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2261 }
2262
operator *(RValue<Short> lhs,RValue<Short> rhs)2263 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2264 {
2265 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2266 }
2267
operator /(RValue<Short> lhs,RValue<Short> rhs)2268 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2269 {
2270 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2271 }
2272
operator %(RValue<Short> lhs,RValue<Short> rhs)2273 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2274 {
2275 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2276 }
2277
operator &(RValue<Short> lhs,RValue<Short> rhs)2278 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2279 {
2280 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2281 }
2282
operator |(RValue<Short> lhs,RValue<Short> rhs)2283 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2284 {
2285 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2286 }
2287
operator ^(RValue<Short> lhs,RValue<Short> rhs)2288 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2289 {
2290 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2291 }
2292
operator <<(RValue<Short> lhs,RValue<Short> rhs)2293 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2294 {
2295 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2296 }
2297
operator >>(RValue<Short> lhs,RValue<Short> rhs)2298 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2299 {
2300 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2301 }
2302
operator +=(Short & lhs,RValue<Short> rhs)2303 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2304 {
2305 return lhs = lhs + rhs;
2306 }
2307
operator -=(Short & lhs,RValue<Short> rhs)2308 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2309 {
2310 return lhs = lhs - rhs;
2311 }
2312
operator *=(Short & lhs,RValue<Short> rhs)2313 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2314 {
2315 return lhs = lhs * rhs;
2316 }
2317
operator /=(Short & lhs,RValue<Short> rhs)2318 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2319 {
2320 return lhs = lhs / rhs;
2321 }
2322
operator %=(Short & lhs,RValue<Short> rhs)2323 RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2324 {
2325 return lhs = lhs % rhs;
2326 }
2327
operator &=(Short & lhs,RValue<Short> rhs)2328 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2329 {
2330 return lhs = lhs & rhs;
2331 }
2332
operator |=(Short & lhs,RValue<Short> rhs)2333 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2334 {
2335 return lhs = lhs | rhs;
2336 }
2337
operator ^=(Short & lhs,RValue<Short> rhs)2338 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2339 {
2340 return lhs = lhs ^ rhs;
2341 }
2342
operator <<=(Short & lhs,RValue<Short> rhs)2343 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2344 {
2345 return lhs = lhs << rhs;
2346 }
2347
operator >>=(Short & lhs,RValue<Short> rhs)2348 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2349 {
2350 return lhs = lhs >> rhs;
2351 }
2352
operator +(RValue<Short> val)2353 RValue<Short> operator+(RValue<Short> val)
2354 {
2355 return val;
2356 }
2357
operator -(RValue<Short> val)2358 RValue<Short> operator-(RValue<Short> val)
2359 {
2360 return RValue<Short>(Nucleus::createNeg(val.value));
2361 }
2362
operator ~(RValue<Short> val)2363 RValue<Short> operator~(RValue<Short> val)
2364 {
2365 return RValue<Short>(Nucleus::createNot(val.value));
2366 }
2367
operator ++(Short & val,int)2368 RValue<Short> operator++(Short &val, int) // Post-increment
2369 {
2370 RValue<Short> res = val;
2371
2372 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
2373 val.storeValue(inc);
2374
2375 return res;
2376 }
2377
operator ++(Short & val)2378 const Short &operator++(Short &val) // Pre-increment
2379 {
2380 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1));
2381 val.storeValue(inc);
2382
2383 return val;
2384 }
2385
operator --(Short & val,int)2386 RValue<Short> operator--(Short &val, int) // Post-decrement
2387 {
2388 RValue<Short> res = val;
2389
2390 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
2391 val.storeValue(inc);
2392
2393 return res;
2394 }
2395
operator --(Short & val)2396 const Short &operator--(Short &val) // Pre-decrement
2397 {
2398 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1));
2399 val.storeValue(inc);
2400
2401 return val;
2402 }
2403
operator <(RValue<Short> lhs,RValue<Short> rhs)2404 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2405 {
2406 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2407 }
2408
operator <=(RValue<Short> lhs,RValue<Short> rhs)2409 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2410 {
2411 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2412 }
2413
operator >(RValue<Short> lhs,RValue<Short> rhs)2414 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2415 {
2416 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2417 }
2418
operator >=(RValue<Short> lhs,RValue<Short> rhs)2419 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2420 {
2421 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2422 }
2423
operator !=(RValue<Short> lhs,RValue<Short> rhs)2424 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2425 {
2426 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2427 }
2428
operator ==(RValue<Short> lhs,RValue<Short> rhs)2429 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2430 {
2431 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2432 }
2433
getType()2434 Type *Short::getType()
2435 {
2436 return T(llvm::Type::getInt16Ty(*::context));
2437 }
2438
UShort(Argument<UShort> argument)2439 UShort::UShort(Argument<UShort> argument)
2440 {
2441 storeValue(argument.value);
2442 }
2443
UShort(RValue<UInt> cast)2444 UShort::UShort(RValue<UInt> cast)
2445 {
2446 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2447
2448 storeValue(integer);
2449 }
2450
UShort(RValue<Int> cast)2451 UShort::UShort(RValue<Int> cast)
2452 {
2453 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2454
2455 storeValue(integer);
2456 }
2457
UShort(unsigned short x)2458 UShort::UShort(unsigned short x)
2459 {
2460 storeValue(Nucleus::createConstantShort(x));
2461 }
2462
UShort(RValue<UShort> rhs)2463 UShort::UShort(RValue<UShort> rhs)
2464 {
2465 storeValue(rhs.value);
2466 }
2467
// Copy constructor: loads the current value of rhs and stores it into this variable.
UShort::UShort(const UShort &rhs)
{
	storeValue(rhs.loadValue());
}
2473
UShort(const Reference<UShort> & rhs)2474 UShort::UShort(const Reference<UShort> &rhs)
2475 {
2476 Value *value = rhs.loadValue();
2477 storeValue(value);
2478 }
2479
operator =(RValue<UShort> rhs)2480 RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2481 {
2482 storeValue(rhs.value);
2483
2484 return rhs;
2485 }
2486
operator =(const UShort & rhs)2487 RValue<UShort> UShort::operator=(const UShort &rhs)
2488 {
2489 Value *value = rhs.loadValue();
2490 storeValue(value);
2491
2492 return RValue<UShort>(value);
2493 }
2494
operator =(const Reference<UShort> & rhs)2495 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2496 {
2497 Value *value = rhs.loadValue();
2498 storeValue(value);
2499
2500 return RValue<UShort>(value);
2501 }
2502
operator +(RValue<UShort> lhs,RValue<UShort> rhs)2503 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2504 {
2505 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2506 }
2507
operator -(RValue<UShort> lhs,RValue<UShort> rhs)2508 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2509 {
2510 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2511 }
2512
operator *(RValue<UShort> lhs,RValue<UShort> rhs)2513 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2514 {
2515 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2516 }
2517
operator /(RValue<UShort> lhs,RValue<UShort> rhs)2518 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2519 {
2520 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2521 }
2522
operator %(RValue<UShort> lhs,RValue<UShort> rhs)2523 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2524 {
2525 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2526 }
2527
operator &(RValue<UShort> lhs,RValue<UShort> rhs)2528 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2529 {
2530 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2531 }
2532
operator |(RValue<UShort> lhs,RValue<UShort> rhs)2533 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2534 {
2535 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2536 }
2537
operator ^(RValue<UShort> lhs,RValue<UShort> rhs)2538 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2539 {
2540 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2541 }
2542
operator <<(RValue<UShort> lhs,RValue<UShort> rhs)2543 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2544 {
2545 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2546 }
2547
operator >>(RValue<UShort> lhs,RValue<UShort> rhs)2548 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2549 {
2550 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2551 }
2552
operator +=(UShort & lhs,RValue<UShort> rhs)2553 RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2554 {
2555 return lhs = lhs + rhs;
2556 }
2557
operator -=(UShort & lhs,RValue<UShort> rhs)2558 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2559 {
2560 return lhs = lhs - rhs;
2561 }
2562
operator *=(UShort & lhs,RValue<UShort> rhs)2563 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2564 {
2565 return lhs = lhs * rhs;
2566 }
2567
operator /=(UShort & lhs,RValue<UShort> rhs)2568 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2569 {
2570 return lhs = lhs / rhs;
2571 }
2572
operator %=(UShort & lhs,RValue<UShort> rhs)2573 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2574 {
2575 return lhs = lhs % rhs;
2576 }
2577
operator &=(UShort & lhs,RValue<UShort> rhs)2578 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2579 {
2580 return lhs = lhs & rhs;
2581 }
2582
operator |=(UShort & lhs,RValue<UShort> rhs)2583 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2584 {
2585 return lhs = lhs | rhs;
2586 }
2587
operator ^=(UShort & lhs,RValue<UShort> rhs)2588 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2589 {
2590 return lhs = lhs ^ rhs;
2591 }
2592
operator <<=(UShort & lhs,RValue<UShort> rhs)2593 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2594 {
2595 return lhs = lhs << rhs;
2596 }
2597
operator >>=(UShort & lhs,RValue<UShort> rhs)2598 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2599 {
2600 return lhs = lhs >> rhs;
2601 }
2602
operator +(RValue<UShort> val)2603 RValue<UShort> operator+(RValue<UShort> val)
2604 {
2605 return val;
2606 }
2607
operator -(RValue<UShort> val)2608 RValue<UShort> operator-(RValue<UShort> val)
2609 {
2610 return RValue<UShort>(Nucleus::createNeg(val.value));
2611 }
2612
operator ~(RValue<UShort> val)2613 RValue<UShort> operator~(RValue<UShort> val)
2614 {
2615 return RValue<UShort>(Nucleus::createNot(val.value));
2616 }
2617
operator ++(UShort & val,int)2618 RValue<UShort> operator++(UShort &val, int) // Post-increment
2619 {
2620 RValue<UShort> res = val;
2621
2622 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
2623 val.storeValue(inc);
2624
2625 return res;
2626 }
2627
operator ++(UShort & val)2628 const UShort &operator++(UShort &val) // Pre-increment
2629 {
2630 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
2631 val.storeValue(inc);
2632
2633 return val;
2634 }
2635
operator --(UShort & val,int)2636 RValue<UShort> operator--(UShort &val, int) // Post-decrement
2637 {
2638 RValue<UShort> res = val;
2639
2640 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
2641 val.storeValue(inc);
2642
2643 return res;
2644 }
2645
operator --(UShort & val)2646 const UShort &operator--(UShort &val) // Pre-decrement
2647 {
2648 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
2649 val.storeValue(inc);
2650
2651 return val;
2652 }
2653
operator <(RValue<UShort> lhs,RValue<UShort> rhs)2654 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2655 {
2656 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2657 }
2658
operator <=(RValue<UShort> lhs,RValue<UShort> rhs)2659 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2660 {
2661 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2662 }
2663
operator >(RValue<UShort> lhs,RValue<UShort> rhs)2664 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2665 {
2666 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2667 }
2668
operator >=(RValue<UShort> lhs,RValue<UShort> rhs)2669 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2670 {
2671 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2672 }
2673
operator !=(RValue<UShort> lhs,RValue<UShort> rhs)2674 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2675 {
2676 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2677 }
2678
operator ==(RValue<UShort> lhs,RValue<UShort> rhs)2679 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2680 {
2681 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2682 }
2683
getType()2684 Type *UShort::getType()
2685 {
2686 return T(llvm::Type::getInt16Ty(*::context));
2687 }
2688
Byte4(RValue<Byte8> cast)2689 Byte4::Byte4(RValue<Byte8> cast)
2690 {
2691 storeValue(Nucleus::createBitCast(cast.value, getType()));
2692 }
2693
Byte4(const Reference<Byte4> & rhs)2694 Byte4::Byte4(const Reference<Byte4> &rhs)
2695 {
2696 Value *value = rhs.loadValue();
2697 storeValue(value);
2698 }
2699
getType()2700 Type *Byte4::getType()
2701 {
2702 return T(Type_v4i8);
2703 }
2704
getType()2705 Type *SByte4::getType()
2706 {
2707 return T(Type_v4i8);
2708 }
2709
Byte8(uint8_t x0,uint8_t x1,uint8_t x2,uint8_t x3,uint8_t x4,uint8_t x5,uint8_t x6,uint8_t x7)2710 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2711 {
2712 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2713 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2714 }
2715
Byte8(RValue<Byte8> rhs)2716 Byte8::Byte8(RValue<Byte8> rhs)
2717 {
2718 storeValue(rhs.value);
2719 }
2720
// Copy constructor: loads the current value of rhs and stores it into this variable.
Byte8::Byte8(const Byte8 &rhs)
{
	storeValue(rhs.loadValue());
}
2726
Byte8(const Reference<Byte8> & rhs)2727 Byte8::Byte8(const Reference<Byte8> &rhs)
2728 {
2729 Value *value = rhs.loadValue();
2730 storeValue(value);
2731 }
2732
operator =(RValue<Byte8> rhs)2733 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2734 {
2735 storeValue(rhs.value);
2736
2737 return rhs;
2738 }
2739
operator =(const Byte8 & rhs)2740 RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2741 {
2742 Value *value = rhs.loadValue();
2743 storeValue(value);
2744
2745 return RValue<Byte8>(value);
2746 }
2747
operator =(const Reference<Byte8> & rhs)2748 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2749 {
2750 Value *value = rhs.loadValue();
2751 storeValue(value);
2752
2753 return RValue<Byte8>(value);
2754 }
2755
operator +(RValue<Byte8> lhs,RValue<Byte8> rhs)2756 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2757 {
2758 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2759 }
2760
operator -(RValue<Byte8> lhs,RValue<Byte8> rhs)2761 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2762 {
2763 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2764 }
2765
2766 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2767 // {
2768 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2769 // }
2770
2771 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2772 // {
2773 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2774 // }
2775
2776 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2777 // {
2778 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2779 // }
2780
operator &(RValue<Byte8> lhs,RValue<Byte8> rhs)2781 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2782 {
2783 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2784 }
2785
operator |(RValue<Byte8> lhs,RValue<Byte8> rhs)2786 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2787 {
2788 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2789 }
2790
operator ^(RValue<Byte8> lhs,RValue<Byte8> rhs)2791 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2792 {
2793 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2794 }
2795
2796 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2797 // {
2798 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2799 // }
2800
2801 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2802 // {
2803 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2804 // }
2805
operator +=(Byte8 & lhs,RValue<Byte8> rhs)2806 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2807 {
2808 return lhs = lhs + rhs;
2809 }
2810
operator -=(Byte8 & lhs,RValue<Byte8> rhs)2811 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2812 {
2813 return lhs = lhs - rhs;
2814 }
2815
2816 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2817 // {
2818 // return lhs = lhs * rhs;
2819 // }
2820
2821 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2822 // {
2823 // return lhs = lhs / rhs;
2824 // }
2825
2826 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2827 // {
2828 // return lhs = lhs % rhs;
2829 // }
2830
operator &=(Byte8 & lhs,RValue<Byte8> rhs)2831 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2832 {
2833 return lhs = lhs & rhs;
2834 }
2835
operator |=(Byte8 & lhs,RValue<Byte8> rhs)2836 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2837 {
2838 return lhs = lhs | rhs;
2839 }
2840
operator ^=(Byte8 & lhs,RValue<Byte8> rhs)2841 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2842 {
2843 return lhs = lhs ^ rhs;
2844 }
2845
2846 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2847 // {
2848 // return lhs = lhs << rhs;
2849 // }
2850
2851 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2852 // {
2853 // return lhs = lhs >> rhs;
2854 // }
2855
2856 // RValue<Byte8> operator+(RValue<Byte8> val)
2857 // {
2858 // return val;
2859 // }
2860
2861 // RValue<Byte8> operator-(RValue<Byte8> val)
2862 // {
2863 // return RValue<Byte8>(Nucleus::createNeg(val.value));
2864 // }
2865
operator ~(RValue<Byte8> val)2866 RValue<Byte8> operator~(RValue<Byte8> val)
2867 {
2868 return RValue<Byte8>(Nucleus::createNot(val.value));
2869 }
2870
AddSat(RValue<Byte8> x,RValue<Byte8> y)2871 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2872 {
2873 #if defined(__i386__) || defined(__x86_64__)
2874 return x86::paddusb(x, y);
2875 #else
2876 return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2877 #endif
2878 }
2879
SubSat(RValue<Byte8> x,RValue<Byte8> y)2880 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2881 {
2882 #if defined(__i386__) || defined(__x86_64__)
2883 return x86::psubusb(x, y);
2884 #else
2885 return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2886 #endif
2887 }
2888
Unpack(RValue<Byte4> x)2889 RValue<Short4> Unpack(RValue<Byte4> x)
2890 {
2891 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; // Real type is v16i8
2892 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2893 }
2894
Unpack(RValue<Byte4> x,RValue<Byte4> y)2895 RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2896 {
2897 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2898 }
2899
UnpackLow(RValue<Byte8> x,RValue<Byte8> y)2900 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2901 {
2902 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
2903 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2904 }
2905
UnpackHigh(RValue<Byte8> x,RValue<Byte8> y)2906 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2907 {
2908 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
2909 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2910 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2911 }
2912
SignMask(RValue<Byte8> x)2913 RValue<Int> SignMask(RValue<Byte8> x)
2914 {
2915 #if defined(__i386__) || defined(__x86_64__)
2916 return x86::pmovmskb(x);
2917 #else
2918 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2919 #endif
2920 }
2921
2922 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2923 // {
2924 //#if defined(__i386__) || defined(__x86_64__)
2925 // return x86::pcmpgtb(x, y); // FIXME: Signedness
2926 //#else
2927 // return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2928 //#endif
2929 // }
2930
CmpEQ(RValue<Byte8> x,RValue<Byte8> y)2931 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2932 {
2933 #if defined(__i386__) || defined(__x86_64__)
2934 return x86::pcmpeqb(x, y);
2935 #else
2936 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2937 #endif
2938 }
2939
getType()2940 Type *Byte8::getType()
2941 {
2942 return T(Type_v8i8);
2943 }
2944
SByte8(uint8_t x0,uint8_t x1,uint8_t x2,uint8_t x3,uint8_t x4,uint8_t x5,uint8_t x6,uint8_t x7)2945 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2946 {
2947 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2948 Value *vector = Nucleus::createConstantVector(constantVector, getType());
2949
2950 storeValue(Nucleus::createBitCast(vector, getType()));
2951 }
2952
SByte8(RValue<SByte8> rhs)2953 SByte8::SByte8(RValue<SByte8> rhs)
2954 {
2955 storeValue(rhs.value);
2956 }
2957
// Copy constructor: loads the current value of rhs and stores it here.
SByte8::SByte8(const SByte8 &rhs)
{
	storeValue(rhs.loadValue());
}
2963
SByte8(const Reference<SByte8> & rhs)2964 SByte8::SByte8(const Reference<SByte8> &rhs)
2965 {
2966 Value *value = rhs.loadValue();
2967 storeValue(value);
2968 }
2969
operator =(RValue<SByte8> rhs)2970 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2971 {
2972 storeValue(rhs.value);
2973
2974 return rhs;
2975 }
2976
operator =(const SByte8 & rhs)2977 RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2978 {
2979 Value *value = rhs.loadValue();
2980 storeValue(value);
2981
2982 return RValue<SByte8>(value);
2983 }
2984
operator =(const Reference<SByte8> & rhs)2985 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2986 {
2987 Value *value = rhs.loadValue();
2988 storeValue(value);
2989
2990 return RValue<SByte8>(value);
2991 }
2992
operator +(RValue<SByte8> lhs,RValue<SByte8> rhs)2993 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2994 {
2995 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2996 }
2997
operator -(RValue<SByte8> lhs,RValue<SByte8> rhs)2998 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2999 {
3000 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
3001 }
3002
3003 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
3004 // {
3005 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
3006 // }
3007
3008 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
3009 // {
3010 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
3011 // }
3012
3013 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
3014 // {
3015 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
3016 // }
3017
operator &(RValue<SByte8> lhs,RValue<SByte8> rhs)3018 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
3019 {
3020 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
3021 }
3022
operator |(RValue<SByte8> lhs,RValue<SByte8> rhs)3023 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
3024 {
3025 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
3026 }
3027
operator ^(RValue<SByte8> lhs,RValue<SByte8> rhs)3028 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
3029 {
3030 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
3031 }
3032
3033 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
3034 // {
3035 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
3036 // }
3037
3038 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
3039 // {
3040 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
3041 // }
3042
operator +=(SByte8 & lhs,RValue<SByte8> rhs)3043 RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
3044 {
3045 return lhs = lhs + rhs;
3046 }
3047
operator -=(SByte8 & lhs,RValue<SByte8> rhs)3048 RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
3049 {
3050 return lhs = lhs - rhs;
3051 }
3052
3053 // RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
3054 // {
3055 // return lhs = lhs * rhs;
3056 // }
3057
3058 // RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
3059 // {
3060 // return lhs = lhs / rhs;
3061 // }
3062
3063 // RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
3064 // {
3065 // return lhs = lhs % rhs;
3066 // }
3067
operator &=(SByte8 & lhs,RValue<SByte8> rhs)3068 RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
3069 {
3070 return lhs = lhs & rhs;
3071 }
3072
operator |=(SByte8 & lhs,RValue<SByte8> rhs)3073 RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
3074 {
3075 return lhs = lhs | rhs;
3076 }
3077
operator ^=(SByte8 & lhs,RValue<SByte8> rhs)3078 RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
3079 {
3080 return lhs = lhs ^ rhs;
3081 }
3082
3083 // RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
3084 // {
3085 // return lhs = lhs << rhs;
3086 // }
3087
3088 // RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
3089 // {
3090 // return lhs = lhs >> rhs;
3091 // }
3092
3093 // RValue<SByte8> operator+(RValue<SByte8> val)
3094 // {
3095 // return val;
3096 // }
3097
3098 // RValue<SByte8> operator-(RValue<SByte8> val)
3099 // {
3100 // return RValue<SByte8>(Nucleus::createNeg(val.value));
3101 // }
3102
operator ~(RValue<SByte8> val)3103 RValue<SByte8> operator~(RValue<SByte8> val)
3104 {
3105 return RValue<SByte8>(Nucleus::createNot(val.value));
3106 }
3107
AddSat(RValue<SByte8> x,RValue<SByte8> y)3108 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
3109 {
3110 #if defined(__i386__) || defined(__x86_64__)
3111 return x86::paddsb(x, y);
3112 #else
3113 return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
3114 #endif
3115 }
3116
SubSat(RValue<SByte8> x,RValue<SByte8> y)3117 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
3118 {
3119 #if defined(__i386__) || defined(__x86_64__)
3120 return x86::psubsb(x, y);
3121 #else
3122 return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
3123 #endif
3124 }
3125
UnpackLow(RValue<SByte8> x,RValue<SByte8> y)3126 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
3127 {
3128 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
3129 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3130 }
3131
UnpackHigh(RValue<SByte8> x,RValue<SByte8> y)3132 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
3133 {
3134 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; // Real type is v16i8
3135 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3136 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
3137 }
3138
SignMask(RValue<SByte8> x)3139 RValue<Int> SignMask(RValue<SByte8> x)
3140 {
3141 #if defined(__i386__) || defined(__x86_64__)
3142 return x86::pmovmskb(As<Byte8>(x));
3143 #else
3144 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
3145 #endif
3146 }
3147
CmpGT(RValue<SByte8> x,RValue<SByte8> y)3148 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
3149 {
3150 #if defined(__i386__) || defined(__x86_64__)
3151 return x86::pcmpgtb(x, y);
3152 #else
3153 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
3154 #endif
3155 }
3156
CmpEQ(RValue<SByte8> x,RValue<SByte8> y)3157 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
3158 {
3159 #if defined(__i386__) || defined(__x86_64__)
3160 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
3161 #else
3162 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
3163 #endif
3164 }
3165
getType()3166 Type *SByte8::getType()
3167 {
3168 return T(Type_v8i8);
3169 }
3170
Byte16(RValue<Byte16> rhs)3171 Byte16::Byte16(RValue<Byte16> rhs)
3172 {
3173 storeValue(rhs.value);
3174 }
3175
// Copy constructor: loads the current value of rhs and stores it here.
Byte16::Byte16(const Byte16 &rhs)
{
	storeValue(rhs.loadValue());
}
3181
Byte16(const Reference<Byte16> & rhs)3182 Byte16::Byte16(const Reference<Byte16> &rhs)
3183 {
3184 Value *value = rhs.loadValue();
3185 storeValue(value);
3186 }
3187
operator =(RValue<Byte16> rhs)3188 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
3189 {
3190 storeValue(rhs.value);
3191
3192 return rhs;
3193 }
3194
operator =(const Byte16 & rhs)3195 RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
3196 {
3197 Value *value = rhs.loadValue();
3198 storeValue(value);
3199
3200 return RValue<Byte16>(value);
3201 }
3202
operator =(const Reference<Byte16> & rhs)3203 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
3204 {
3205 Value *value = rhs.loadValue();
3206 storeValue(value);
3207
3208 return RValue<Byte16>(value);
3209 }
3210
getType()3211 Type *Byte16::getType()
3212 {
3213 return T(llvm::VectorType::get(T(Byte::getType()), 16));
3214 }
3215
getType()3216 Type *SByte16::getType()
3217 {
3218 return T(llvm::VectorType::get(T(SByte::getType()), 16));
3219 }
3220
Short2(RValue<Short4> cast)3221 Short2::Short2(RValue<Short4> cast)
3222 {
3223 storeValue(Nucleus::createBitCast(cast.value, getType()));
3224 }
3225
getType()3226 Type *Short2::getType()
3227 {
3228 return T(Type_v2i16);
3229 }
3230
UShort2(RValue<UShort4> cast)3231 UShort2::UShort2(RValue<UShort4> cast)
3232 {
3233 storeValue(Nucleus::createBitCast(cast.value, getType()));
3234 }
3235
getType()3236 Type *UShort2::getType()
3237 {
3238 return T(Type_v2i16);
3239 }
3240
Short4(RValue<Int> cast)3241 Short4::Short4(RValue<Int> cast)
3242 {
3243 Value *vector = loadValue();
3244 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3245 Value *insert = Nucleus::createInsertElement(vector, element, 0);
3246 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3247
3248 storeValue(swizzle);
3249 }
3250
Short4(RValue<Int4> cast)3251 Short4::Short4(RValue<Int4> cast)
3252 {
3253 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3254 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3255
3256 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3257 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
3258
3259 storeValue(short4);
3260 }
3261
3262 // Short4::Short4(RValue<Float> cast)
3263 // {
3264 // }
3265
Short4(RValue<Float4> cast)3266 Short4::Short4(RValue<Float4> cast)
3267 {
3268 Int4 v4i32 = Int4(cast);
3269 #if defined(__i386__) || defined(__x86_64__)
3270 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
3271 #else
3272 Value *v = v4i32.loadValue();
3273 v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
3274 #endif
3275
3276 storeValue(As<Short4>(Int2(v4i32)).value);
3277 }
3278
Short4(short xyzw)3279 Short4::Short4(short xyzw)
3280 {
3281 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3282 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3283 }
3284
Short4(short x,short y,short z,short w)3285 Short4::Short4(short x, short y, short z, short w)
3286 {
3287 int64_t constantVector[4] = {x, y, z, w};
3288 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3289 }
3290
Short4(RValue<Short4> rhs)3291 Short4::Short4(RValue<Short4> rhs)
3292 {
3293 storeValue(rhs.value);
3294 }
3295
// Copy constructor: loads the current value of rhs and stores it here.
Short4::Short4(const Short4 &rhs)
{
	storeValue(rhs.loadValue());
}
3301
Short4(const Reference<Short4> & rhs)3302 Short4::Short4(const Reference<Short4> &rhs)
3303 {
3304 Value *value = rhs.loadValue();
3305 storeValue(value);
3306 }
3307
Short4(RValue<UShort4> rhs)3308 Short4::Short4(RValue<UShort4> rhs)
3309 {
3310 storeValue(rhs.value);
3311 }
3312
Short4(const UShort4 & rhs)3313 Short4::Short4(const UShort4 &rhs)
3314 {
3315 storeValue(rhs.loadValue());
3316 }
3317
Short4(const Reference<UShort4> & rhs)3318 Short4::Short4(const Reference<UShort4> &rhs)
3319 {
3320 storeValue(rhs.loadValue());
3321 }
3322
operator =(RValue<Short4> rhs)3323 RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3324 {
3325 storeValue(rhs.value);
3326
3327 return rhs;
3328 }
3329
operator =(const Short4 & rhs)3330 RValue<Short4> Short4::operator=(const Short4 &rhs)
3331 {
3332 Value *value = rhs.loadValue();
3333 storeValue(value);
3334
3335 return RValue<Short4>(value);
3336 }
3337
operator =(const Reference<Short4> & rhs)3338 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3339 {
3340 Value *value = rhs.loadValue();
3341 storeValue(value);
3342
3343 return RValue<Short4>(value);
3344 }
3345
operator =(RValue<UShort4> rhs)3346 RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3347 {
3348 storeValue(rhs.value);
3349
3350 return RValue<Short4>(rhs);
3351 }
3352
operator =(const UShort4 & rhs)3353 RValue<Short4> Short4::operator=(const UShort4 &rhs)
3354 {
3355 Value *value = rhs.loadValue();
3356 storeValue(value);
3357
3358 return RValue<Short4>(value);
3359 }
3360
operator =(const Reference<UShort4> & rhs)3361 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3362 {
3363 Value *value = rhs.loadValue();
3364 storeValue(value);
3365
3366 return RValue<Short4>(value);
3367 }
3368
operator +(RValue<Short4> lhs,RValue<Short4> rhs)3369 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3370 {
3371 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3372 }
3373
operator -(RValue<Short4> lhs,RValue<Short4> rhs)3374 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3375 {
3376 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3377 }
3378
operator *(RValue<Short4> lhs,RValue<Short4> rhs)3379 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3380 {
3381 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3382 }
3383
3384 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3385 // {
3386 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3387 // }
3388
3389 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3390 // {
3391 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3392 // }
3393
operator &(RValue<Short4> lhs,RValue<Short4> rhs)3394 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3395 {
3396 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3397 }
3398
operator |(RValue<Short4> lhs,RValue<Short4> rhs)3399 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3400 {
3401 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3402 }
3403
operator ^(RValue<Short4> lhs,RValue<Short4> rhs)3404 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3405 {
3406 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3407 }
3408
operator <<(RValue<Short4> lhs,unsigned char rhs)3409 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3410 {
3411 #if defined(__i386__) || defined(__x86_64__)
3412 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3413
3414 return x86::psllw(lhs, rhs);
3415 #else
3416 return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
3417 #endif
3418 }
3419
operator >>(RValue<Short4> lhs,unsigned char rhs)3420 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3421 {
3422 #if defined(__i386__) || defined(__x86_64__)
3423 return x86::psraw(lhs, rhs);
3424 #else
3425 return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
3426 #endif
3427 }
3428
operator +=(Short4 & lhs,RValue<Short4> rhs)3429 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3430 {
3431 return lhs = lhs + rhs;
3432 }
3433
operator -=(Short4 & lhs,RValue<Short4> rhs)3434 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3435 {
3436 return lhs = lhs - rhs;
3437 }
3438
operator *=(Short4 & lhs,RValue<Short4> rhs)3439 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3440 {
3441 return lhs = lhs * rhs;
3442 }
3443
3444 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3445 // {
3446 // return lhs = lhs / rhs;
3447 // }
3448
3449 // RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3450 // {
3451 // return lhs = lhs % rhs;
3452 // }
3453
operator &=(Short4 & lhs,RValue<Short4> rhs)3454 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3455 {
3456 return lhs = lhs & rhs;
3457 }
3458
operator |=(Short4 & lhs,RValue<Short4> rhs)3459 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3460 {
3461 return lhs = lhs | rhs;
3462 }
3463
operator ^=(Short4 & lhs,RValue<Short4> rhs)3464 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3465 {
3466 return lhs = lhs ^ rhs;
3467 }
3468
operator <<=(Short4 & lhs,unsigned char rhs)3469 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3470 {
3471 return lhs = lhs << rhs;
3472 }
3473
operator >>=(Short4 & lhs,unsigned char rhs)3474 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3475 {
3476 return lhs = lhs >> rhs;
3477 }
3478
3479 // RValue<Short4> operator+(RValue<Short4> val)
3480 // {
3481 // return val;
3482 // }
3483
operator -(RValue<Short4> val)3484 RValue<Short4> operator-(RValue<Short4> val)
3485 {
3486 return RValue<Short4>(Nucleus::createNeg(val.value));
3487 }
3488
operator ~(RValue<Short4> val)3489 RValue<Short4> operator~(RValue<Short4> val)
3490 {
3491 return RValue<Short4>(Nucleus::createNot(val.value));
3492 }
3493
RoundShort4(RValue<Float4> cast)3494 RValue<Short4> RoundShort4(RValue<Float4> cast)
3495 {
3496 RValue<Int4> int4 = RoundInt(cast);
3497 return As<Short4>(PackSigned(int4, int4));
3498 }
3499
Max(RValue<Short4> x,RValue<Short4> y)3500 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3501 {
3502 #if defined(__i386__) || defined(__x86_64__)
3503 return x86::pmaxsw(x, y);
3504 #else
3505 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3506 #endif
3507 }
3508
Min(RValue<Short4> x,RValue<Short4> y)3509 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3510 {
3511 #if defined(__i386__) || defined(__x86_64__)
3512 return x86::pminsw(x, y);
3513 #else
3514 return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3515 #endif
3516 }
3517
AddSat(RValue<Short4> x,RValue<Short4> y)3518 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3519 {
3520 #if defined(__i386__) || defined(__x86_64__)
3521 return x86::paddsw(x, y);
3522 #else
3523 return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
3524 #endif
3525 }
3526
SubSat(RValue<Short4> x,RValue<Short4> y)3527 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3528 {
3529 #if defined(__i386__) || defined(__x86_64__)
3530 return x86::psubsw(x, y);
3531 #else
3532 return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
3533 #endif
3534 }
3535
MulHigh(RValue<Short4> x,RValue<Short4> y)3536 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3537 {
3538 #if defined(__i386__) || defined(__x86_64__)
3539 return x86::pmulhw(x, y);
3540 #else
3541 return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3542 #endif
3543 }
3544
MulAdd(RValue<Short4> x,RValue<Short4> y)3545 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3546 {
3547 #if defined(__i386__) || defined(__x86_64__)
3548 return x86::pmaddwd(x, y);
3549 #else
3550 return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
3551 #endif
3552 }
3553
PackSigned(RValue<Short4> x,RValue<Short4> y)3554 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
3555 {
3556 #if defined(__i386__) || defined(__x86_64__)
3557 auto result = x86::packsswb(x, y);
3558 #else
3559 auto result = V(lowerPack(V(x.value), V(y.value), true));
3560 #endif
3561 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
3562 }
3563
PackUnsigned(RValue<Short4> x,RValue<Short4> y)3564 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
3565 {
3566 #if defined(__i386__) || defined(__x86_64__)
3567 auto result = x86::packuswb(x, y);
3568 #else
3569 auto result = V(lowerPack(V(x.value), V(y.value), false));
3570 #endif
3571 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
3572 }
3573
UnpackLow(RValue<Short4> x,RValue<Short4> y)3574 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3575 {
3576 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
3577 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3578 }
3579
UnpackHigh(RValue<Short4> x,RValue<Short4> y)3580 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3581 {
3582 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // Real type is v8i16
3583 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3584 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3585 }
3586
Swizzle(RValue<Short4> x,unsigned char select)3587 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3588 {
3589 // Real type is v8i16
3590 int shuffle[8] =
3591 {
3592 (select >> 0) & 0x03,
3593 (select >> 2) & 0x03,
3594 (select >> 4) & 0x03,
3595 (select >> 6) & 0x03,
3596 (select >> 0) & 0x03,
3597 (select >> 2) & 0x03,
3598 (select >> 4) & 0x03,
3599 (select >> 6) & 0x03,
3600 };
3601
3602 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3603 }
3604
Insert(RValue<Short4> val,RValue<Short> element,int i)3605 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3606 {
3607 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3608 }
3609
Extract(RValue<Short4> val,int i)3610 RValue<Short> Extract(RValue<Short4> val, int i)
3611 {
3612 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3613 }
3614
CmpGT(RValue<Short4> x,RValue<Short4> y)3615 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3616 {
3617 #if defined(__i386__) || defined(__x86_64__)
3618 return x86::pcmpgtw(x, y);
3619 #else
3620 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
3621 #endif
3622 }
3623
CmpEQ(RValue<Short4> x,RValue<Short4> y)3624 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3625 {
3626 #if defined(__i386__) || defined(__x86_64__)
3627 return x86::pcmpeqw(x, y);
3628 #else
3629 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
3630 #endif
3631 }
3632
getType()3633 Type *Short4::getType()
3634 {
3635 return T(Type_v4i16);
3636 }
3637
UShort4(RValue<Int4> cast)3638 UShort4::UShort4(RValue<Int4> cast)
3639 {
3640 *this = Short4(cast);
3641 }
3642
UShort4(RValue<Float4> cast,bool saturate)3643 UShort4::UShort4(RValue<Float4> cast, bool saturate)
3644 {
3645 if(saturate)
3646 {
3647 #if defined(__i386__) || defined(__x86_64__)
3648 if(CPUID::supportsSSE4_1())
3649 {
3650 Int4 int4(Min(cast, Float4(0xFFFF))); // packusdw takes care of 0x0000 saturation
3651 *this = As<Short4>(PackUnsigned(int4, int4));
3652 }
3653 else
3654 #endif
3655 {
3656 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3657 }
3658 }
3659 else
3660 {
3661 *this = Short4(Int4(cast));
3662 }
3663 }
3664
UShort4(unsigned short xyzw)3665 UShort4::UShort4(unsigned short xyzw)
3666 {
3667 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3668 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3669 }
3670
UShort4(unsigned short x,unsigned short y,unsigned short z,unsigned short w)3671 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3672 {
3673 int64_t constantVector[4] = {x, y, z, w};
3674 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3675 }
3676
UShort4(RValue<UShort4> rhs)3677 UShort4::UShort4(RValue<UShort4> rhs)
3678 {
3679 storeValue(rhs.value);
3680 }
3681
// Copy constructor: loads the current value of rhs and stores it here.
UShort4::UShort4(const UShort4 &rhs)
{
	storeValue(rhs.loadValue());
}
3687
UShort4(const Reference<UShort4> & rhs)3688 UShort4::UShort4(const Reference<UShort4> &rhs)
3689 {
3690 Value *value = rhs.loadValue();
3691 storeValue(value);
3692 }
3693
UShort4(RValue<Short4> rhs)3694 UShort4::UShort4(RValue<Short4> rhs)
3695 {
3696 storeValue(rhs.value);
3697 }
3698
UShort4(const Short4 & rhs)3699 UShort4::UShort4(const Short4 &rhs)
3700 {
3701 Value *value = rhs.loadValue();
3702 storeValue(value);
3703 }
3704
UShort4(const Reference<Short4> & rhs)3705 UShort4::UShort4(const Reference<Short4> &rhs)
3706 {
3707 Value *value = rhs.loadValue();
3708 storeValue(value);
3709 }
3710
operator =(RValue<UShort4> rhs)3711 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3712 {
3713 storeValue(rhs.value);
3714
3715 return rhs;
3716 }
3717
operator =(const UShort4 & rhs)3718 RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3719 {
3720 Value *value = rhs.loadValue();
3721 storeValue(value);
3722
3723 return RValue<UShort4>(value);
3724 }
3725
operator =(const Reference<UShort4> & rhs)3726 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3727 {
3728 Value *value = rhs.loadValue();
3729 storeValue(value);
3730
3731 return RValue<UShort4>(value);
3732 }
3733
operator =(RValue<Short4> rhs)3734 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3735 {
3736 storeValue(rhs.value);
3737
3738 return RValue<UShort4>(rhs);
3739 }
3740
operator =(const Short4 & rhs)3741 RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3742 {
3743 Value *value = rhs.loadValue();
3744 storeValue(value);
3745
3746 return RValue<UShort4>(value);
3747 }
3748
operator =(const Reference<Short4> & rhs)3749 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3750 {
3751 Value *value = rhs.loadValue();
3752 storeValue(value);
3753
3754 return RValue<UShort4>(value);
3755 }
3756
operator +(RValue<UShort4> lhs,RValue<UShort4> rhs)3757 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3758 {
3759 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3760 }
3761
operator -(RValue<UShort4> lhs,RValue<UShort4> rhs)3762 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3763 {
3764 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3765 }
3766
operator *(RValue<UShort4> lhs,RValue<UShort4> rhs)3767 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3768 {
3769 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3770 }
3771
operator &(RValue<UShort4> lhs,RValue<UShort4> rhs)3772 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3773 {
3774 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3775 }
3776
operator |(RValue<UShort4> lhs,RValue<UShort4> rhs)3777 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3778 {
3779 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3780 }
3781
operator ^(RValue<UShort4> lhs,RValue<UShort4> rhs)3782 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3783 {
3784 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3785 }
3786
operator <<(RValue<UShort4> lhs,unsigned char rhs)3787 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3788 {
3789 #if defined(__i386__) || defined(__x86_64__)
3790 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3791
3792 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3793 #else
3794 return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
3795 #endif
3796 }
3797
operator >>(RValue<UShort4> lhs,unsigned char rhs)3798 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3799 {
3800 #if defined(__i386__) || defined(__x86_64__)
3801 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3802
3803 return x86::psrlw(lhs, rhs);
3804 #else
3805 return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3806 #endif
3807 }
3808
operator <<=(UShort4 & lhs,unsigned char rhs)3809 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3810 {
3811 return lhs = lhs << rhs;
3812 }
3813
operator >>=(UShort4 & lhs,unsigned char rhs)3814 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3815 {
3816 return lhs = lhs >> rhs;
3817 }
3818
operator ~(RValue<UShort4> val)3819 RValue<UShort4> operator~(RValue<UShort4> val)
3820 {
3821 return RValue<UShort4>(Nucleus::createNot(val.value));
3822 }
3823
Max(RValue<UShort4> x,RValue<UShort4> y)3824 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3825 {
3826 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3827 }
3828
Min(RValue<UShort4> x,RValue<UShort4> y)3829 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3830 {
3831 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3832 }
3833
AddSat(RValue<UShort4> x,RValue<UShort4> y)3834 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3835 {
3836 #if defined(__i386__) || defined(__x86_64__)
3837 return x86::paddusw(x, y);
3838 #else
3839 return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
3840 #endif
3841 }
3842
SubSat(RValue<UShort4> x,RValue<UShort4> y)3843 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3844 {
3845 #if defined(__i386__) || defined(__x86_64__)
3846 return x86::psubusw(x, y);
3847 #else
3848 return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
3849 #endif
3850 }
3851
MulHigh(RValue<UShort4> x,RValue<UShort4> y)3852 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3853 {
3854 #if defined(__i386__) || defined(__x86_64__)
3855 return x86::pmulhuw(x, y);
3856 #else
3857 return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
3858 #endif
3859 }
3860
Average(RValue<UShort4> x,RValue<UShort4> y)3861 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3862 {
3863 #if defined(__i386__) || defined(__x86_64__)
3864 return x86::pavgw(x, y);
3865 #else
3866 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
3867 #endif
3868 }
3869
getType()3870 Type *UShort4::getType()
3871 {
3872 return T(Type_v4i16);
3873 }
3874
Short8(short c)3875 Short8::Short8(short c)
3876 {
3877 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3878 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3879 }
3880
Short8(short c0,short c1,short c2,short c3,short c4,short c5,short c6,short c7)3881 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3882 {
3883 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3884 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3885 }
3886
Short8(RValue<Short8> rhs)3887 Short8::Short8(RValue<Short8> rhs)
3888 {
3889 storeValue(rhs.value);
3890 }
3891
Short8(const Reference<Short8> & rhs)3892 Short8::Short8(const Reference<Short8> &rhs)
3893 {
3894 Value *value = rhs.loadValue();
3895 storeValue(value);
3896 }
3897
Short8(RValue<Short4> lo,RValue<Short4> hi)3898 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3899 {
3900 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
3901 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3902
3903 storeValue(packed);
3904 }
3905
operator +(RValue<Short8> lhs,RValue<Short8> rhs)3906 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3907 {
3908 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3909 }
3910
operator &(RValue<Short8> lhs,RValue<Short8> rhs)3911 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3912 {
3913 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3914 }
3915
operator <<(RValue<Short8> lhs,unsigned char rhs)3916 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3917 {
3918 #if defined(__i386__) || defined(__x86_64__)
3919 return x86::psllw(lhs, rhs);
3920 #else
3921 return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
3922 #endif
3923 }
3924
operator >>(RValue<Short8> lhs,unsigned char rhs)3925 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3926 {
3927 #if defined(__i386__) || defined(__x86_64__)
3928 return x86::psraw(lhs, rhs);
3929 #else
3930 return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
3931 #endif
3932 }
3933
MulAdd(RValue<Short8> x,RValue<Short8> y)3934 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3935 {
3936 #if defined(__i386__) || defined(__x86_64__)
3937 return x86::pmaddwd(x, y);
3938 #else
3939 return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
3940 #endif
3941 }
3942
Abs(RValue<Int4> x)3943 RValue<Int4> Abs(RValue<Int4> x)
3944 {
3945 auto negative = x >> 31;
3946 return (x ^ negative) - negative;
3947 }
3948
MulHigh(RValue<Short8> x,RValue<Short8> y)3949 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3950 {
3951 #if defined(__i386__) || defined(__x86_64__)
3952 return x86::pmulhw(x, y);
3953 #else
3954 return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3955 #endif
3956 }
3957
getType()3958 Type *Short8::getType()
3959 {
3960 return T(llvm::VectorType::get(T(Short::getType()), 8));
3961 }
3962
UShort8(unsigned short c)3963 UShort8::UShort8(unsigned short c)
3964 {
3965 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3966 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3967 }
3968
UShort8(unsigned short c0,unsigned short c1,unsigned short c2,unsigned short c3,unsigned short c4,unsigned short c5,unsigned short c6,unsigned short c7)3969 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3970 {
3971 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3972 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3973 }
3974
UShort8(RValue<UShort8> rhs)3975 UShort8::UShort8(RValue<UShort8> rhs)
3976 {
3977 storeValue(rhs.value);
3978 }
3979
UShort8(const Reference<UShort8> & rhs)3980 UShort8::UShort8(const Reference<UShort8> &rhs)
3981 {
3982 Value *value = rhs.loadValue();
3983 storeValue(value);
3984 }
3985
UShort8(RValue<UShort4> lo,RValue<UShort4> hi)3986 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3987 {
3988 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11}; // Real type is v8i16
3989 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3990
3991 storeValue(packed);
3992 }
3993
operator =(RValue<UShort8> rhs)3994 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3995 {
3996 storeValue(rhs.value);
3997
3998 return rhs;
3999 }
4000
operator =(const UShort8 & rhs)4001 RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
4002 {
4003 Value *value = rhs.loadValue();
4004 storeValue(value);
4005
4006 return RValue<UShort8>(value);
4007 }
4008
operator =(const Reference<UShort8> & rhs)4009 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
4010 {
4011 Value *value = rhs.loadValue();
4012 storeValue(value);
4013
4014 return RValue<UShort8>(value);
4015 }
4016
operator &(RValue<UShort8> lhs,RValue<UShort8> rhs)4017 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
4018 {
4019 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
4020 }
4021
operator <<(RValue<UShort8> lhs,unsigned char rhs)4022 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
4023 {
4024 #if defined(__i386__) || defined(__x86_64__)
4025 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
4026 #else
4027 return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
4028 #endif
4029 }
4030
operator >>(RValue<UShort8> lhs,unsigned char rhs)4031 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
4032 {
4033 #if defined(__i386__) || defined(__x86_64__)
4034 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
4035 #else
4036 return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
4037 #endif
4038 }
4039
operator +(RValue<UShort8> lhs,RValue<UShort8> rhs)4040 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
4041 {
4042 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
4043 }
4044
operator *(RValue<UShort8> lhs,RValue<UShort8> rhs)4045 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
4046 {
4047 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
4048 }
4049
operator +=(UShort8 & lhs,RValue<UShort8> rhs)4050 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
4051 {
4052 return lhs = lhs + rhs;
4053 }
4054
operator ~(RValue<UShort8> val)4055 RValue<UShort8> operator~(RValue<UShort8> val)
4056 {
4057 return RValue<UShort8>(Nucleus::createNot(val.value));
4058 }
4059
Swizzle(RValue<UShort8> x,char select0,char select1,char select2,char select3,char select4,char select5,char select6,char select7)4060 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
4061 {
4062 int pshufb[16] =
4063 {
4064 select0 + 0,
4065 select0 + 1,
4066 select1 + 0,
4067 select1 + 1,
4068 select2 + 0,
4069 select2 + 1,
4070 select3 + 0,
4071 select3 + 1,
4072 select4 + 0,
4073 select4 + 1,
4074 select5 + 0,
4075 select5 + 1,
4076 select6 + 0,
4077 select6 + 1,
4078 select7 + 0,
4079 select7 + 1,
4080 };
4081
4082 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
4083 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
4084 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
4085
4086 return RValue<UShort8>(short8);
4087 }
4088
MulHigh(RValue<UShort8> x,RValue<UShort8> y)4089 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
4090 {
4091 #if defined(__i386__) || defined(__x86_64__)
4092 return x86::pmulhuw(x, y);
4093 #else
4094 return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
4095 #endif
4096 }
4097
getType()4098 Type *UShort8::getType()
4099 {
4100 return T(llvm::VectorType::get(T(UShort::getType()), 8));
4101 }
4102
Int(Argument<Int> argument)4103 Int::Int(Argument<Int> argument)
4104 {
4105 storeValue(argument.value);
4106 }
4107
Int(RValue<Byte> cast)4108 Int::Int(RValue<Byte> cast)
4109 {
4110 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4111
4112 storeValue(integer);
4113 }
4114
Int(RValue<SByte> cast)4115 Int::Int(RValue<SByte> cast)
4116 {
4117 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4118
4119 storeValue(integer);
4120 }
4121
Int(RValue<Short> cast)4122 Int::Int(RValue<Short> cast)
4123 {
4124 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4125
4126 storeValue(integer);
4127 }
4128
Int(RValue<UShort> cast)4129 Int::Int(RValue<UShort> cast)
4130 {
4131 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4132
4133 storeValue(integer);
4134 }
4135
Int(RValue<Int2> cast)4136 Int::Int(RValue<Int2> cast)
4137 {
4138 *this = Extract(cast, 0);
4139 }
4140
Int(RValue<Long> cast)4141 Int::Int(RValue<Long> cast)
4142 {
4143 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
4144
4145 storeValue(integer);
4146 }
4147
Int(RValue<Float> cast)4148 Int::Int(RValue<Float> cast)
4149 {
4150 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
4151
4152 storeValue(integer);
4153 }
4154
Int(int x)4155 Int::Int(int x)
4156 {
4157 storeValue(Nucleus::createConstantInt(x));
4158 }
4159
Int(RValue<Int> rhs)4160 Int::Int(RValue<Int> rhs)
4161 {
4162 storeValue(rhs.value);
4163 }
4164
Int(RValue<UInt> rhs)4165 Int::Int(RValue<UInt> rhs)
4166 {
4167 storeValue(rhs.value);
4168 }
4169
// Copy constructor: loads the other Int variable and stores the result here.
Int::Int(const Int &rhs)
{
	storeValue(rhs.loadValue());
}
4175
Int(const Reference<Int> & rhs)4176 Int::Int(const Reference<Int> &rhs)
4177 {
4178 Value *value = rhs.loadValue();
4179 storeValue(value);
4180 }
4181
Int(const UInt & rhs)4182 Int::Int(const UInt &rhs)
4183 {
4184 Value *value = rhs.loadValue();
4185 storeValue(value);
4186 }
4187
Int(const Reference<UInt> & rhs)4188 Int::Int(const Reference<UInt> &rhs)
4189 {
4190 Value *value = rhs.loadValue();
4191 storeValue(value);
4192 }
4193
operator =(int rhs)4194 RValue<Int> Int::operator=(int rhs)
4195 {
4196 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
4197 }
4198
operator =(RValue<Int> rhs)4199 RValue<Int> Int::operator=(RValue<Int> rhs)
4200 {
4201 storeValue(rhs.value);
4202
4203 return rhs;
4204 }
4205
operator =(RValue<UInt> rhs)4206 RValue<Int> Int::operator=(RValue<UInt> rhs)
4207 {
4208 storeValue(rhs.value);
4209
4210 return RValue<Int>(rhs);
4211 }
4212
operator =(const Int & rhs)4213 RValue<Int> Int::operator=(const Int &rhs)
4214 {
4215 Value *value = rhs.loadValue();
4216 storeValue(value);
4217
4218 return RValue<Int>(value);
4219 }
4220
operator =(const Reference<Int> & rhs)4221 RValue<Int> Int::operator=(const Reference<Int> &rhs)
4222 {
4223 Value *value = rhs.loadValue();
4224 storeValue(value);
4225
4226 return RValue<Int>(value);
4227 }
4228
operator =(const UInt & rhs)4229 RValue<Int> Int::operator=(const UInt &rhs)
4230 {
4231 Value *value = rhs.loadValue();
4232 storeValue(value);
4233
4234 return RValue<Int>(value);
4235 }
4236
operator =(const Reference<UInt> & rhs)4237 RValue<Int> Int::operator=(const Reference<UInt> &rhs)
4238 {
4239 Value *value = rhs.loadValue();
4240 storeValue(value);
4241
4242 return RValue<Int>(value);
4243 }
4244
operator +(RValue<Int> lhs,RValue<Int> rhs)4245 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
4246 {
4247 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
4248 }
4249
operator -(RValue<Int> lhs,RValue<Int> rhs)4250 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
4251 {
4252 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
4253 }
4254
operator *(RValue<Int> lhs,RValue<Int> rhs)4255 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
4256 {
4257 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
4258 }
4259
operator /(RValue<Int> lhs,RValue<Int> rhs)4260 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
4261 {
4262 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
4263 }
4264
operator %(RValue<Int> lhs,RValue<Int> rhs)4265 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
4266 {
4267 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4268 }
4269
operator &(RValue<Int> lhs,RValue<Int> rhs)4270 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4271 {
4272 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4273 }
4274
operator |(RValue<Int> lhs,RValue<Int> rhs)4275 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4276 {
4277 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4278 }
4279
operator ^(RValue<Int> lhs,RValue<Int> rhs)4280 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4281 {
4282 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4283 }
4284
operator <<(RValue<Int> lhs,RValue<Int> rhs)4285 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4286 {
4287 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4288 }
4289
operator >>(RValue<Int> lhs,RValue<Int> rhs)4290 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4291 {
4292 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4293 }
4294
operator +=(Int & lhs,RValue<Int> rhs)4295 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4296 {
4297 return lhs = lhs + rhs;
4298 }
4299
operator -=(Int & lhs,RValue<Int> rhs)4300 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4301 {
4302 return lhs = lhs - rhs;
4303 }
4304
operator *=(Int & lhs,RValue<Int> rhs)4305 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4306 {
4307 return lhs = lhs * rhs;
4308 }
4309
operator /=(Int & lhs,RValue<Int> rhs)4310 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4311 {
4312 return lhs = lhs / rhs;
4313 }
4314
operator %=(Int & lhs,RValue<Int> rhs)4315 RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4316 {
4317 return lhs = lhs % rhs;
4318 }
4319
operator &=(Int & lhs,RValue<Int> rhs)4320 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4321 {
4322 return lhs = lhs & rhs;
4323 }
4324
operator |=(Int & lhs,RValue<Int> rhs)4325 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4326 {
4327 return lhs = lhs | rhs;
4328 }
4329
operator ^=(Int & lhs,RValue<Int> rhs)4330 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4331 {
4332 return lhs = lhs ^ rhs;
4333 }
4334
operator <<=(Int & lhs,RValue<Int> rhs)4335 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4336 {
4337 return lhs = lhs << rhs;
4338 }
4339
operator >>=(Int & lhs,RValue<Int> rhs)4340 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4341 {
4342 return lhs = lhs >> rhs;
4343 }
4344
operator +(RValue<Int> val)4345 RValue<Int> operator+(RValue<Int> val)
4346 {
4347 return val;
4348 }
4349
operator -(RValue<Int> val)4350 RValue<Int> operator-(RValue<Int> val)
4351 {
4352 return RValue<Int>(Nucleus::createNeg(val.value));
4353 }
4354
operator ~(RValue<Int> val)4355 RValue<Int> operator~(RValue<Int> val)
4356 {
4357 return RValue<Int>(Nucleus::createNot(val.value));
4358 }
4359
operator ++(Int & val,int)4360 RValue<Int> operator++(Int &val, int) // Post-increment
4361 {
4362 RValue<Int> res = val;
4363
4364 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4365 val.storeValue(inc);
4366
4367 return res;
4368 }
4369
operator ++(Int & val)4370 const Int &operator++(Int &val) // Pre-increment
4371 {
4372 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4373 val.storeValue(inc);
4374
4375 return val;
4376 }
4377
operator --(Int & val,int)4378 RValue<Int> operator--(Int &val, int) // Post-decrement
4379 {
4380 RValue<Int> res = val;
4381
4382 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4383 val.storeValue(inc);
4384
4385 return res;
4386 }
4387
operator --(Int & val)4388 const Int &operator--(Int &val) // Pre-decrement
4389 {
4390 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4391 val.storeValue(inc);
4392
4393 return val;
4394 }
4395
operator <(RValue<Int> lhs,RValue<Int> rhs)4396 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4397 {
4398 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4399 }
4400
operator <=(RValue<Int> lhs,RValue<Int> rhs)4401 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4402 {
4403 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4404 }
4405
operator >(RValue<Int> lhs,RValue<Int> rhs)4406 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4407 {
4408 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4409 }
4410
operator >=(RValue<Int> lhs,RValue<Int> rhs)4411 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4412 {
4413 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4414 }
4415
operator !=(RValue<Int> lhs,RValue<Int> rhs)4416 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4417 {
4418 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4419 }
4420
operator ==(RValue<Int> lhs,RValue<Int> rhs)4421 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4422 {
4423 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4424 }
4425
Max(RValue<Int> x,RValue<Int> y)4426 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4427 {
4428 return IfThenElse(x > y, x, y);
4429 }
4430
Min(RValue<Int> x,RValue<Int> y)4431 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4432 {
4433 return IfThenElse(x < y, x, y);
4434 }
4435
Clamp(RValue<Int> x,RValue<Int> min,RValue<Int> max)4436 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4437 {
4438 return Min(Max(x, min), max);
4439 }
4440
RoundInt(RValue<Float> cast)4441 RValue<Int> RoundInt(RValue<Float> cast)
4442 {
4443 #if defined(__i386__) || defined(__x86_64__)
4444 return x86::cvtss2si(cast);
4445 #else
4446 return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
4447 #endif
4448 }
4449
getType()4450 Type *Int::getType()
4451 {
4452 return T(llvm::Type::getInt32Ty(*::context));
4453 }
4454
Long(RValue<Int> cast)4455 Long::Long(RValue<Int> cast)
4456 {
4457 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4458
4459 storeValue(integer);
4460 }
4461
Long(RValue<UInt> cast)4462 Long::Long(RValue<UInt> cast)
4463 {
4464 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4465
4466 storeValue(integer);
4467 }
4468
Long(RValue<Long> rhs)4469 Long::Long(RValue<Long> rhs)
4470 {
4471 storeValue(rhs.value);
4472 }
4473
operator =(int64_t rhs)4474 RValue<Long> Long::operator=(int64_t rhs)
4475 {
4476 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4477 }
4478
operator =(RValue<Long> rhs)4479 RValue<Long> Long::operator=(RValue<Long> rhs)
4480 {
4481 storeValue(rhs.value);
4482
4483 return rhs;
4484 }
4485
operator =(const Long & rhs)4486 RValue<Long> Long::operator=(const Long &rhs)
4487 {
4488 Value *value = rhs.loadValue();
4489 storeValue(value);
4490
4491 return RValue<Long>(value);
4492 }
4493
operator =(const Reference<Long> & rhs)4494 RValue<Long> Long::operator=(const Reference<Long> &rhs)
4495 {
4496 Value *value = rhs.loadValue();
4497 storeValue(value);
4498
4499 return RValue<Long>(value);
4500 }
4501
operator +(RValue<Long> lhs,RValue<Long> rhs)4502 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4503 {
4504 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4505 }
4506
operator -(RValue<Long> lhs,RValue<Long> rhs)4507 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4508 {
4509 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4510 }
4511
operator +=(Long & lhs,RValue<Long> rhs)4512 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4513 {
4514 return lhs = lhs + rhs;
4515 }
4516
operator -=(Long & lhs,RValue<Long> rhs)4517 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4518 {
4519 return lhs = lhs - rhs;
4520 }
4521
AddAtomic(RValue<Pointer<Long>> x,RValue<Long> y)4522 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4523 {
4524 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4525 }
4526
getType()4527 Type *Long::getType()
4528 {
4529 return T(llvm::Type::getInt64Ty(*::context));
4530 }
4531
UInt(Argument<UInt> argument)4532 UInt::UInt(Argument<UInt> argument)
4533 {
4534 storeValue(argument.value);
4535 }
4536
UInt(RValue<UShort> cast)4537 UInt::UInt(RValue<UShort> cast)
4538 {
4539 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4540
4541 storeValue(integer);
4542 }
4543
UInt(RValue<Long> cast)4544 UInt::UInt(RValue<Long> cast)
4545 {
4546 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4547
4548 storeValue(integer);
4549 }
4550
UInt(RValue<Float> cast)4551 UInt::UInt(RValue<Float> cast)
4552 {
4553 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
4554 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4555
4556 // Smallest positive value representable in UInt, but not in Int
4557 const unsigned int ustart = 0x80000000u;
4558 const float ustartf = float(ustart);
4559
4560 // If the value is negative, store 0, otherwise store the result of the conversion
4561 storeValue((~(As<Int>(cast) >> 31) &
4562 // Check if the value can be represented as an Int
4563 IfThenElse(cast >= ustartf,
4564 // If the value is too large, subtract ustart and re-add it after conversion.
4565 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4566 // Otherwise, just convert normally
4567 Int(cast))).value);
4568 }
4569
UInt(int x)4570 UInt::UInt(int x)
4571 {
4572 storeValue(Nucleus::createConstantInt(x));
4573 }
4574
UInt(unsigned int x)4575 UInt::UInt(unsigned int x)
4576 {
4577 storeValue(Nucleus::createConstantInt(x));
4578 }
4579
UInt(RValue<UInt> rhs)4580 UInt::UInt(RValue<UInt> rhs)
4581 {
4582 storeValue(rhs.value);
4583 }
4584
UInt(RValue<Int> rhs)4585 UInt::UInt(RValue<Int> rhs)
4586 {
4587 storeValue(rhs.value);
4588 }
4589
// Copy constructor: loads the other UInt variable and stores the result here.
UInt::UInt(const UInt &rhs)
{
	storeValue(rhs.loadValue());
}
4595
UInt(const Reference<UInt> & rhs)4596 UInt::UInt(const Reference<UInt> &rhs)
4597 {
4598 Value *value = rhs.loadValue();
4599 storeValue(value);
4600 }
4601
UInt(const Int & rhs)4602 UInt::UInt(const Int &rhs)
4603 {
4604 Value *value = rhs.loadValue();
4605 storeValue(value);
4606 }
4607
UInt(const Reference<Int> & rhs)4608 UInt::UInt(const Reference<Int> &rhs)
4609 {
4610 Value *value = rhs.loadValue();
4611 storeValue(value);
4612 }
4613
operator =(unsigned int rhs)4614 RValue<UInt> UInt::operator=(unsigned int rhs)
4615 {
4616 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4617 }
4618
operator =(RValue<UInt> rhs)4619 RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4620 {
4621 storeValue(rhs.value);
4622
4623 return rhs;
4624 }
4625
operator =(RValue<Int> rhs)4626 RValue<UInt> UInt::operator=(RValue<Int> rhs)
4627 {
4628 storeValue(rhs.value);
4629
4630 return RValue<UInt>(rhs);
4631 }
4632
operator =(const UInt & rhs)4633 RValue<UInt> UInt::operator=(const UInt &rhs)
4634 {
4635 Value *value = rhs.loadValue();
4636 storeValue(value);
4637
4638 return RValue<UInt>(value);
4639 }
4640
operator =(const Reference<UInt> & rhs)4641 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4642 {
4643 Value *value = rhs.loadValue();
4644 storeValue(value);
4645
4646 return RValue<UInt>(value);
4647 }
4648
operator =(const Int & rhs)4649 RValue<UInt> UInt::operator=(const Int &rhs)
4650 {
4651 Value *value = rhs.loadValue();
4652 storeValue(value);
4653
4654 return RValue<UInt>(value);
4655 }
4656
operator =(const Reference<Int> & rhs)4657 RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4658 {
4659 Value *value = rhs.loadValue();
4660 storeValue(value);
4661
4662 return RValue<UInt>(value);
4663 }
4664
operator +(RValue<UInt> lhs,RValue<UInt> rhs)4665 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4666 {
4667 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4668 }
4669
operator -(RValue<UInt> lhs,RValue<UInt> rhs)4670 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4671 {
4672 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4673 }
4674
operator *(RValue<UInt> lhs,RValue<UInt> rhs)4675 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4676 {
4677 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4678 }
4679
operator /(RValue<UInt> lhs,RValue<UInt> rhs)4680 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4681 {
4682 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4683 }
4684
operator %(RValue<UInt> lhs,RValue<UInt> rhs)4685 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4686 {
4687 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4688 }
4689
operator &(RValue<UInt> lhs,RValue<UInt> rhs)4690 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4691 {
4692 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4693 }
4694
operator |(RValue<UInt> lhs,RValue<UInt> rhs)4695 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4696 {
4697 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4698 }
4699
operator ^(RValue<UInt> lhs,RValue<UInt> rhs)4700 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4701 {
4702 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4703 }
4704
operator <<(RValue<UInt> lhs,RValue<UInt> rhs)4705 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4706 {
4707 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4708 }
4709
operator >>(RValue<UInt> lhs,RValue<UInt> rhs)4710 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4711 {
4712 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4713 }
4714
operator +=(UInt & lhs,RValue<UInt> rhs)4715 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4716 {
4717 return lhs = lhs + rhs;
4718 }
4719
operator -=(UInt & lhs,RValue<UInt> rhs)4720 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4721 {
4722 return lhs = lhs - rhs;
4723 }
4724
operator *=(UInt & lhs,RValue<UInt> rhs)4725 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4726 {
4727 return lhs = lhs * rhs;
4728 }
4729
operator /=(UInt & lhs,RValue<UInt> rhs)4730 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4731 {
4732 return lhs = lhs / rhs;
4733 }
4734
operator %=(UInt & lhs,RValue<UInt> rhs)4735 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4736 {
4737 return lhs = lhs % rhs;
4738 }
4739
operator &=(UInt & lhs,RValue<UInt> rhs)4740 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4741 {
4742 return lhs = lhs & rhs;
4743 }
4744
operator |=(UInt & lhs,RValue<UInt> rhs)4745 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4746 {
4747 return lhs = lhs | rhs;
4748 }
4749
operator ^=(UInt & lhs,RValue<UInt> rhs)4750 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4751 {
4752 return lhs = lhs ^ rhs;
4753 }
4754
operator <<=(UInt & lhs,RValue<UInt> rhs)4755 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4756 {
4757 return lhs = lhs << rhs;
4758 }
4759
operator >>=(UInt & lhs,RValue<UInt> rhs)4760 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4761 {
4762 return lhs = lhs >> rhs;
4763 }
4764
operator +(RValue<UInt> val)4765 RValue<UInt> operator+(RValue<UInt> val)
4766 {
4767 return val;
4768 }
4769
operator -(RValue<UInt> val)4770 RValue<UInt> operator-(RValue<UInt> val)
4771 {
4772 return RValue<UInt>(Nucleus::createNeg(val.value));
4773 }
4774
operator ~(RValue<UInt> val)4775 RValue<UInt> operator~(RValue<UInt> val)
4776 {
4777 return RValue<UInt>(Nucleus::createNot(val.value));
4778 }
4779
operator ++(UInt & val,int)4780 RValue<UInt> operator++(UInt &val, int) // Post-increment
4781 {
4782 RValue<UInt> res = val;
4783
4784 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4785 val.storeValue(inc);
4786
4787 return res;
4788 }
4789
operator ++(UInt & val)4790 const UInt &operator++(UInt &val) // Pre-increment
4791 {
4792 Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4793 val.storeValue(inc);
4794
4795 return val;
4796 }
4797
operator --(UInt & val,int)4798 RValue<UInt> operator--(UInt &val, int) // Post-decrement
4799 {
4800 RValue<UInt> res = val;
4801
4802 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4803 val.storeValue(inc);
4804
4805 return res;
4806 }
4807
operator --(UInt & val)4808 const UInt &operator--(UInt &val) // Pre-decrement
4809 {
4810 Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4811 val.storeValue(inc);
4812
4813 return val;
4814 }
4815
Max(RValue<UInt> x,RValue<UInt> y)4816 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4817 {
4818 return IfThenElse(x > y, x, y);
4819 }
4820
Min(RValue<UInt> x,RValue<UInt> y)4821 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4822 {
4823 return IfThenElse(x < y, x, y);
4824 }
4825
Clamp(RValue<UInt> x,RValue<UInt> min,RValue<UInt> max)4826 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4827 {
4828 return Min(Max(x, min), max);
4829 }
4830
operator <(RValue<UInt> lhs,RValue<UInt> rhs)4831 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4832 {
4833 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4834 }
4835
operator <=(RValue<UInt> lhs,RValue<UInt> rhs)4836 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4837 {
4838 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4839 }
4840
operator >(RValue<UInt> lhs,RValue<UInt> rhs)4841 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4842 {
4843 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4844 }
4845
operator >=(RValue<UInt> lhs,RValue<UInt> rhs)4846 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4847 {
4848 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4849 }
4850
operator !=(RValue<UInt> lhs,RValue<UInt> rhs)4851 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4852 {
4853 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4854 }
4855
operator ==(RValue<UInt> lhs,RValue<UInt> rhs)4856 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4857 {
4858 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4859 }
4860
4861 // RValue<UInt> RoundUInt(RValue<Float> cast)
4862 // {
4863 //#if defined(__i386__) || defined(__x86_64__)
4864 // return x86::cvtss2si(val); // FIXME: Unsigned
4865 //#else
4866 // return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
4867 //#endif
4868 // }
4869
getType()4870 Type *UInt::getType()
4871 {
4872 return T(llvm::Type::getInt32Ty(*::context));
4873 }
4874
4875 // Int2::Int2(RValue<Int> cast)
4876 // {
4877 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4878 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4879 //
4880 // int shuffle[2] = {0, 0};
4881 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4882 //
4883 // storeValue(replicate);
4884 // }
4885
Int2(RValue<Int4> cast)4886 Int2::Int2(RValue<Int4> cast)
4887 {
4888 storeValue(Nucleus::createBitCast(cast.value, getType()));
4889 }
4890
Int2(int x,int y)4891 Int2::Int2(int x, int y)
4892 {
4893 int64_t constantVector[2] = {x, y};
4894 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4895 }
4896
Int2(RValue<Int2> rhs)4897 Int2::Int2(RValue<Int2> rhs)
4898 {
4899 storeValue(rhs.value);
4900 }
4901
// Copy constructor: loads the other Int2 variable and stores the result here.
Int2::Int2(const Int2 &rhs)
{
	storeValue(rhs.loadValue());
}
4907
Int2(const Reference<Int2> & rhs)4908 Int2::Int2(const Reference<Int2> &rhs)
4909 {
4910 Value *value = rhs.loadValue();
4911 storeValue(value);
4912 }
4913
Int2(RValue<Int> lo,RValue<Int> hi)4914 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4915 {
4916 int shuffle[4] = {0, 4, 1, 5};
4917 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4918
4919 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4920 }
4921
operator =(RValue<Int2> rhs)4922 RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4923 {
4924 storeValue(rhs.value);
4925
4926 return rhs;
4927 }
4928
operator =(const Int2 & rhs)4929 RValue<Int2> Int2::operator=(const Int2 &rhs)
4930 {
4931 Value *value = rhs.loadValue();
4932 storeValue(value);
4933
4934 return RValue<Int2>(value);
4935 }
4936
operator =(const Reference<Int2> & rhs)4937 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4938 {
4939 Value *value = rhs.loadValue();
4940 storeValue(value);
4941
4942 return RValue<Int2>(value);
4943 }
4944
operator +(RValue<Int2> lhs,RValue<Int2> rhs)4945 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4946 {
4947 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4948 }
4949
operator -(RValue<Int2> lhs,RValue<Int2> rhs)4950 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4951 {
4952 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4953 }
4954
4955 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4956 // {
4957 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4958 // }
4959
4960 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4961 // {
4962 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4963 // }
4964
4965 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4966 // {
4967 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4968 // }
4969
operator &(RValue<Int2> lhs,RValue<Int2> rhs)4970 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4971 {
4972 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4973 }
4974
operator |(RValue<Int2> lhs,RValue<Int2> rhs)4975 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4976 {
4977 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4978 }
4979
operator ^(RValue<Int2> lhs,RValue<Int2> rhs)4980 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4981 {
4982 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4983 }
4984
operator <<(RValue<Int2> lhs,unsigned char rhs)4985 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4986 {
4987 #if defined(__i386__) || defined(__x86_64__)
4988 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4989
4990 return x86::pslld(lhs, rhs);
4991 #else
4992 return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
4993 #endif
4994 }
4995
operator >>(RValue<Int2> lhs,unsigned char rhs)4996 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4997 {
4998 #if defined(__i386__) || defined(__x86_64__)
4999 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
5000
5001 return x86::psrad(lhs, rhs);
5002 #else
5003 return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
5004 #endif
5005 }
5006
operator +=(Int2 & lhs,RValue<Int2> rhs)5007 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
5008 {
5009 return lhs = lhs + rhs;
5010 }
5011
operator -=(Int2 & lhs,RValue<Int2> rhs)5012 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
5013 {
5014 return lhs = lhs - rhs;
5015 }
5016
5017 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
5018 // {
5019 // return lhs = lhs * rhs;
5020 // }
5021
5022 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
5023 // {
5024 // return lhs = lhs / rhs;
5025 // }
5026
5027 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
5028 // {
5029 // return lhs = lhs % rhs;
5030 // }
5031
operator &=(Int2 & lhs,RValue<Int2> rhs)5032 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
5033 {
5034 return lhs = lhs & rhs;
5035 }
5036
operator |=(Int2 & lhs,RValue<Int2> rhs)5037 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
5038 {
5039 return lhs = lhs | rhs;
5040 }
5041
operator ^=(Int2 & lhs,RValue<Int2> rhs)5042 RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
5043 {
5044 return lhs = lhs ^ rhs;
5045 }
5046
operator <<=(Int2 & lhs,unsigned char rhs)5047 RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
5048 {
5049 return lhs = lhs << rhs;
5050 }
5051
operator >>=(Int2 & lhs,unsigned char rhs)5052 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
5053 {
5054 return lhs = lhs >> rhs;
5055 }
5056
5057 // RValue<Int2> operator+(RValue<Int2> val)
5058 // {
5059 // return val;
5060 // }
5061
5062 // RValue<Int2> operator-(RValue<Int2> val)
5063 // {
5064 // return RValue<Int2>(Nucleus::createNeg(val.value));
5065 // }
5066
operator ~(RValue<Int2> val)5067 RValue<Int2> operator~(RValue<Int2> val)
5068 {
5069 return RValue<Int2>(Nucleus::createNot(val.value));
5070 }
5071
UnpackLow(RValue<Int2> x,RValue<Int2> y)5072 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
5073 {
5074 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
5075 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5076 }
5077
UnpackHigh(RValue<Int2> x,RValue<Int2> y)5078 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
5079 {
5080 int shuffle[4] = {0, 4, 1, 5}; // Real type is v4i32
5081 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5082 return As<Short4>(Swizzle(lowHigh, 0xEE));
5083 }
5084
Extract(RValue<Int2> val,int i)5085 RValue<Int> Extract(RValue<Int2> val, int i)
5086 {
5087 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
5088 }
5089
Insert(RValue<Int2> val,RValue<Int> element,int i)5090 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
5091 {
5092 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
5093 }
5094
getType()5095 Type *Int2::getType()
5096 {
5097 return T(Type_v2i32);
5098 }
5099
UInt2(unsigned int x,unsigned int y)5100 UInt2::UInt2(unsigned int x, unsigned int y)
5101 {
5102 int64_t constantVector[2] = {x, y};
5103 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5104 }
5105
UInt2(RValue<UInt2> rhs)5106 UInt2::UInt2(RValue<UInt2> rhs)
5107 {
5108 storeValue(rhs.value);
5109 }
5110
// Copy constructor: loads the value of rhs and stores it into this variable.
UInt2::UInt2(const UInt2 &rhs)
{
	storeValue(rhs.loadValue());
}
5116
UInt2(const Reference<UInt2> & rhs)5117 UInt2::UInt2(const Reference<UInt2> &rhs)
5118 {
5119 Value *value = rhs.loadValue();
5120 storeValue(value);
5121 }
5122
operator =(RValue<UInt2> rhs)5123 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
5124 {
5125 storeValue(rhs.value);
5126
5127 return rhs;
5128 }
5129
operator =(const UInt2 & rhs)5130 RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
5131 {
5132 Value *value = rhs.loadValue();
5133 storeValue(value);
5134
5135 return RValue<UInt2>(value);
5136 }
5137
operator =(const Reference<UInt2> & rhs)5138 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
5139 {
5140 Value *value = rhs.loadValue();
5141 storeValue(value);
5142
5143 return RValue<UInt2>(value);
5144 }
5145
operator +(RValue<UInt2> lhs,RValue<UInt2> rhs)5146 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5147 {
5148 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5149 }
5150
operator -(RValue<UInt2> lhs,RValue<UInt2> rhs)5151 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5152 {
5153 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5154 }
5155
5156 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5157 // {
5158 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5159 // }
5160
5161 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5162 // {
5163 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5164 // }
5165
5166 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5167 // {
5168 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5169 // }
5170
operator &(RValue<UInt2> lhs,RValue<UInt2> rhs)5171 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5172 {
5173 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5174 }
5175
operator |(RValue<UInt2> lhs,RValue<UInt2> rhs)5176 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5177 {
5178 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5179 }
5180
operator ^(RValue<UInt2> lhs,RValue<UInt2> rhs)5181 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5182 {
5183 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5184 }
5185
operator <<(RValue<UInt2> lhs,unsigned char rhs)5186 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5187 {
5188 #if defined(__i386__) || defined(__x86_64__)
5189 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5190
5191 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5192 #else
5193 return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
5194 #endif
5195 }
5196
operator >>(RValue<UInt2> lhs,unsigned char rhs)5197 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5198 {
5199 #if defined(__i386__) || defined(__x86_64__)
5200 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5201
5202 return x86::psrld(lhs, rhs);
5203 #else
5204 return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
5205 #endif
5206 }
5207
operator +=(UInt2 & lhs,RValue<UInt2> rhs)5208 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
5209 {
5210 return lhs = lhs + rhs;
5211 }
5212
operator -=(UInt2 & lhs,RValue<UInt2> rhs)5213 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
5214 {
5215 return lhs = lhs - rhs;
5216 }
5217
5218 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
5219 // {
5220 // return lhs = lhs * rhs;
5221 // }
5222
5223 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
5224 // {
5225 // return lhs = lhs / rhs;
5226 // }
5227
5228 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
5229 // {
5230 // return lhs = lhs % rhs;
5231 // }
5232
operator &=(UInt2 & lhs,RValue<UInt2> rhs)5233 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
5234 {
5235 return lhs = lhs & rhs;
5236 }
5237
operator |=(UInt2 & lhs,RValue<UInt2> rhs)5238 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
5239 {
5240 return lhs = lhs | rhs;
5241 }
5242
operator ^=(UInt2 & lhs,RValue<UInt2> rhs)5243 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
5244 {
5245 return lhs = lhs ^ rhs;
5246 }
5247
operator <<=(UInt2 & lhs,unsigned char rhs)5248 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
5249 {
5250 return lhs = lhs << rhs;
5251 }
5252
operator >>=(UInt2 & lhs,unsigned char rhs)5253 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
5254 {
5255 return lhs = lhs >> rhs;
5256 }
5257
5258 // RValue<UInt2> operator+(RValue<UInt2> val)
5259 // {
5260 // return val;
5261 // }
5262
5263 // RValue<UInt2> operator-(RValue<UInt2> val)
5264 // {
5265 // return RValue<UInt2>(Nucleus::createNeg(val.value));
5266 // }
5267
operator ~(RValue<UInt2> val)5268 RValue<UInt2> operator~(RValue<UInt2> val)
5269 {
5270 return RValue<UInt2>(Nucleus::createNot(val.value));
5271 }
5272
getType()5273 Type *UInt2::getType()
5274 {
5275 return T(Type_v2i32);
5276 }
5277
Int4()5278 Int4::Int4() : XYZW(this)
5279 {
5280 }
5281
Int4(RValue<Byte4> cast)5282 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
5283 {
5284 #if defined(__i386__) || defined(__x86_64__)
5285 if(CPUID::supportsSSE4_1())
5286 {
5287 *this = x86::pmovzxbd(As<Byte16>(cast));
5288 }
5289 else
5290 #endif
5291 {
5292 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5293 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
5294 Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
5295
5296 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5297 Value *c = Nucleus::createBitCast(b, Short8::getType());
5298 Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
5299
5300 *this = As<Int4>(d);
5301 }
5302 }
5303
Int4(RValue<SByte4> cast)5304 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
5305 {
5306 #if defined(__i386__) || defined(__x86_64__)
5307 if(CPUID::supportsSSE4_1())
5308 {
5309 *this = x86::pmovsxbd(As<SByte16>(cast));
5310 }
5311 else
5312 #endif
5313 {
5314 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5315 Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
5316 Value *b = Nucleus::createShuffleVector(a, a, swizzle);
5317
5318 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5319 Value *c = Nucleus::createBitCast(b, Short8::getType());
5320 Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
5321
5322 *this = As<Int4>(d) >> 24;
5323 }
5324 }
5325
Int4(RValue<Float4> cast)5326 Int4::Int4(RValue<Float4> cast) : XYZW(this)
5327 {
5328 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5329
5330 storeValue(xyzw);
5331 }
5332
Int4(RValue<Short4> cast)5333 Int4::Int4(RValue<Short4> cast) : XYZW(this)
5334 {
5335 #if defined(__i386__) || defined(__x86_64__)
5336 if(CPUID::supportsSSE4_1())
5337 {
5338 *this = x86::pmovsxwd(As<Short8>(cast));
5339 }
5340 else
5341 #endif
5342 {
5343 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5344 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5345 *this = As<Int4>(c) >> 16;
5346 }
5347 }
5348
Int4(RValue<UShort4> cast)5349 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
5350 {
5351 #if defined(__i386__) || defined(__x86_64__)
5352 if(CPUID::supportsSSE4_1())
5353 {
5354 *this = x86::pmovzxwd(As<UShort8>(cast));
5355 }
5356 else
5357 #endif
5358 {
5359 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5360 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5361 *this = As<Int4>(c);
5362 }
5363 }
5364
Int4(int xyzw)5365 Int4::Int4(int xyzw) : XYZW(this)
5366 {
5367 constant(xyzw, xyzw, xyzw, xyzw);
5368 }
5369
Int4(int x,int yzw)5370 Int4::Int4(int x, int yzw) : XYZW(this)
5371 {
5372 constant(x, yzw, yzw, yzw);
5373 }
5374
Int4(int x,int y,int zw)5375 Int4::Int4(int x, int y, int zw) : XYZW(this)
5376 {
5377 constant(x, y, zw, zw);
5378 }
5379
Int4(int x,int y,int z,int w)5380 Int4::Int4(int x, int y, int z, int w) : XYZW(this)
5381 {
5382 constant(x, y, z, w);
5383 }
5384
constant(int x,int y,int z,int w)5385 void Int4::constant(int x, int y, int z, int w)
5386 {
5387 int64_t constantVector[4] = {x, y, z, w};
5388 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5389 }
5390
Int4(RValue<Int4> rhs)5391 Int4::Int4(RValue<Int4> rhs) : XYZW(this)
5392 {
5393 storeValue(rhs.value);
5394 }
5395
// Copy constructor: loads the value of rhs and stores it into this variable.
Int4::Int4(const Int4 &rhs) : XYZW(this)
{
	storeValue(rhs.loadValue());
}
5401
Int4(const Reference<Int4> & rhs)5402 Int4::Int4(const Reference<Int4> &rhs) : XYZW(this)
5403 {
5404 Value *value = rhs.loadValue();
5405 storeValue(value);
5406 }
5407
Int4(RValue<UInt4> rhs)5408 Int4::Int4(RValue<UInt4> rhs) : XYZW(this)
5409 {
5410 storeValue(rhs.value);
5411 }
5412
Int4(const UInt4 & rhs)5413 Int4::Int4(const UInt4 &rhs) : XYZW(this)
5414 {
5415 Value *value = rhs.loadValue();
5416 storeValue(value);
5417 }
5418
Int4(const Reference<UInt4> & rhs)5419 Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this)
5420 {
5421 Value *value = rhs.loadValue();
5422 storeValue(value);
5423 }
5424
Int4(RValue<Int2> lo,RValue<Int2> hi)5425 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this)
5426 {
5427 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
5428 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5429
5430 storeValue(packed);
5431 }
5432
Int4(RValue<Int> rhs)5433 Int4::Int4(RValue<Int> rhs) : XYZW(this)
5434 {
5435 Value *vector = loadValue();
5436 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5437
5438 int swizzle[4] = {0, 0, 0, 0};
5439 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5440
5441 storeValue(replicate);
5442 }
5443
Int4(const Int & rhs)5444 Int4::Int4(const Int &rhs) : XYZW(this)
5445 {
5446 *this = RValue<Int>(rhs.loadValue());
5447 }
5448
Int4(const Reference<Int> & rhs)5449 Int4::Int4(const Reference<Int> &rhs) : XYZW(this)
5450 {
5451 *this = RValue<Int>(rhs.loadValue());
5452 }
5453
operator =(RValue<Int4> rhs)5454 RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5455 {
5456 storeValue(rhs.value);
5457
5458 return rhs;
5459 }
5460
operator =(const Int4 & rhs)5461 RValue<Int4> Int4::operator=(const Int4 &rhs)
5462 {
5463 Value *value = rhs.loadValue();
5464 storeValue(value);
5465
5466 return RValue<Int4>(value);
5467 }
5468
operator =(const Reference<Int4> & rhs)5469 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5470 {
5471 Value *value = rhs.loadValue();
5472 storeValue(value);
5473
5474 return RValue<Int4>(value);
5475 }
5476
operator +(RValue<Int4> lhs,RValue<Int4> rhs)5477 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5478 {
5479 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5480 }
5481
operator -(RValue<Int4> lhs,RValue<Int4> rhs)5482 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5483 {
5484 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5485 }
5486
operator *(RValue<Int4> lhs,RValue<Int4> rhs)5487 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5488 {
5489 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5490 }
5491
operator /(RValue<Int4> lhs,RValue<Int4> rhs)5492 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5493 {
5494 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5495 }
5496
operator %(RValue<Int4> lhs,RValue<Int4> rhs)5497 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5498 {
5499 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5500 }
5501
operator &(RValue<Int4> lhs,RValue<Int4> rhs)5502 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5503 {
5504 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5505 }
5506
operator |(RValue<Int4> lhs,RValue<Int4> rhs)5507 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5508 {
5509 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5510 }
5511
operator ^(RValue<Int4> lhs,RValue<Int4> rhs)5512 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5513 {
5514 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5515 }
5516
operator <<(RValue<Int4> lhs,unsigned char rhs)5517 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5518 {
5519 #if defined(__i386__) || defined(__x86_64__)
5520 return x86::pslld(lhs, rhs);
5521 #else
5522 return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
5523 #endif
5524 }
5525
operator >>(RValue<Int4> lhs,unsigned char rhs)5526 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5527 {
5528 #if defined(__i386__) || defined(__x86_64__)
5529 return x86::psrad(lhs, rhs);
5530 #else
5531 return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
5532 #endif
5533 }
5534
operator <<(RValue<Int4> lhs,RValue<Int4> rhs)5535 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5536 {
5537 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5538 }
5539
operator >>(RValue<Int4> lhs,RValue<Int4> rhs)5540 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5541 {
5542 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5543 }
5544
operator +=(Int4 & lhs,RValue<Int4> rhs)5545 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5546 {
5547 return lhs = lhs + rhs;
5548 }
5549
operator -=(Int4 & lhs,RValue<Int4> rhs)5550 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5551 {
5552 return lhs = lhs - rhs;
5553 }
5554
operator *=(Int4 & lhs,RValue<Int4> rhs)5555 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5556 {
5557 return lhs = lhs * rhs;
5558 }
5559
5560 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5561 // {
5562 // return lhs = lhs / rhs;
5563 // }
5564
5565 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5566 // {
5567 // return lhs = lhs % rhs;
5568 // }
5569
operator &=(Int4 & lhs,RValue<Int4> rhs)5570 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5571 {
5572 return lhs = lhs & rhs;
5573 }
5574
operator |=(Int4 & lhs,RValue<Int4> rhs)5575 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5576 {
5577 return lhs = lhs | rhs;
5578 }
5579
operator ^=(Int4 & lhs,RValue<Int4> rhs)5580 RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5581 {
5582 return lhs = lhs ^ rhs;
5583 }
5584
operator <<=(Int4 & lhs,unsigned char rhs)5585 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5586 {
5587 return lhs = lhs << rhs;
5588 }
5589
operator >>=(Int4 & lhs,unsigned char rhs)5590 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5591 {
5592 return lhs = lhs >> rhs;
5593 }
5594
operator +(RValue<Int4> val)5595 RValue<Int4> operator+(RValue<Int4> val)
5596 {
5597 return val;
5598 }
5599
operator -(RValue<Int4> val)5600 RValue<Int4> operator-(RValue<Int4> val)
5601 {
5602 return RValue<Int4>(Nucleus::createNeg(val.value));
5603 }
5604
operator ~(RValue<Int4> val)5605 RValue<Int4> operator~(RValue<Int4> val)
5606 {
5607 return RValue<Int4>(Nucleus::createNot(val.value));
5608 }
5609
CmpEQ(RValue<Int4> x,RValue<Int4> y)5610 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5611 {
5612 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5613 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5614 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5615 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5616 }
5617
CmpLT(RValue<Int4> x,RValue<Int4> y)5618 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5619 {
5620 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5621 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5622 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
5623 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5624 }
5625
CmpLE(RValue<Int4> x,RValue<Int4> y)5626 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5627 {
5628 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5629 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5630 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5631 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5632 }
5633
CmpNEQ(RValue<Int4> x,RValue<Int4> y)5634 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5635 {
5636 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5637 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5638 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5639 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5640 }
5641
CmpNLT(RValue<Int4> x,RValue<Int4> y)5642 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5643 {
5644 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5645 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5646 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5647 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5648 }
5649
CmpNLE(RValue<Int4> x,RValue<Int4> y)5650 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5651 {
5652 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5653 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5654 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
5655 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5656 }
5657
Max(RValue<Int4> x,RValue<Int4> y)5658 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5659 {
5660 #if defined(__i386__) || defined(__x86_64__)
5661 if(CPUID::supportsSSE4_1())
5662 {
5663 return x86::pmaxsd(x, y);
5664 }
5665 else
5666 #endif
5667 {
5668 RValue<Int4> greater = CmpNLE(x, y);
5669 return (x & greater) | (y & ~greater);
5670 }
5671 }
5672
Min(RValue<Int4> x,RValue<Int4> y)5673 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5674 {
5675 #if defined(__i386__) || defined(__x86_64__)
5676 if(CPUID::supportsSSE4_1())
5677 {
5678 return x86::pminsd(x, y);
5679 }
5680 else
5681 #endif
5682 {
5683 RValue<Int4> less = CmpLT(x, y);
5684 return (x & less) | (y & ~less);
5685 }
5686 }
5687
RoundInt(RValue<Float4> cast)5688 RValue<Int4> RoundInt(RValue<Float4> cast)
5689 {
5690 #if defined(__i386__) || defined(__x86_64__)
5691 return x86::cvtps2dq(cast);
5692 #else
5693 return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
5694 #endif
5695 }
5696
PackSigned(RValue<Int4> x,RValue<Int4> y)5697 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
5698 {
5699 #if defined(__i386__) || defined(__x86_64__)
5700 return x86::packssdw(x, y);
5701 #else
5702 return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
5703 #endif
5704 }
5705
PackUnsigned(RValue<Int4> x,RValue<Int4> y)5706 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
5707 {
5708 #if defined(__i386__) || defined(__x86_64__)
5709 return x86::packusdw(x, y);
5710 #else
5711 return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
5712 #endif
5713 }
5714
Extract(RValue<Int4> x,int i)5715 RValue<Int> Extract(RValue<Int4> x, int i)
5716 {
5717 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5718 }
5719
Insert(RValue<Int4> x,RValue<Int> element,int i)5720 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5721 {
5722 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5723 }
5724
SignMask(RValue<Int4> x)5725 RValue<Int> SignMask(RValue<Int4> x)
5726 {
5727 #if defined(__i386__) || defined(__x86_64__)
5728 return x86::movmskps(As<Float4>(x));
5729 #else
5730 return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
5731 #endif
5732 }
5733
Swizzle(RValue<Int4> x,unsigned char select)5734 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5735 {
5736 return RValue<Int4>(createSwizzle4(x.value, select));
5737 }
5738
getType()5739 Type *Int4::getType()
5740 {
5741 return T(llvm::VectorType::get(T(Int::getType()), 4));
5742 }
5743
UInt4()5744 UInt4::UInt4() : XYZW(this)
5745 {
5746 }
5747
// Converts a Float4 to UInt4 using only signed float-to-int conversions.
// Inputs at or above 2^31 are converted after subtracting 2^31, and 2^31 is
// added back as an unsigned integer afterwards. Inputs whose sign bit is set
// produce 0.
UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
{
	// Note: createFPToUI is broken, must perform conversion using createFPtoSI
	// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());

	// Smallest positive value representable in UInt, but not in Int
	const unsigned int ustart = 0x80000000u;
	const float ustartf = float(ustart);

	// Mask of the elements that are NOT less than ustartf, i.e. those too
	// large to be converted directly as an Int.
	// NOTE(review): assumes the Float4 CmpNLT returns an all-ones/all-zeros mask per element, as the Int4 overloads do - confirm.
	Int4 uiValue = CmpNLT(cast, Float4(ustartf));
	// If the value is too large, subtract ustart and re-add it after conversion.
	uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
	// Otherwise, just convert normally
	          (~uiValue & Int4(cast));
	// If the value is negative, store 0, otherwise store the result of the conversion.
	// (As<Int4>(cast) >> 31) shifts the float's bit pattern right by 31, so the
	// inverted result clears every element whose sign bit was set.
	storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
}
5766
UInt4(int xyzw)5767 UInt4::UInt4(int xyzw) : XYZW(this)
5768 {
5769 constant(xyzw, xyzw, xyzw, xyzw);
5770 }
5771
UInt4(int x,int yzw)5772 UInt4::UInt4(int x, int yzw) : XYZW(this)
5773 {
5774 constant(x, yzw, yzw, yzw);
5775 }
5776
UInt4(int x,int y,int zw)5777 UInt4::UInt4(int x, int y, int zw) : XYZW(this)
5778 {
5779 constant(x, y, zw, zw);
5780 }
5781
UInt4(int x,int y,int z,int w)5782 UInt4::UInt4(int x, int y, int z, int w) : XYZW(this)
5783 {
5784 constant(x, y, z, w);
5785 }
5786
constant(int x,int y,int z,int w)5787 void UInt4::constant(int x, int y, int z, int w)
5788 {
5789 int64_t constantVector[4] = {x, y, z, w};
5790 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5791 }
5792
UInt4(RValue<UInt4> rhs)5793 UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this)
5794 {
5795 storeValue(rhs.value);
5796 }
5797
// Copy constructor: loads the value of rhs and stores it into this variable.
UInt4::UInt4(const UInt4 &rhs) : XYZW(this)
{
	storeValue(rhs.loadValue());
}
5803
UInt4(const Reference<UInt4> & rhs)5804 UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this)
5805 {
5806 Value *value = rhs.loadValue();
5807 storeValue(value);
5808 }
5809
UInt4(RValue<Int4> rhs)5810 UInt4::UInt4(RValue<Int4> rhs) : XYZW(this)
5811 {
5812 storeValue(rhs.value);
5813 }
5814
UInt4(const Int4 & rhs)5815 UInt4::UInt4(const Int4 &rhs) : XYZW(this)
5816 {
5817 Value *value = rhs.loadValue();
5818 storeValue(value);
5819 }
5820
UInt4(const Reference<Int4> & rhs)5821 UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this)
5822 {
5823 Value *value = rhs.loadValue();
5824 storeValue(value);
5825 }
5826
UInt4(RValue<UInt2> lo,RValue<UInt2> hi)5827 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this)
5828 {
5829 int shuffle[4] = {0, 1, 4, 5}; // Real type is v4i32
5830 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5831
5832 storeValue(packed);
5833 }
5834
operator =(RValue<UInt4> rhs)5835 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5836 {
5837 storeValue(rhs.value);
5838
5839 return rhs;
5840 }
5841
operator =(const UInt4 & rhs)5842 RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5843 {
5844 Value *value = rhs.loadValue();
5845 storeValue(value);
5846
5847 return RValue<UInt4>(value);
5848 }
5849
operator =(const Reference<UInt4> & rhs)5850 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5851 {
5852 Value *value = rhs.loadValue();
5853 storeValue(value);
5854
5855 return RValue<UInt4>(value);
5856 }
5857
operator +(RValue<UInt4> lhs,RValue<UInt4> rhs)5858 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5859 {
5860 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5861 }
5862
operator -(RValue<UInt4> lhs,RValue<UInt4> rhs)5863 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5864 {
5865 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5866 }
5867
operator *(RValue<UInt4> lhs,RValue<UInt4> rhs)5868 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5869 {
5870 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5871 }
5872
operator /(RValue<UInt4> lhs,RValue<UInt4> rhs)5873 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5874 {
5875 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5876 }
5877
operator %(RValue<UInt4> lhs,RValue<UInt4> rhs)5878 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5879 {
5880 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5881 }
5882
operator &(RValue<UInt4> lhs,RValue<UInt4> rhs)5883 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5884 {
5885 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5886 }
5887
operator |(RValue<UInt4> lhs,RValue<UInt4> rhs)5888 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5889 {
5890 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5891 }
5892
operator ^(RValue<UInt4> lhs,RValue<UInt4> rhs)5893 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5894 {
5895 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5896 }
5897
operator <<(RValue<UInt4> lhs,unsigned char rhs)5898 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5899 {
5900 #if defined(__i386__) || defined(__x86_64__)
5901 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5902 #else
5903 return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
5904 #endif
5905 }
5906
operator >>(RValue<UInt4> lhs,unsigned char rhs)5907 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5908 {
5909 #if defined(__i386__) || defined(__x86_64__)
5910 return x86::psrld(lhs, rhs);
5911 #else
5912 return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
5913 #endif
5914 }
5915
operator <<(RValue<UInt4> lhs,RValue<UInt4> rhs)5916 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5917 {
5918 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5919 }
5920
operator >>(RValue<UInt4> lhs,RValue<UInt4> rhs)5921 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5922 {
5923 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5924 }
5925
operator +=(UInt4 & lhs,RValue<UInt4> rhs)5926 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5927 {
5928 return lhs = lhs + rhs;
5929 }
5930
operator -=(UInt4 & lhs,RValue<UInt4> rhs)5931 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5932 {
5933 return lhs = lhs - rhs;
5934 }
5935
operator *=(UInt4 & lhs,RValue<UInt4> rhs)5936 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5937 {
5938 return lhs = lhs * rhs;
5939 }
5940
5941 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5942 // {
5943 // return lhs = lhs / rhs;
5944 // }
5945
5946 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5947 // {
5948 // return lhs = lhs % rhs;
5949 // }
5950
operator &=(UInt4 & lhs,RValue<UInt4> rhs)5951 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5952 {
5953 return lhs = lhs & rhs;
5954 }
5955
operator |=(UInt4 & lhs,RValue<UInt4> rhs)5956 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5957 {
5958 return lhs = lhs | rhs;
5959 }
5960
operator ^=(UInt4 & lhs,RValue<UInt4> rhs)5961 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5962 {
5963 return lhs = lhs ^ rhs;
5964 }
5965
operator <<=(UInt4 & lhs,unsigned char rhs)5966 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5967 {
5968 return lhs = lhs << rhs;
5969 }
5970
operator >>=(UInt4 & lhs,unsigned char rhs)5971 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5972 {
5973 return lhs = lhs >> rhs;
5974 }
5975
operator +(RValue<UInt4> val)5976 RValue<UInt4> operator+(RValue<UInt4> val)
5977 {
5978 return val;
5979 }
5980
operator -(RValue<UInt4> val)5981 RValue<UInt4> operator-(RValue<UInt4> val)
5982 {
5983 return RValue<UInt4>(Nucleus::createNeg(val.value));
5984 }
5985
operator ~(RValue<UInt4> val)5986 RValue<UInt4> operator~(RValue<UInt4> val)
5987 {
5988 return RValue<UInt4>(Nucleus::createNot(val.value));
5989 }
5990
CmpEQ(RValue<UInt4> x,RValue<UInt4> y)5991 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5992 {
5993 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5994 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5995 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5996 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5997 }
5998
CmpLT(RValue<UInt4> x,RValue<UInt4> y)5999 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
6000 {
6001 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
6002 }
6003
CmpLE(RValue<UInt4> x,RValue<UInt4> y)6004 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
6005 {
6006 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6007 // Restore the following line when LLVM is updated to a version where this issue is fixed.
6008 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
6009 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
6010 }
6011
CmpNEQ(RValue<UInt4> x,RValue<UInt4> y)6012 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
6013 {
6014 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
6015 }
6016
CmpNLT(RValue<UInt4> x,RValue<UInt4> y)6017 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
6018 {
6019 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6020 // Restore the following line when LLVM is updated to a version where this issue is fixed.
6021 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
6022 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
6023 }
6024
CmpNLE(RValue<UInt4> x,RValue<UInt4> y)6025 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
6026 {
6027 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
6028 }
6029
Max(RValue<UInt4> x,RValue<UInt4> y)6030 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6031 {
6032 #if defined(__i386__) || defined(__x86_64__)
6033 if(CPUID::supportsSSE4_1())
6034 {
6035 return x86::pmaxud(x, y);
6036 }
6037 else
6038 #endif
6039 {
6040 RValue<UInt4> greater = CmpNLE(x, y);
6041 return (x & greater) | (y & ~greater);
6042 }
6043 }
6044
Min(RValue<UInt4> x,RValue<UInt4> y)6045 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6046 {
6047 #if defined(__i386__) || defined(__x86_64__)
6048 if(CPUID::supportsSSE4_1())
6049 {
6050 return x86::pminud(x, y);
6051 }
6052 else
6053 #endif
6054 {
6055 RValue<UInt4> less = CmpLT(x, y);
6056 return (x & less) | (y & ~less);
6057 }
6058 }
6059
getType()6060 Type *UInt4::getType()
6061 {
6062 return T(llvm::VectorType::get(T(UInt::getType()), 4));
6063 }
6064
Half(RValue<Float> cast)6065 Half::Half(RValue<Float> cast)
6066 {
6067 UInt fp32i = As<UInt>(cast);
6068 UInt abs = fp32i & 0x7FFFFFFF;
6069 UShort fp16i((fp32i & 0x80000000) >> 16); // sign
6070
6071 If(abs > 0x47FFEFFF) // Infinity
6072 {
6073 fp16i |= UShort(0x7FFF);
6074 }
6075 Else
6076 {
6077 If(abs < 0x38800000) // Denormal
6078 {
6079 Int mantissa = (abs & 0x007FFFFF) | 0x00800000;
6080 Int e = 113 - (abs >> 23);
6081 abs = IfThenElse(e < 24, mantissa >> e, Int(0));
6082 fp16i |= UShort((abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
6083 }
6084 Else
6085 {
6086 fp16i |= UShort((abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
6087 }
6088 }
6089
6090 storeValue(fp16i.loadValue());
6091 }
6092
getType()6093 Type *Half::getType()
6094 {
6095 return T(llvm::Type::getInt16Ty(*::context));
6096 }
6097
Float(RValue<Int> cast)6098 Float::Float(RValue<Int> cast)
6099 {
6100 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6101
6102 storeValue(integer);
6103 }
6104
Float(RValue<UInt> cast)6105 Float::Float(RValue<UInt> cast)
6106 {
6107 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
6108 As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
6109
6110 storeValue(result.value);
6111 }
6112
Float(RValue<Half> cast)6113 Float::Float(RValue<Half> cast)
6114 {
6115 Int fp16i(As<UShort>(cast));
6116
6117 Int s = (fp16i >> 15) & 0x00000001;
6118 Int e = (fp16i >> 10) & 0x0000001F;
6119 Int m = fp16i & 0x000003FF;
6120
6121 UInt fp32i(s << 31);
6122 If(e == 0)
6123 {
6124 If(m != 0)
6125 {
6126 While((m & 0x00000400) == 0)
6127 {
6128 m <<= 1;
6129 e -= 1;
6130 }
6131
6132 fp32i |= As<UInt>(((e + (127 - 15) + 1) << 23) | ((m & ~0x00000400) << 13));
6133 }
6134 }
6135 Else
6136 {
6137 fp32i |= As<UInt>(((e + (127 - 15)) << 23) | (m << 13));
6138 }
6139
6140 storeValue(As<Float>(fp32i).value);
6141 }
6142
Float(float x)6143 Float::Float(float x)
6144 {
6145 storeValue(Nucleus::createConstantFloat(x));
6146 }
6147
Float(RValue<Float> rhs)6148 Float::Float(RValue<Float> rhs)
6149 {
6150 storeValue(rhs.value);
6151 }
6152
// Copy constructor: loads the other variable's current value and stores it.
Float::Float(const Float &rhs)
{
	storeValue(rhs.loadValue());
}
6158
Float(const Reference<Float> & rhs)6159 Float::Float(const Reference<Float> &rhs)
6160 {
6161 Value *value = rhs.loadValue();
6162 storeValue(value);
6163 }
6164
operator =(RValue<Float> rhs)6165 RValue<Float> Float::operator=(RValue<Float> rhs)
6166 {
6167 storeValue(rhs.value);
6168
6169 return rhs;
6170 }
6171
operator =(const Float & rhs)6172 RValue<Float> Float::operator=(const Float &rhs)
6173 {
6174 Value *value = rhs.loadValue();
6175 storeValue(value);
6176
6177 return RValue<Float>(value);
6178 }
6179
operator =(const Reference<Float> & rhs)6180 RValue<Float> Float::operator=(const Reference<Float> &rhs)
6181 {
6182 Value *value = rhs.loadValue();
6183 storeValue(value);
6184
6185 return RValue<Float>(value);
6186 }
6187
operator +(RValue<Float> lhs,RValue<Float> rhs)6188 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6189 {
6190 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6191 }
6192
operator -(RValue<Float> lhs,RValue<Float> rhs)6193 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6194 {
6195 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6196 }
6197
operator *(RValue<Float> lhs,RValue<Float> rhs)6198 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6199 {
6200 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6201 }
6202
operator /(RValue<Float> lhs,RValue<Float> rhs)6203 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6204 {
6205 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6206 }
6207
operator +=(Float & lhs,RValue<Float> rhs)6208 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
6209 {
6210 return lhs = lhs + rhs;
6211 }
6212
operator -=(Float & lhs,RValue<Float> rhs)6213 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
6214 {
6215 return lhs = lhs - rhs;
6216 }
6217
operator *=(Float & lhs,RValue<Float> rhs)6218 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
6219 {
6220 return lhs = lhs * rhs;
6221 }
6222
operator /=(Float & lhs,RValue<Float> rhs)6223 RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
6224 {
6225 return lhs = lhs / rhs;
6226 }
6227
operator +(RValue<Float> val)6228 RValue<Float> operator+(RValue<Float> val)
6229 {
6230 return val;
6231 }
6232
operator -(RValue<Float> val)6233 RValue<Float> operator-(RValue<Float> val)
6234 {
6235 return RValue<Float>(Nucleus::createFNeg(val.value));
6236 }
6237
operator <(RValue<Float> lhs,RValue<Float> rhs)6238 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6239 {
6240 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6241 }
6242
operator <=(RValue<Float> lhs,RValue<Float> rhs)6243 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6244 {
6245 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6246 }
6247
operator >(RValue<Float> lhs,RValue<Float> rhs)6248 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6249 {
6250 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6251 }
6252
operator >=(RValue<Float> lhs,RValue<Float> rhs)6253 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6254 {
6255 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6256 }
6257
operator !=(RValue<Float> lhs,RValue<Float> rhs)6258 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6259 {
6260 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6261 }
6262
operator ==(RValue<Float> lhs,RValue<Float> rhs)6263 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6264 {
6265 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6266 }
6267
Abs(RValue<Float> x)6268 RValue<Float> Abs(RValue<Float> x)
6269 {
6270 return IfThenElse(x > 0.0f, x, -x);
6271 }
6272
Max(RValue<Float> x,RValue<Float> y)6273 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6274 {
6275 return IfThenElse(x > y, x, y);
6276 }
6277
Min(RValue<Float> x,RValue<Float> y)6278 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6279 {
6280 return IfThenElse(x < y, x, y);
6281 }
6282
Rcp_pp(RValue<Float> x,bool exactAtPow2)6283 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6284 {
6285 #if defined(__i386__) || defined(__x86_64__)
6286 if(exactAtPow2)
6287 {
6288 // rcpss uses a piecewise-linear approximation which minimizes the relative error
6289 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6290 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6291 }
6292 return x86::rcpss(x);
6293 #else
6294 return As<Float>(V(lowerRCP(V(x.value))));
6295 #endif
6296 }
6297
RcpSqrt_pp(RValue<Float> x)6298 RValue<Float> RcpSqrt_pp(RValue<Float> x)
6299 {
6300 #if defined(__i386__) || defined(__x86_64__)
6301 return x86::rsqrtss(x);
6302 #else
6303 return As<Float>(V(lowerRSQRT(V(x.value))));
6304 #endif
6305 }
6306
Sqrt(RValue<Float> x)6307 RValue<Float> Sqrt(RValue<Float> x)
6308 {
6309 #if defined(__i386__) || defined(__x86_64__)
6310 return x86::sqrtss(x);
6311 #else
6312 return As<Float>(V(lowerSQRT(V(x.value))));
6313 #endif
6314 }
6315
Round(RValue<Float> x)6316 RValue<Float> Round(RValue<Float> x)
6317 {
6318 #if defined(__i386__) || defined(__x86_64__)
6319 if(CPUID::supportsSSE4_1())
6320 {
6321 return x86::roundss(x, 0);
6322 }
6323 else
6324 {
6325 return Float4(Round(Float4(x))).x;
6326 }
6327 #else
6328 return RValue<Float>(V(lowerRound(V(x.value))));
6329 #endif
6330 }
6331
Trunc(RValue<Float> x)6332 RValue<Float> Trunc(RValue<Float> x)
6333 {
6334 #if defined(__i386__) || defined(__x86_64__)
6335 if(CPUID::supportsSSE4_1())
6336 {
6337 return x86::roundss(x, 3);
6338 }
6339 else
6340 {
6341 return Float(Int(x)); // Rounded toward zero
6342 }
6343 #else
6344 return RValue<Float>(V(lowerTrunc(V(x.value))));
6345 #endif
6346 }
6347
Frac(RValue<Float> x)6348 RValue<Float> Frac(RValue<Float> x)
6349 {
6350 #if defined(__i386__) || defined(__x86_64__)
6351 if(CPUID::supportsSSE4_1())
6352 {
6353 return x - x86::floorss(x);
6354 }
6355 else
6356 {
6357 return Float4(Frac(Float4(x))).x;
6358 }
6359 #else
6360 // x - floor(x) can be 1.0 for very small negative x.
6361 // Clamp against the value just below 1.0.
6362 return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
6363 #endif
6364 }
6365
Floor(RValue<Float> x)6366 RValue<Float> Floor(RValue<Float> x)
6367 {
6368 #if defined(__i386__) || defined(__x86_64__)
6369 if(CPUID::supportsSSE4_1())
6370 {
6371 return x86::floorss(x);
6372 }
6373 else
6374 {
6375 return Float4(Floor(Float4(x))).x;
6376 }
6377 #else
6378 return RValue<Float>(V(lowerFloor(V(x.value))));
6379 #endif
6380 }
6381
Ceil(RValue<Float> x)6382 RValue<Float> Ceil(RValue<Float> x)
6383 {
6384 #if defined(__i386__) || defined(__x86_64__)
6385 if(CPUID::supportsSSE4_1())
6386 {
6387 return x86::ceilss(x);
6388 }
6389 else
6390 #endif
6391 {
6392 return Float4(Ceil(Float4(x))).x;
6393 }
6394 }
6395
getType()6396 Type *Float::getType()
6397 {
6398 return T(llvm::Type::getFloatTy(*::context));
6399 }
6400
Float2(RValue<Float4> cast)6401 Float2::Float2(RValue<Float4> cast)
6402 {
6403 storeValue(Nucleus::createBitCast(cast.value, getType()));
6404 }
6405
getType()6406 Type *Float2::getType()
6407 {
6408 return T(Type_v2f32);
6409 }
6410
Float4(RValue<Byte4> cast)6411 Float4::Float4(RValue<Byte4> cast) : XYZW(this)
6412 {
6413 Value *a = Int4(cast).loadValue();
6414 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6415
6416 storeValue(xyzw);
6417 }
6418
Float4(RValue<SByte4> cast)6419 Float4::Float4(RValue<SByte4> cast) : XYZW(this)
6420 {
6421 Value *a = Int4(cast).loadValue();
6422 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6423
6424 storeValue(xyzw);
6425 }
6426
Float4(RValue<Short4> cast)6427 Float4::Float4(RValue<Short4> cast) : XYZW(this)
6428 {
6429 Int4 c(cast);
6430 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6431 }
6432
Float4(RValue<UShort4> cast)6433 Float4::Float4(RValue<UShort4> cast) : XYZW(this)
6434 {
6435 Int4 c(cast);
6436 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6437 }
6438
Float4(RValue<Int4> cast)6439 Float4::Float4(RValue<Int4> cast) : XYZW(this)
6440 {
6441 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6442
6443 storeValue(xyzw);
6444 }
6445
Float4(RValue<UInt4> cast)6446 Float4::Float4(RValue<UInt4> cast) : XYZW(this)
6447 {
6448 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6449 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6450
6451 storeValue(result.value);
6452 }
6453
Float4()6454 Float4::Float4() : XYZW(this)
6455 {
6456 }
6457
Float4(float xyzw)6458 Float4::Float4(float xyzw) : XYZW(this)
6459 {
6460 constant(xyzw, xyzw, xyzw, xyzw);
6461 }
6462
Float4(float x,float yzw)6463 Float4::Float4(float x, float yzw) : XYZW(this)
6464 {
6465 constant(x, yzw, yzw, yzw);
6466 }
6467
Float4(float x,float y,float zw)6468 Float4::Float4(float x, float y, float zw) : XYZW(this)
6469 {
6470 constant(x, y, zw, zw);
6471 }
6472
Float4(float x,float y,float z,float w)6473 Float4::Float4(float x, float y, float z, float w) : XYZW(this)
6474 {
6475 constant(x, y, z, w);
6476 }
6477
constant(float x,float y,float z,float w)6478 void Float4::constant(float x, float y, float z, float w)
6479 {
6480 double constantVector[4] = {x, y, z, w};
6481 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6482 }
6483
Float4(RValue<Float4> rhs)6484 Float4::Float4(RValue<Float4> rhs) : XYZW(this)
6485 {
6486 storeValue(rhs.value);
6487 }
6488
// Copy constructor: loads the other variable's current value and stores it.
Float4::Float4(const Float4 &rhs) : XYZW(this)
{
	storeValue(rhs.loadValue());
}
6494
Float4(const Reference<Float4> & rhs)6495 Float4::Float4(const Reference<Float4> &rhs) : XYZW(this)
6496 {
6497 Value *value = rhs.loadValue();
6498 storeValue(value);
6499 }
6500
Float4(RValue<Float> rhs)6501 Float4::Float4(RValue<Float> rhs) : XYZW(this)
6502 {
6503 Value *vector = loadValue();
6504 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6505
6506 int swizzle[4] = {0, 0, 0, 0};
6507 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
6508
6509 storeValue(replicate);
6510 }
6511
Float4(const Float & rhs)6512 Float4::Float4(const Float &rhs) : XYZW(this)
6513 {
6514 *this = RValue<Float>(rhs.loadValue());
6515 }
6516
Float4(const Reference<Float> & rhs)6517 Float4::Float4(const Reference<Float> &rhs) : XYZW(this)
6518 {
6519 *this = RValue<Float>(rhs.loadValue());
6520 }
6521
operator =(float x)6522 RValue<Float4> Float4::operator=(float x)
6523 {
6524 return *this = Float4(x, x, x, x);
6525 }
6526
operator =(RValue<Float4> rhs)6527 RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6528 {
6529 storeValue(rhs.value);
6530
6531 return rhs;
6532 }
6533
operator =(const Float4 & rhs)6534 RValue<Float4> Float4::operator=(const Float4 &rhs)
6535 {
6536 Value *value = rhs.loadValue();
6537 storeValue(value);
6538
6539 return RValue<Float4>(value);
6540 }
6541
operator =(const Reference<Float4> & rhs)6542 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6543 {
6544 Value *value = rhs.loadValue();
6545 storeValue(value);
6546
6547 return RValue<Float4>(value);
6548 }
6549
operator =(RValue<Float> rhs)6550 RValue<Float4> Float4::operator=(RValue<Float> rhs)
6551 {
6552 return *this = Float4(rhs);
6553 }
6554
operator =(const Float & rhs)6555 RValue<Float4> Float4::operator=(const Float &rhs)
6556 {
6557 return *this = Float4(rhs);
6558 }
6559
operator =(const Reference<Float> & rhs)6560 RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6561 {
6562 return *this = Float4(rhs);
6563 }
6564
operator +(RValue<Float4> lhs,RValue<Float4> rhs)6565 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6566 {
6567 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6568 }
6569
operator -(RValue<Float4> lhs,RValue<Float4> rhs)6570 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6571 {
6572 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6573 }
6574
operator *(RValue<Float4> lhs,RValue<Float4> rhs)6575 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6576 {
6577 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6578 }
6579
operator /(RValue<Float4> lhs,RValue<Float4> rhs)6580 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6581 {
6582 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6583 }
6584
operator %(RValue<Float4> lhs,RValue<Float4> rhs)6585 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6586 {
6587 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6588 }
6589
operator +=(Float4 & lhs,RValue<Float4> rhs)6590 RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6591 {
6592 return lhs = lhs + rhs;
6593 }
6594
operator -=(Float4 & lhs,RValue<Float4> rhs)6595 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6596 {
6597 return lhs = lhs - rhs;
6598 }
6599
operator *=(Float4 & lhs,RValue<Float4> rhs)6600 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6601 {
6602 return lhs = lhs * rhs;
6603 }
6604
operator /=(Float4 & lhs,RValue<Float4> rhs)6605 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6606 {
6607 return lhs = lhs / rhs;
6608 }
6609
operator %=(Float4 & lhs,RValue<Float4> rhs)6610 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6611 {
6612 return lhs = lhs % rhs;
6613 }
6614
operator +(RValue<Float4> val)6615 RValue<Float4> operator+(RValue<Float4> val)
6616 {
6617 return val;
6618 }
6619
operator -(RValue<Float4> val)6620 RValue<Float4> operator-(RValue<Float4> val)
6621 {
6622 return RValue<Float4>(Nucleus::createFNeg(val.value));
6623 }
6624
Abs(RValue<Float4> x)6625 RValue<Float4> Abs(RValue<Float4> x)
6626 {
6627 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6628 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6629 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::getType()));
6630
6631 return As<Float4>(result);
6632 }
6633
Max(RValue<Float4> x,RValue<Float4> y)6634 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6635 {
6636 #if defined(__i386__) || defined(__x86_64__)
6637 return x86::maxps(x, y);
6638 #else
6639 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
6640 #endif
6641 }
6642
Min(RValue<Float4> x,RValue<Float4> y)6643 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6644 {
6645 #if defined(__i386__) || defined(__x86_64__)
6646 return x86::minps(x, y);
6647 #else
6648 return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
6649 #endif
6650 }
6651
Rcp_pp(RValue<Float4> x,bool exactAtPow2)6652 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6653 {
6654 #if defined(__i386__) || defined(__x86_64__)
6655 if(exactAtPow2)
6656 {
6657 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6658 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6659 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6660 }
6661 return x86::rcpps(x);
6662 #else
6663 return As<Float4>(V(lowerRCP(V(x.value))));
6664 #endif
6665 }
6666
RcpSqrt_pp(RValue<Float4> x)6667 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6668 {
6669 #if defined(__i386__) || defined(__x86_64__)
6670 return x86::rsqrtps(x);
6671 #else
6672 return As<Float4>(V(lowerRSQRT(V(x.value))));
6673 #endif
6674 }
6675
Sqrt(RValue<Float4> x)6676 RValue<Float4> Sqrt(RValue<Float4> x)
6677 {
6678 #if defined(__i386__) || defined(__x86_64__)
6679 return x86::sqrtps(x);
6680 #else
6681 return As<Float4>(V(lowerSQRT(V(x.value))));
6682 #endif
6683 }
6684
Insert(RValue<Float4> x,RValue<Float> element,int i)6685 RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6686 {
6687 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6688 }
6689
Extract(RValue<Float4> x,int i)6690 RValue<Float> Extract(RValue<Float4> x, int i)
6691 {
6692 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6693 }
6694
Swizzle(RValue<Float4> x,unsigned char select)6695 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6696 {
6697 return RValue<Float4>(createSwizzle4(x.value, select));
6698 }
6699
ShuffleLowHigh(RValue<Float4> x,RValue<Float4> y,unsigned char imm)6700 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6701 {
6702 int shuffle[4] =
6703 {
6704 ((imm >> 0) & 0x03) + 0,
6705 ((imm >> 2) & 0x03) + 0,
6706 ((imm >> 4) & 0x03) + 4,
6707 ((imm >> 6) & 0x03) + 4,
6708 };
6709
6710 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6711 }
6712
UnpackLow(RValue<Float4> x,RValue<Float4> y)6713 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6714 {
6715 int shuffle[4] = {0, 4, 1, 5};
6716 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6717 }
6718
UnpackHigh(RValue<Float4> x,RValue<Float4> y)6719 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6720 {
6721 int shuffle[4] = {2, 6, 3, 7};
6722 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6723 }
6724
Mask(Float4 & lhs,RValue<Float4> rhs,unsigned char select)6725 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6726 {
6727 Value *vector = lhs.loadValue();
6728 Value *result = createMask4(vector, rhs.value, select);
6729 lhs.storeValue(result);
6730
6731 return RValue<Float4>(result);
6732 }
6733
SignMask(RValue<Float4> x)6734 RValue<Int> SignMask(RValue<Float4> x)
6735 {
6736 #if defined(__i386__) || defined(__x86_64__)
6737 return x86::movmskps(x);
6738 #else
6739 return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
6740 #endif
6741 }
6742
CmpEQ(RValue<Float4> x,RValue<Float4> y)6743 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6744 {
6745 // return As<Int4>(x86::cmpeqps(x, y));
6746 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6747 }
6748
CmpLT(RValue<Float4> x,RValue<Float4> y)6749 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6750 {
6751 // return As<Int4>(x86::cmpltps(x, y));
6752 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6753 }
6754
CmpLE(RValue<Float4> x,RValue<Float4> y)6755 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6756 {
6757 // return As<Int4>(x86::cmpleps(x, y));
6758 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6759 }
6760
CmpNEQ(RValue<Float4> x,RValue<Float4> y)6761 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6762 {
6763 // return As<Int4>(x86::cmpneqps(x, y));
6764 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6765 }
6766
CmpNLT(RValue<Float4> x,RValue<Float4> y)6767 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6768 {
6769 // return As<Int4>(x86::cmpnltps(x, y));
6770 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6771 }
6772
CmpNLE(RValue<Float4> x,RValue<Float4> y)6773 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6774 {
6775 // return As<Int4>(x86::cmpnleps(x, y));
6776 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6777 }
6778
IsInf(RValue<Float4> x)6779 RValue<Int4> IsInf(RValue<Float4> x)
6780 {
6781 return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000));
6782 }
6783
IsNan(RValue<Float4> x)6784 RValue<Int4> IsNan(RValue<Float4> x)
6785 {
6786 return ~CmpEQ(x, x);
6787 }
6788
Round(RValue<Float4> x)6789 RValue<Float4> Round(RValue<Float4> x)
6790 {
6791 #if defined(__i386__) || defined(__x86_64__)
6792 if(CPUID::supportsSSE4_1())
6793 {
6794 return x86::roundps(x, 0);
6795 }
6796 else
6797 {
6798 return Float4(RoundInt(x));
6799 }
6800 #else
6801 return RValue<Float4>(V(lowerRound(V(x.value))));
6802 #endif
6803 }
6804
Trunc(RValue<Float4> x)6805 RValue<Float4> Trunc(RValue<Float4> x)
6806 {
6807 #if defined(__i386__) || defined(__x86_64__)
6808 if(CPUID::supportsSSE4_1())
6809 {
6810 return x86::roundps(x, 3);
6811 }
6812 else
6813 {
6814 return Float4(Int4(x));
6815 }
6816 #else
6817 return RValue<Float4>(V(lowerTrunc(V(x.value))));
6818 #endif
6819 }
6820
Frac(RValue<Float4> x)6821 RValue<Float4> Frac(RValue<Float4> x)
6822 {
6823 Float4 frc;
6824
6825 #if defined(__i386__) || defined(__x86_64__)
6826 if(CPUID::supportsSSE4_1())
6827 {
6828 frc = x - Floor(x);
6829 }
6830 else
6831 {
6832 frc = x - Float4(Int4(x)); // Signed fractional part.
6833
6834 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
6835 }
6836 #else
6837 frc = x - Floor(x);
6838 #endif
6839
6840 // x - floor(x) can be 1.0 for very small negative x.
6841 // Clamp against the value just below 1.0.
6842 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
6843 }
6844
Floor(RValue<Float4> x)6845 RValue<Float4> Floor(RValue<Float4> x)
6846 {
6847 #if defined(__i386__) || defined(__x86_64__)
6848 if(CPUID::supportsSSE4_1())
6849 {
6850 return x86::floorps(x);
6851 }
6852 else
6853 {
6854 return x - Frac(x);
6855 }
6856 #else
6857 return RValue<Float4>(V(lowerFloor(V(x.value))));
6858 #endif
6859 }
6860
Ceil(RValue<Float4> x)6861 RValue<Float4> Ceil(RValue<Float4> x)
6862 {
6863 #if defined(__i386__) || defined(__x86_64__)
6864 if(CPUID::supportsSSE4_1())
6865 {
6866 return x86::ceilps(x);
6867 }
6868 else
6869 #endif
6870 {
6871 return -Floor(-x);
6872 }
6873 }
6874
getType()6875 Type *Float4::getType()
6876 {
6877 return T(llvm::VectorType::get(T(Float::getType()), 4));
6878 }
6879
operator +(RValue<Pointer<Byte>> lhs,int offset)6880 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6881 {
6882 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6883 }
6884
operator +(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6885 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6886 {
6887 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
6888 }
6889
operator +(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6890 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6891 {
6892 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
6893 }
6894
operator +=(Pointer<Byte> & lhs,int offset)6895 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6896 {
6897 return lhs = lhs + offset;
6898 }
6899
operator +=(Pointer<Byte> & lhs,RValue<Int> offset)6900 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6901 {
6902 return lhs = lhs + offset;
6903 }
6904
operator +=(Pointer<Byte> & lhs,RValue<UInt> offset)6905 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6906 {
6907 return lhs = lhs + offset;
6908 }
6909
operator -(RValue<Pointer<Byte>> lhs,int offset)6910 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6911 {
6912 return lhs + -offset;
6913 }
6914
operator -(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6915 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6916 {
6917 return lhs + -offset;
6918 }
6919
operator -(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6920 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6921 {
6922 return lhs + -offset;
6923 }
6924
operator -=(Pointer<Byte> & lhs,int offset)6925 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6926 {
6927 return lhs = lhs - offset;
6928 }
6929
operator -=(Pointer<Byte> & lhs,RValue<Int> offset)6930 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6931 {
6932 return lhs = lhs - offset;
6933 }
6934
operator -=(Pointer<Byte> & lhs,RValue<UInt> offset)6935 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6936 {
6937 return lhs = lhs - offset;
6938 }
6939
Return()6940 void Return()
6941 {
6942 Nucleus::createRetVoid();
6943 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6944 Nucleus::createUnreachable();
6945 }
6946
Return(RValue<Int> ret)6947 void Return(RValue<Int> ret)
6948 {
6949 Nucleus::createRet(ret.value);
6950 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6951 Nucleus::createUnreachable();
6952 }
6953
branch(RValue<Bool> cmp,BasicBlock * bodyBB,BasicBlock * endBB)6954 void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6955 {
6956 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6957 Nucleus::setInsertBlock(bodyBB);
6958 }
6959
Ticks()6960 RValue<Long> Ticks()
6961 {
6962 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
6963
6964 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
6965 }
6966 }
6967
6968 namespace rr
6969 {
6970 #if defined(__i386__) || defined(__x86_64__)
6971 namespace x86
6972 {
cvtss2si(RValue<Float> val)6973 RValue<Int> cvtss2si(RValue<Float> val)
6974 {
6975 llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
6976
6977 Float4 vector;
6978 vector.x = val;
6979
6980 return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
6981 }
6982
cvtps2dq(RValue<Float4> val)6983 RValue<Int4> cvtps2dq(RValue<Float4> val)
6984 {
6985 llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
6986
6987 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
6988 }
6989
rcpss(RValue<Float> val)6990 RValue<Float> rcpss(RValue<Float> val)
6991 {
6992 llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
6993
6994 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
6995
6996 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
6997 }
6998
sqrtss(RValue<Float> val)6999 RValue<Float> sqrtss(RValue<Float> val)
7000 {
7001 #if REACTOR_LLVM_VERSION < 7
7002 llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
7003 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
7004
7005 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0));
7006 #else
7007 llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
7008 return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
7009 #endif
7010 }
7011
rsqrtss(RValue<Float> val)7012 RValue<Float> rsqrtss(RValue<Float> val)
7013 {
7014 llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
7015
7016 Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
7017
7018 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
7019 }
7020
rcpps(RValue<Float4> val)7021 RValue<Float4> rcpps(RValue<Float4> val)
7022 {
7023 llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
7024
7025 return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
7026 }
7027
sqrtps(RValue<Float4> val)7028 RValue<Float4> sqrtps(RValue<Float4> val)
7029 {
7030 #if REACTOR_LLVM_VERSION < 7
7031 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
7032 #else
7033 llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
7034 #endif
7035
7036 return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
7037 }
7038
rsqrtps(RValue<Float4> val)7039 RValue<Float4> rsqrtps(RValue<Float4> val)
7040 {
7041 llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
7042
7043 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
7044 }
7045
maxps(RValue<Float4> x,RValue<Float4> y)7046 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
7047 {
7048 llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
7049
7050 return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
7051 }
7052
minps(RValue<Float4> x,RValue<Float4> y)7053 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
7054 {
7055 llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
7056
7057 return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
7058 }
7059
roundss(RValue<Float> val,unsigned char imm)7060 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
7061 {
7062 llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
7063
7064 Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
7065 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
7066
7067 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
7068 }
7069
floorss(RValue<Float> val)7070 RValue<Float> floorss(RValue<Float> val)
7071 {
7072 return roundss(val, 1);
7073 }
7074
ceilss(RValue<Float> val)7075 RValue<Float> ceilss(RValue<Float> val)
7076 {
7077 return roundss(val, 2);
7078 }
7079
roundps(RValue<Float4> val,unsigned char imm)7080 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
7081 {
7082 llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
7083
7084 return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
7085 }
7086
floorps(RValue<Float4> val)7087 RValue<Float4> floorps(RValue<Float4> val)
7088 {
7089 return roundps(val, 1);
7090 }
7091
ceilps(RValue<Float4> val)7092 RValue<Float4> ceilps(RValue<Float4> val)
7093 {
7094 return roundps(val, 2);
7095 }
7096
pabsd(RValue<Int4> x)7097 RValue<Int4> pabsd(RValue<Int4> x)
7098 {
7099 #if REACTOR_LLVM_VERSION < 7
7100 llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
7101
7102 return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value)))));
7103 #else
7104 return RValue<Int4>(V(lowerPABS(V(x.value))));
7105 #endif
7106 }
7107
paddsw(RValue<Short4> x,RValue<Short4> y)7108 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
7109 {
7110 llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
7111
7112 return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
7113 }
7114
psubsw(RValue<Short4> x,RValue<Short4> y)7115 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7116 {
7117 llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
7118
7119 return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
7120 }
7121
paddusw(RValue<UShort4> x,RValue<UShort4> y)7122 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7123 {
7124 llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
7125
7126 return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
7127 }
7128
psubusw(RValue<UShort4> x,RValue<UShort4> y)7129 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7130 {
7131 llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
7132
7133 return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
7134 }
7135
paddsb(RValue<SByte8> x,RValue<SByte8> y)7136 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7137 {
7138 llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
7139
7140 return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
7141 }
7142
psubsb(RValue<SByte8> x,RValue<SByte8> y)7143 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7144 {
7145 llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
7146
7147 return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
7148 }
7149
paddusb(RValue<Byte8> x,RValue<Byte8> y)7150 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7151 {
7152 llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
7153
7154 return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
7155 }
7156
psubusb(RValue<Byte8> x,RValue<Byte8> y)7157 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7158 {
7159 llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
7160
7161 return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
7162 }
7163
pavgw(RValue<UShort4> x,RValue<UShort4> y)7164 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7165 {
7166 #if REACTOR_LLVM_VERSION < 7
7167 llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
7168
7169 return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value)))));
7170 #else
7171 return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
7172 #endif
7173 }
7174
pmaxsw(RValue<Short4> x,RValue<Short4> y)7175 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7176 {
7177 #if REACTOR_LLVM_VERSION < 7
7178 llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
7179
7180 return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value)))));
7181 #else
7182 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
7183 #endif
7184 }
7185
pminsw(RValue<Short4> x,RValue<Short4> y)7186 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7187 {
7188 #if REACTOR_LLVM_VERSION < 7
7189 llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
7190
7191 return As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value)))));
7192 #else
7193 return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
7194 #endif
7195 }
7196
pcmpgtw(RValue<Short4> x,RValue<Short4> y)7197 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7198 {
7199 #if REACTOR_LLVM_VERSION < 7
7200 llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
7201
7202 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value)))));
7203 #else
7204 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
7205 #endif
7206 }
7207
pcmpeqw(RValue<Short4> x,RValue<Short4> y)7208 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7209 {
7210 #if REACTOR_LLVM_VERSION < 7
7211 llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
7212
7213 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value)))));
7214 #else
7215 return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
7216 #endif
7217 }
7218
pcmpgtb(RValue<SByte8> x,RValue<SByte8> y)7219 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7220 {
7221 #if REACTOR_LLVM_VERSION < 7
7222 llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
7223
7224 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value)))));
7225 #else
7226 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
7227 #endif
7228 }
7229
pcmpeqb(RValue<Byte8> x,RValue<Byte8> y)7230 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7231 {
7232 #if REACTOR_LLVM_VERSION < 7
7233 llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
7234
7235 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value)))));
7236 #else
7237 return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
7238 #endif
7239 }
7240
packssdw(RValue<Int2> x,RValue<Int2> y)7241 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7242 {
7243 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
7244
7245 return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
7246 }
7247
packssdw(RValue<Int4> x,RValue<Int4> y)7248 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7249 {
7250 llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
7251
7252 return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
7253 }
7254
packsswb(RValue<Short4> x,RValue<Short4> y)7255 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7256 {
7257 llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
7258
7259 return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
7260 }
7261
packuswb(RValue<Short4> x,RValue<Short4> y)7262 RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
7263 {
7264 llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
7265
7266 return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
7267 }
7268
packusdw(RValue<Int4> x,RValue<Int4> y)7269 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
7270 {
7271 if(CPUID::supportsSSE4_1())
7272 {
7273 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
7274
7275 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
7276 }
7277 else
7278 {
7279 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
7280 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
7281
7282 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
7283 }
7284 }
7285
psrlw(RValue<UShort4> x,unsigned char y)7286 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7287 {
7288 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
7289
7290 return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7291 }
7292
psrlw(RValue<UShort8> x,unsigned char y)7293 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7294 {
7295 llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
7296
7297 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7298 }
7299
psraw(RValue<Short4> x,unsigned char y)7300 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7301 {
7302 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
7303
7304 return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7305 }
7306
psraw(RValue<Short8> x,unsigned char y)7307 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7308 {
7309 llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
7310
7311 return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7312 }
7313
psllw(RValue<Short4> x,unsigned char y)7314 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7315 {
7316 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
7317
7318 return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7319 }
7320
psllw(RValue<Short8> x,unsigned char y)7321 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7322 {
7323 llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
7324
7325 return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7326 }
7327
pslld(RValue<Int2> x,unsigned char y)7328 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7329 {
7330 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
7331
7332 return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7333 }
7334
pslld(RValue<Int4> x,unsigned char y)7335 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7336 {
7337 llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
7338
7339 return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7340 }
7341
psrad(RValue<Int2> x,unsigned char y)7342 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7343 {
7344 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
7345
7346 return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7347 }
7348
psrad(RValue<Int4> x,unsigned char y)7349 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7350 {
7351 llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
7352
7353 return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7354 }
7355
psrld(RValue<UInt2> x,unsigned char y)7356 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7357 {
7358 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
7359
7360 return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7361 }
7362
psrld(RValue<UInt4> x,unsigned char y)7363 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7364 {
7365 llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
7366
7367 return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7368 }
7369
pmaxsd(RValue<Int4> x,RValue<Int4> y)7370 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7371 {
7372 #if REACTOR_LLVM_VERSION < 7
7373 llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
7374
7375 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value)))));
7376 #else
7377 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
7378 #endif
7379 }
7380
pminsd(RValue<Int4> x,RValue<Int4> y)7381 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7382 {
7383 #if REACTOR_LLVM_VERSION < 7
7384 llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
7385
7386 return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value)))));
7387 #else
7388 return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
7389 #endif
7390 }
7391
pmaxud(RValue<UInt4> x,RValue<UInt4> y)7392 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7393 {
7394 #if REACTOR_LLVM_VERSION < 7
7395 llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
7396
7397 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value)))));
7398 #else
7399 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
7400 #endif
7401 }
7402
pminud(RValue<UInt4> x,RValue<UInt4> y)7403 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7404 {
7405 #if REACTOR_LLVM_VERSION < 7
7406 llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
7407
7408 return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value)))));
7409 #else
7410 return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
7411 #endif
7412 }
7413
pmulhw(RValue<Short4> x,RValue<Short4> y)7414 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7415 {
7416 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
7417
7418 return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
7419 }
7420
pmulhuw(RValue<UShort4> x,RValue<UShort4> y)7421 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7422 {
7423 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
7424
7425 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
7426 }
7427
pmaddwd(RValue<Short4> x,RValue<Short4> y)7428 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7429 {
7430 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
7431
7432 return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
7433 }
7434
pmulhw(RValue<Short8> x,RValue<Short8> y)7435 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7436 {
7437 llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
7438
7439 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
7440 }
7441
pmulhuw(RValue<UShort8> x,RValue<UShort8> y)7442 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7443 {
7444 llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
7445
7446 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
7447 }
7448
pmaddwd(RValue<Short8> x,RValue<Short8> y)7449 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7450 {
7451 llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
7452
7453 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
7454 }
7455
movmskps(RValue<Float4> x)7456 RValue<Int> movmskps(RValue<Float4> x)
7457 {
7458 llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
7459
7460 return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
7461 }
7462
pmovmskb(RValue<Byte8> x)7463 RValue<Int> pmovmskb(RValue<Byte8> x)
7464 {
7465 llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
7466
7467 return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
7468 }
7469
pmovzxbd(RValue<Byte16> x)7470 RValue<Int4> pmovzxbd(RValue<Byte16> x)
7471 {
7472 #if REACTOR_LLVM_VERSION < 7
7473 llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
7474
7475 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value)))));
7476 #else
7477 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
7478 #endif
7479 }
7480
pmovsxbd(RValue<SByte16> x)7481 RValue<Int4> pmovsxbd(RValue<SByte16> x)
7482 {
7483 #if REACTOR_LLVM_VERSION < 7
7484 llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
7485
7486 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value)))));
7487 #else
7488 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
7489 #endif
7490 }
7491
pmovzxwd(RValue<UShort8> x)7492 RValue<Int4> pmovzxwd(RValue<UShort8> x)
7493 {
7494 #if REACTOR_LLVM_VERSION < 7
7495 llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
7496
7497 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value)))));
7498 #else
7499 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
7500 #endif
7501 }
7502
pmovsxwd(RValue<Short8> x)7503 RValue<Int4> pmovsxwd(RValue<Short8> x)
7504 {
7505 #if REACTOR_LLVM_VERSION < 7
7506 llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
7507
7508 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value)))));
7509 #else
7510 return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
7511 #endif
7512 }
7513 }
7514 #endif // defined(__i386__) || defined(__x86_64__)
7515 }
7516