/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file builder_misc.cpp
 *
 * @brief Implementation for miscellaneous builder functions
 *
 * Notes:
 *
 ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"

#include <cstdarg>
#include <cstring>

extern "C" void CallPrint(const char* fmt, ...);

namespace SwrJit
{
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to an
///        16 bit float with 5 exponent bits and a variable
///        number of mantissa bits.
44 /// @param val - 32-bit float 45 /// @todo Maybe move this outside of this file into a header? ConvertFloat32ToFloat16(float val)46 static uint16_t ConvertFloat32ToFloat16(float val) 47 { 48 uint32_t sign, exp, mant; 49 uint32_t roundBits; 50 51 // Extract the sign, exponent, and mantissa 52 uint32_t uf = *(uint32_t*)&val; 53 sign = (uf & 0x80000000) >> 31; 54 exp = (uf & 0x7F800000) >> 23; 55 mant = uf & 0x007FFFFF; 56 57 // Check for out of range 58 if (std::isnan(val)) 59 { 60 exp = 0x1F; 61 mant = 0x200; 62 sign = 1; // set the sign bit for NANs 63 } 64 else if (std::isinf(val)) 65 { 66 exp = 0x1f; 67 mant = 0x0; 68 } 69 else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value 70 { 71 exp = 0x1E; 72 mant = 0x3FF; 73 } 74 else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm 75 { 76 mant |= 0x00800000; 77 for (; exp <= 0x70; mant >>= 1, exp++) 78 ; 79 exp = 0; 80 mant = mant >> 13; 81 } 82 else if (exp < 0x66) // Too small to represent -> Zero 83 { 84 exp = 0; 85 mant = 0; 86 } 87 else 88 { 89 // Saves bits that will be shifted off for rounding 90 roundBits = mant & 0x1FFFu; 91 // convert exponent and mantissa to 16 bit format 92 exp = exp - 0x70; 93 mant = mant >> 13; 94 95 // Essentially RTZ, but round up if off by only 1 lsb 96 if (roundBits == 0x1FFFu) 97 { 98 mant++; 99 // check for overflow 100 if ((mant & 0xC00u) != 0) 101 exp++; 102 // make sure only the needed bits are used 103 mant &= 0x3FF; 104 } 105 } 106 107 uint32_t tmpVal = (sign << 15) | (exp << 10) | mant; 108 return (uint16_t)tmpVal; 109 } 110 C(bool i)111 Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 
1 : 0)); } 112 C(char i)113 Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); } 114 C(uint8_t i)115 Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); } 116 C(int i)117 Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); } 118 C(int64_t i)119 Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); } 120 C(uint16_t i)121 Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); } 122 C(uint32_t i)123 Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); } 124 C(uint64_t i)125 Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); } 126 C(float i)127 Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); } 128 PRED(bool pred)129 Constant* Builder::PRED(bool pred) 130 { 131 return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0)); 132 } 133 VIMMED1(uint64_t i)134 Value* Builder::VIMMED1(uint64_t i) 135 { 136 #if LLVM_VERSION_MAJOR <= 10 137 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); 138 #elif LLVM_VERSION_MAJOR == 11 139 return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); 140 #else 141 return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); 142 #endif 143 } 144 VIMMED1_16(uint64_t i)145 Value* Builder::VIMMED1_16(uint64_t i) 146 { 147 #if LLVM_VERSION_MAJOR <= 10 148 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); 149 #elif LLVM_VERSION_MAJOR == 11 150 return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); 151 #else 152 return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); 153 #endif 154 } 155 VIMMED1(int i)156 Value* Builder::VIMMED1(int i) 157 { 158 #if LLVM_VERSION_MAJOR <= 10 159 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); 160 #elif 
LLVM_VERSION_MAJOR == 11 161 return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); 162 #else 163 return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); 164 #endif 165 } 166 VIMMED1_16(int i)167 Value* Builder::VIMMED1_16(int i) 168 { 169 #if LLVM_VERSION_MAJOR <= 10 170 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); 171 #elif LLVM_VERSION_MAJOR == 11 172 return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); 173 #else 174 return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); 175 #endif 176 } 177 VIMMED1(uint32_t i)178 Value* Builder::VIMMED1(uint32_t i) 179 { 180 #if LLVM_VERSION_MAJOR <= 10 181 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); 182 #elif LLVM_VERSION_MAJOR == 11 183 return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); 184 #else 185 return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); 186 #endif 187 } 188 VIMMED1_16(uint32_t i)189 Value* Builder::VIMMED1_16(uint32_t i) 190 { 191 #if LLVM_VERSION_MAJOR <= 10 192 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); 193 #elif LLVM_VERSION_MAJOR == 11 194 return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); 195 #else 196 return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); 197 #endif 198 } 199 VIMMED1(float i)200 Value* Builder::VIMMED1(float i) 201 { 202 #if LLVM_VERSION_MAJOR <= 10 203 return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i))); 204 #elif LLVM_VERSION_MAJOR == 11 205 return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantFP>(C(i))); 206 #else 207 return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantFP>(C(i))); 208 #endif 209 } 210 VIMMED1_16(float i)211 Value* 
Builder::VIMMED1_16(float i) 212 { 213 #if LLVM_VERSION_MAJOR <= 10 214 return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i))); 215 #elif LLVM_VERSION_MAJOR == 11 216 return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantFP>(C(i))); 217 #else 218 return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantFP>(C(i))); 219 #endif 220 } 221 VIMMED1(bool i)222 Value* Builder::VIMMED1(bool i) 223 { 224 #if LLVM_VERSION_MAJOR <= 10 225 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i))); 226 #elif LLVM_VERSION_MAJOR == 11 227 return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i))); 228 #else 229 return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i))); 230 #endif 231 } 232 VIMMED1_16(bool i)233 Value* Builder::VIMMED1_16(bool i) 234 { 235 #if LLVM_VERSION_MAJOR <= 10 236 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i))); 237 #elif LLVM_VERSION_MAJOR == 11 238 return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i))); 239 #else 240 return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i))); 241 #endif 242 } 243 VUNDEF_IPTR()244 Value* Builder::VUNDEF_IPTR() { return UndefValue::get(getVectorType(mInt32PtrTy, mVWidth)); } 245 VUNDEF(Type * t)246 Value* Builder::VUNDEF(Type* t) { return UndefValue::get(getVectorType(t, mVWidth)); } 247 VUNDEF_I()248 Value* Builder::VUNDEF_I() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth)); } 249 VUNDEF_I_16()250 Value* Builder::VUNDEF_I_16() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth16)); } 251 VUNDEF_F()252 Value* Builder::VUNDEF_F() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth)); } 253 VUNDEF_F_16()254 Value* Builder::VUNDEF_F_16() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth16)); } 255 VUNDEF(Type * ty,uint32_t size)256 Value* Builder::VUNDEF(Type* ty, uint32_t size) 257 { 
258 return UndefValue::get(getVectorType(ty, size)); 259 } 260 VBROADCAST(Value * src,const llvm::Twine & name)261 Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name) 262 { 263 // check if src is already a vector 264 if (src->getType()->isVectorTy()) 265 { 266 return src; 267 } 268 269 return VECTOR_SPLAT(mVWidth, src, name); 270 } 271 VBROADCAST_16(Value * src)272 Value* Builder::VBROADCAST_16(Value* src) 273 { 274 // check if src is already a vector 275 if (src->getType()->isVectorTy()) 276 { 277 return src; 278 } 279 280 return VECTOR_SPLAT(mVWidth16, src); 281 } 282 IMMED(Value * v)283 uint32_t Builder::IMMED(Value* v) 284 { 285 SWR_ASSERT(isa<ConstantInt>(v)); 286 ConstantInt* pValConst = cast<ConstantInt>(v); 287 return pValConst->getZExtValue(); 288 } 289 S_IMMED(Value * v)290 int32_t Builder::S_IMMED(Value* v) 291 { 292 SWR_ASSERT(isa<ConstantInt>(v)); 293 ConstantInt* pValConst = cast<ConstantInt>(v); 294 return pValConst->getSExtValue(); 295 } 296 CALL(Value * Callee,const std::initializer_list<Value * > & argsList,const llvm::Twine & name)297 CallInst* Builder::CALL(Value* Callee, 298 const std::initializer_list<Value*>& argsList, 299 const llvm::Twine& name) 300 { 301 std::vector<Value*> args; 302 for (auto arg : argsList) 303 args.push_back(arg); 304 #if LLVM_VERSION_MAJOR >= 11 305 // see comment to CALLA(Callee) function in the header 306 return CALLA(FunctionCallee(cast<Function>(Callee)), args, name); 307 #else 308 return CALLA(Callee, args, name); 309 #endif 310 } 311 CALL(Value * Callee,Value * arg)312 CallInst* Builder::CALL(Value* Callee, Value* arg) 313 { 314 std::vector<Value*> args; 315 args.push_back(arg); 316 #if LLVM_VERSION_MAJOR >= 11 317 // see comment to CALLA(Callee) function in the header 318 return CALLA(FunctionCallee(cast<Function>(Callee)), args); 319 #else 320 return CALLA(Callee, args); 321 #endif 322 } 323 CALL2(Value * Callee,Value * arg1,Value * arg2)324 CallInst* Builder::CALL2(Value* Callee, Value* arg1, 
Value* arg2) 325 { 326 std::vector<Value*> args; 327 args.push_back(arg1); 328 args.push_back(arg2); 329 #if LLVM_VERSION_MAJOR >= 11 330 // see comment to CALLA(Callee) function in the header 331 return CALLA(FunctionCallee(cast<Function>(Callee)), args); 332 #else 333 return CALLA(Callee, args); 334 #endif 335 } 336 CALL3(Value * Callee,Value * arg1,Value * arg2,Value * arg3)337 CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3) 338 { 339 std::vector<Value*> args; 340 args.push_back(arg1); 341 args.push_back(arg2); 342 args.push_back(arg3); 343 #if LLVM_VERSION_MAJOR >= 11 344 // see comment to CALLA(Callee) function in the header 345 return CALLA(FunctionCallee(cast<Function>(Callee)), args); 346 #else 347 return CALLA(Callee, args); 348 #endif 349 } 350 VRCP(Value * va,const llvm::Twine & name)351 Value* Builder::VRCP(Value* va, const llvm::Twine& name) 352 { 353 return FDIV(VIMMED1(1.0f), va, name); // 1 / a 354 } 355 VPLANEPS(Value * vA,Value * vB,Value * vC,Value * & vX,Value * & vY)356 Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY) 357 { 358 Value* vOut = FMADDPS(vA, vX, vC); 359 vOut = FMADDPS(vB, vY, vOut); 360 return vOut; 361 } 362 363 ////////////////////////////////////////////////////////////////////////// 364 /// @brief insert a JIT call to CallPrint 365 /// - outputs formatted string to both stdout and VS output window 366 /// - DEBUG builds only 367 /// Usage example: 368 /// PRINT("index %d = 0x%p\n",{C(lane), pIndex}); 369 /// where C(lane) creates a constant value to print, and pIndex is the Value* 370 /// result from a GEP, printing out the pointer to memory 371 /// @param printStr - constant string to print, which includes format specifiers 372 /// @param printArgs - initializer list of Value*'s to print to std out PRINT(const std::string & printStr,const std::initializer_list<Value * > & printArgs)373 CallInst* Builder::PRINT(const std::string& printStr, 374 const 
std::initializer_list<Value*>& printArgs) 375 { 376 // push the arguments to CallPrint into a vector 377 std::vector<Value*> printCallArgs; 378 // save room for the format string. we still need to modify it for vectors 379 printCallArgs.resize(1); 380 381 // search through the format string for special processing 382 size_t pos = 0; 383 std::string tempStr(printStr); 384 pos = tempStr.find('%', pos); 385 auto v = printArgs.begin(); 386 387 while ((pos != std::string::npos) && (v != printArgs.end())) 388 { 389 Value* pArg = *v; 390 Type* pType = pArg->getType(); 391 392 if (pType->isVectorTy()) 393 { 394 Type* pContainedType = pType->getContainedType(0); 395 #if LLVM_VERSION_MAJOR >= 11 396 VectorType* pVectorType = cast<VectorType>(pType); 397 #endif 398 if (toupper(tempStr[pos + 1]) == 'X') 399 { 400 tempStr[pos] = '0'; 401 tempStr[pos + 1] = 'x'; 402 tempStr.insert(pos + 2, "%08X "); 403 pos += 7; 404 405 printCallArgs.push_back(VEXTRACT(pArg, C(0))); 406 407 std::string vectorFormatStr; 408 #if LLVM_VERSION_MAJOR >= 11 409 for (uint32_t i = 1; i < pVectorType->getNumElements(); ++i) 410 #else 411 for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i) 412 #endif 413 { 414 vectorFormatStr += "0x%08X "; 415 printCallArgs.push_back(VEXTRACT(pArg, C(i))); 416 } 417 418 tempStr.insert(pos, vectorFormatStr); 419 pos += vectorFormatStr.size(); 420 } 421 else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy())) 422 { 423 uint32_t i = 0; 424 #if LLVM_VERSION_MAJOR >= 11 425 for (; i < pVectorType->getNumElements() - 1; i++) 426 #else 427 for (; i < pType->getVectorNumElements() - 1; i++) 428 #endif 429 { 430 tempStr.insert(pos, std::string("%f ")); 431 pos += 3; 432 printCallArgs.push_back( 433 FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext))); 434 } 435 printCallArgs.push_back( 436 FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext))); 437 } 438 else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy())) 439 { 440 
uint32_t i = 0; 441 #if LLVM_VERSION_MAJOR >= 11 442 for (; i < pVectorType->getNumElements() - 1; i++) 443 #else 444 for (; i < pType->getVectorNumElements() - 1; i++) 445 #endif 446 { 447 tempStr.insert(pos, std::string("%d ")); 448 pos += 3; 449 printCallArgs.push_back( 450 S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); 451 } 452 printCallArgs.push_back( 453 S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); 454 } 455 else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy())) 456 { 457 uint32_t i = 0; 458 #if LLVM_VERSION_MAJOR >= 11 459 for (; i < pVectorType->getNumElements() - 1; i++) 460 #else 461 for (; i < pType->getVectorNumElements() - 1; i++) 462 #endif 463 { 464 tempStr.insert(pos, std::string("%d ")); 465 pos += 3; 466 printCallArgs.push_back( 467 Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); 468 } 469 printCallArgs.push_back( 470 Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext))); 471 } 472 } 473 else 474 { 475 if (toupper(tempStr[pos + 1]) == 'X') 476 { 477 tempStr[pos] = '0'; 478 tempStr.insert(pos + 1, "x%08"); 479 printCallArgs.push_back(pArg); 480 pos += 3; 481 } 482 // for %f we need to cast float Values to doubles so that they print out correctly 483 else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy())) 484 { 485 printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext))); 486 pos++; 487 } 488 else 489 { 490 printCallArgs.push_back(pArg); 491 } 492 } 493 494 // advance to the next arguement 495 v++; 496 pos = tempStr.find('%', ++pos); 497 } 498 499 // create global variable constant string 500 Constant* constString = ConstantDataArray::getString(JM()->mContext, tempStr, true); 501 GlobalVariable* gvPtr = new GlobalVariable( 502 constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr"); 503 JM()->mpCurrentModule->getGlobalList().push_back(gvPtr); 504 505 // get a pointer to the first character in the constant string array 506 
std::vector<Constant*> geplist{C(0), C(0)}; 507 Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false); 508 509 // insert the pointer to the format string in the argument vector 510 printCallArgs[0] = strGEP; 511 512 // get pointer to CallPrint function and insert decl into the module if needed 513 std::vector<Type*> args; 514 args.push_back(PointerType::get(mInt8Ty, 0)); 515 FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true); 516 Function* callPrintFn = 517 #if LLVM_VERSION_MAJOR >= 9 518 cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy).getCallee()); 519 #else 520 cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy)); 521 #endif 522 523 // if we haven't yet added the symbol to the symbol table 524 if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr) 525 { 526 sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint); 527 } 528 529 // insert a call to CallPrint 530 return CALLA(callPrintFn, printCallArgs); 531 } 532 533 ////////////////////////////////////////////////////////////////////////// 534 /// @brief Wrapper around PRINT with initializer list. 
PRINT(const std::string & printStr)535 CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); } 536 EXTRACT_16(Value * x,uint32_t imm)537 Value* Builder::EXTRACT_16(Value* x, uint32_t imm) 538 { 539 if (imm == 0) 540 { 541 return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); 542 } 543 else 544 { 545 return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15}); 546 } 547 } 548 JOIN_16(Value * a,Value * b)549 Value* Builder::JOIN_16(Value* a, Value* b) 550 { 551 return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); 552 } 553 554 ////////////////////////////////////////////////////////////////////////// 555 /// @brief convert x86 <N x float> mask to llvm <N x i1> mask MASK(Value * vmask)556 Value* Builder::MASK(Value* vmask) 557 { 558 Value* src = BITCAST(vmask, mSimdInt32Ty); 559 return ICMP_SLT(src, VIMMED1(0)); 560 } 561 MASK_16(Value * vmask)562 Value* Builder::MASK_16(Value* vmask) 563 { 564 Value* src = BITCAST(vmask, mSimd16Int32Ty); 565 return ICMP_SLT(src, VIMMED1_16(0)); 566 } 567 568 ////////////////////////////////////////////////////////////////////////// 569 /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask VMASK(Value * mask)570 Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); } 571 VMASK_16(Value * mask)572 Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); } 573 574 /// @brief Convert <Nxi1> llvm mask to integer VMOVMSK(Value * mask)575 Value* Builder::VMOVMSK(Value* mask) 576 { 577 #if LLVM_VERSION_MAJOR >= 11 578 VectorType* pVectorType = cast<VectorType>(mask->getType()); 579 SWR_ASSERT(pVectorType->getElementType() == mInt1Ty); 580 uint32_t numLanes = pVectorType->getNumElements(); 581 #else 582 SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty); 583 uint32_t numLanes = mask->getType()->getVectorNumElements(); 584 #endif 585 Value* i32Result; 586 if (numLanes == 8) 587 { 588 
i32Result = BITCAST(mask, mInt8Ty); 589 } 590 else if (numLanes == 16) 591 { 592 i32Result = BITCAST(mask, mInt16Ty); 593 } 594 else 595 { 596 SWR_ASSERT("Unsupported vector width"); 597 i32Result = BITCAST(mask, mInt8Ty); 598 } 599 return Z_EXT(i32Result, mInt32Ty); 600 } 601 602 ////////////////////////////////////////////////////////////////////////// 603 /// @brief Generate a VPSHUFB operation in LLVM IR. If not 604 /// supported on the underlying platform, emulate it 605 /// @param a - 256bit SIMD(32x8bit) of 8bit integer values 606 /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values 607 /// Byte masks in lower 128 lane of b selects 8 bit values from lower 608 /// 128bits of a, and vice versa for the upper lanes. If the mask 609 /// value is negative, '0' is inserted. PSHUFB(Value * a,Value * b)610 Value* Builder::PSHUFB(Value* a, Value* b) 611 { 612 Value* res; 613 // use avx2 pshufb instruction if available 614 if (JM()->mArch.AVX2()) 615 { 616 res = VPSHUFB(a, b); 617 } 618 else 619 { 620 Constant* cB = dyn_cast<Constant>(b); 621 assert(cB != nullptr); 622 // number of 8 bit elements in b 623 uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements(); 624 // output vector 625 Value* vShuf = UndefValue::get(getVectorType(mInt8Ty, numElms)); 626 627 // insert an 8 bit value from the high and low lanes of a per loop iteration 628 numElms /= 2; 629 for (uint32_t i = 0; i < numElms; i++) 630 { 631 ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i)); 632 ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms)); 633 634 // extract values from constant mask 635 char valLow128bLane = (char)(cLow128b->getSExtValue()); 636 char valHigh128bLane = (char)(cHigh128b->getSExtValue()); 637 638 Value* insertValLow128b; 639 Value* insertValHigh128b; 640 641 // if the mask value is negative, insert a '0' in the respective output position 642 // otherwise, lookup the value at mask position (bits 3..0 of the 
respective mask 643 // byte) in a and insert in output vector 644 insertValLow128b = 645 (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF))); 646 insertValHigh128b = (valHigh128bLane < 0) 647 ? C((char)0) 648 : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms)); 649 650 vShuf = VINSERT(vShuf, insertValLow128b, i); 651 vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms)); 652 } 653 res = vShuf; 654 } 655 return res; 656 } 657 658 ////////////////////////////////////////////////////////////////////////// 659 /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32 660 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it 661 /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only 662 /// lower 8 values are used. PMOVSXBD(Value * a)663 Value* Builder::PMOVSXBD(Value* a) 664 { 665 // VPMOVSXBD output type 666 Type* v8x32Ty = getVectorType(mInt32Ty, 8); 667 // Extract 8 values from 128bit lane and sign extend 668 return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); 669 } 670 671 ////////////////////////////////////////////////////////////////////////// 672 /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32 673 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it 674 /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values. PMOVSXWD(Value * a)675 Value* Builder::PMOVSXWD(Value* a) 676 { 677 // VPMOVSXWD output type 678 Type* v8x32Ty = getVectorType(mInt32Ty, 8); 679 // Extract 8 values from 128bit lane and sign extend 680 return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty); 681 } 682 683 ////////////////////////////////////////////////////////////////////////// 684 /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion) 685 /// in LLVM IR. If not supported on the underlying platform, emulate it 686 /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format. 
CVTPH2PS(Value * a,const llvm::Twine & name)687 Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name) 688 { 689 // Bitcast Nxint16 to Nxhalf 690 #if LLVM_VERSION_MAJOR >= 11 691 uint32_t numElems = cast<VectorType>(a->getType())->getNumElements(); 692 #else 693 uint32_t numElems = a->getType()->getVectorNumElements(); 694 #endif 695 Value* input = BITCAST(a, getVectorType(mFP16Ty, numElems)); 696 697 return FP_EXT(input, getVectorType(mFP32Ty, numElems), name); 698 } 699 700 ////////////////////////////////////////////////////////////////////////// 701 /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion) 702 /// in LLVM IR. If not supported on the underlying platform, emulate it 703 /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format. CVTPS2PH(Value * a,Value * rounding)704 Value* Builder::CVTPS2PH(Value* a, Value* rounding) 705 { 706 if (JM()->mArch.F16C()) 707 { 708 return VCVTPS2PH(a, rounding); 709 } 710 else 711 { 712 // call scalar C function for now 713 FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty); 714 Function* pCvtPs2Ph = cast<Function>( 715 #if LLVM_VERSION_MAJOR >= 9 716 JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy).getCallee()); 717 #else 718 JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy)); 719 #endif 720 721 if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr) 722 { 723 sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", 724 (void*)&ConvertFloat32ToFloat16); 725 } 726 727 Value* pResult = UndefValue::get(mSimdInt16Ty); 728 for (uint32_t i = 0; i < mVWidth; ++i) 729 { 730 Value* pSrc = VEXTRACT(a, C(i)); 731 Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc}); 732 pResult = VINSERT(pResult, pConv, C(i)); 733 } 734 735 return pResult; 736 } 737 } 738 PMAXSD(Value * a,Value * b)739 Value* Builder::PMAXSD(Value* a, Value* b) 740 { 741 Value* cmp = ICMP_SGT(a, b); 742 return 
SELECT(cmp, a, b); 743 } 744 PMINSD(Value * a,Value * b)745 Value* Builder::PMINSD(Value* a, Value* b) 746 { 747 Value* cmp = ICMP_SLT(a, b); 748 return SELECT(cmp, a, b); 749 } 750 PMAXUD(Value * a,Value * b)751 Value* Builder::PMAXUD(Value* a, Value* b) 752 { 753 Value* cmp = ICMP_UGT(a, b); 754 return SELECT(cmp, a, b); 755 } 756 PMINUD(Value * a,Value * b)757 Value* Builder::PMINUD(Value* a, Value* b) 758 { 759 Value* cmp = ICMP_ULT(a, b); 760 return SELECT(cmp, a, b); 761 } 762 763 // Helper function to create alloca in entry block of function CreateEntryAlloca(Function * pFunc,Type * pType)764 Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType) 765 { 766 auto saveIP = IRB()->saveIP(); 767 IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin()); 768 Value* pAlloca = ALLOCA(pType); 769 if (saveIP.isSet()) 770 IRB()->restoreIP(saveIP); 771 return pAlloca; 772 } 773 CreateEntryAlloca(Function * pFunc,Type * pType,Value * pArraySize)774 Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize) 775 { 776 auto saveIP = IRB()->saveIP(); 777 IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin()); 778 Value* pAlloca = ALLOCA(pType, pArraySize); 779 if (saveIP.isSet()) 780 IRB()->restoreIP(saveIP); 781 return pAlloca; 782 } 783 VABSPS(Value * a)784 Value* Builder::VABSPS(Value* a) 785 { 786 Value* asInt = BITCAST(a, mSimdInt32Ty); 787 Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty); 788 return result; 789 } 790 ICLAMP(Value * src,Value * low,Value * high,const llvm::Twine & name)791 Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name) 792 { 793 Value* lowCmp = ICMP_SLT(src, low); 794 Value* ret = SELECT(lowCmp, low, src); 795 796 Value* highCmp = ICMP_SGT(ret, high); 797 ret = SELECT(highCmp, high, ret, name); 798 799 return ret; 800 } 801 FCLAMP(Value * src,Value * low,Value * high)802 Value* Builder::FCLAMP(Value* src, Value* low, 
Value* high) 803 { 804 Value* lowCmp = FCMP_OLT(src, low); 805 Value* ret = SELECT(lowCmp, low, src); 806 807 Value* highCmp = FCMP_OGT(ret, high); 808 ret = SELECT(highCmp, high, ret); 809 810 return ret; 811 } 812 FCLAMP(Value * src,float low,float high)813 Value* Builder::FCLAMP(Value* src, float low, float high) 814 { 815 Value* result = VMAXPS(src, VIMMED1(low)); 816 result = VMINPS(result, VIMMED1(high)); 817 818 return result; 819 } 820 FMADDPS(Value * a,Value * b,Value * c)821 Value* Builder::FMADDPS(Value* a, Value* b, Value* c) 822 { 823 Value* vOut; 824 // This maps to LLVM fmuladd intrinsic 825 vOut = VFMADDPS(a, b, c); 826 return vOut; 827 } 828 829 ////////////////////////////////////////////////////////////////////////// 830 /// @brief pop count on vector mask (e.g. <8 x i1>) VPOPCNT(Value * a)831 Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); } 832 833 ////////////////////////////////////////////////////////////////////////// 834 /// @brief Float / Fixed-point conversions 835 ////////////////////////////////////////////////////////////////////////// VCVT_F32_FIXED_SI(Value * vFloat,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)836 Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat, 837 uint32_t numIntBits, 838 uint32_t numFracBits, 839 const llvm::Twine& name) 840 { 841 SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); 842 Value* fixed = nullptr; 843 844 #if 0 // This doesn't work for negative numbers!! 
845 { 846 fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), 847 C(_MM_FROUND_TO_NEAREST_INT)), 848 mSimdInt32Ty); 849 } 850 else 851 #endif 852 { 853 // Do round to nearest int on fractional bits first 854 // Not entirely perfect for negative numbers, but close enough 855 vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), 856 C(_MM_FROUND_TO_NEAREST_INT)); 857 vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); 858 859 // TODO: Handle INF, NAN, overflow / underflow, etc. 860 861 Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); 862 Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); 863 Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); 864 vFixed = OR(vFixed, VIMMED1(1 << 23)); 865 vFixed = SELECT(vSgn, NEG(vFixed), vFixed); 866 867 Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); 868 vExp = SUB(vExp, VIMMED1(127)); 869 870 Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); 871 872 fixed = ASHR(vFixed, vExtraBits, name); 873 } 874 875 return fixed; 876 } 877 VCVT_FIXED_SI_F32(Value * vFixed,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)878 Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed, 879 uint32_t numIntBits, 880 uint32_t numFracBits, 881 const llvm::Twine& name) 882 { 883 SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); 884 uint32_t extraBits = 32 - numIntBits - numFracBits; 885 if (numIntBits && extraBits) 886 { 887 // Sign extend 888 Value* shftAmt = VIMMED1(extraBits); 889 vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); 890 } 891 892 Value* fVal = VIMMED1(0.0f); 893 Value* fFrac = VIMMED1(0.0f); 894 if (numIntBits) 895 { 896 fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); 897 } 898 899 if (numFracBits) 900 { 901 fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); 902 fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); 903 } 904 905 return FADD(fVal, fFrac, name); 906 } 907 
VCVT_F32_FIXED_UI(Value * vFloat,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)908 Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat, 909 uint32_t numIntBits, 910 uint32_t numFracBits, 911 const llvm::Twine& name) 912 { 913 SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); 914 Value* fixed = nullptr; 915 #if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision 916 // standpoint... 917 { 918 fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), 919 C(_MM_FROUND_TO_NEAREST_INT)), 920 mSimdInt32Ty); 921 } 922 #else 923 { 924 // Do round to nearest int on fractional bits first 925 vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), 926 C(_MM_FROUND_TO_NEAREST_INT)); 927 vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); 928 929 // TODO: Handle INF, NAN, overflow / underflow, etc. 930 931 Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); 932 Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); 933 Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); 934 vFixed = OR(vFixed, VIMMED1(1 << 23)); 935 936 Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); 937 vExp = SUB(vExp, VIMMED1(127)); 938 939 Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); 940 941 fixed = LSHR(vFixed, vExtraBits, name); 942 } 943 #endif 944 return fixed; 945 } 946 VCVT_FIXED_UI_F32(Value * vFixed,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)947 Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed, 948 uint32_t numIntBits, 949 uint32_t numFracBits, 950 const llvm::Twine& name) 951 { 952 SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); 953 uint32_t extraBits = 32 - numIntBits - numFracBits; 954 if (numIntBits && extraBits) 955 { 956 // Sign extend 957 Value* shftAmt = VIMMED1(extraBits); 958 vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); 959 } 960 961 Value* fVal = VIMMED1(0.0f); 962 Value* fFrac = VIMMED1(0.0f); 963 if 
(numIntBits) 964 { 965 fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); 966 } 967 968 if (numFracBits) 969 { 970 fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); 971 fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); 972 } 973 974 return FADD(fVal, fFrac, name); 975 } 976 977 ////////////////////////////////////////////////////////////////////////// 978 /// @brief C functions called by LLVM IR 979 ////////////////////////////////////////////////////////////////////////// 980 VEXTRACTI128(Value * a,Constant * imm8)981 Value* Builder::VEXTRACTI128(Value* a, Constant* imm8) 982 { 983 bool flag = !imm8->isZeroValue(); 984 SmallVector<Constant*, 8> idx; 985 for (unsigned i = 0; i < mVWidth / 2; i++) 986 { 987 idx.push_back(C(flag ? i + mVWidth / 2 : i)); 988 } 989 return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx)); 990 } 991 VINSERTI128(Value * a,Value * b,Constant * imm8)992 Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8) 993 { 994 bool flag = !imm8->isZeroValue(); 995 SmallVector<Constant*, 8> idx; 996 for (unsigned i = 0; i < mVWidth; i++) 997 { 998 idx.push_back(C(i)); 999 } 1000 Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx)); 1001 1002 SmallVector<Constant*, 8> idx2; 1003 for (unsigned i = 0; i < mVWidth / 2; i++) 1004 { 1005 idx2.push_back(C(flag ? i : i + mVWidth)); 1006 } 1007 for (unsigned i = mVWidth / 2; i < mVWidth; i++) 1008 { 1009 idx2.push_back(C(flag ? 
i + mVWidth / 2 : i)); 1010 } 1011 return VSHUFFLE(a, inter, ConstantVector::get(idx2)); 1012 } 1013 1014 // rdtsc buckets macros RDTSC_START(Value * pBucketMgr,Value * pId)1015 void Builder::RDTSC_START(Value* pBucketMgr, Value* pId) 1016 { 1017 // @todo due to an issue with thread local storage propagation in llvm, we can only safely 1018 // call into buckets framework when single threaded 1019 if (KNOB_SINGLE_THREADED) 1020 { 1021 std::vector<Type*> args{ 1022 PointerType::get(mInt32Ty, 0), // pBucketMgr 1023 mInt32Ty // id 1024 }; 1025 1026 FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false); 1027 Function* pFunc = cast<Function>( 1028 #if LLVM_VERSION_MAJOR >= 9 1029 JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy).getCallee()); 1030 #else 1031 JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy)); 1032 #endif 1033 if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == 1034 nullptr) 1035 { 1036 sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", 1037 (void*)&BucketManager_StartBucket); 1038 } 1039 1040 CALL(pFunc, {pBucketMgr, pId}); 1041 } 1042 } 1043 RDTSC_STOP(Value * pBucketMgr,Value * pId)1044 void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId) 1045 { 1046 // @todo due to an issue with thread local storage propagation in llvm, we can only safely 1047 // call into buckets framework when single threaded 1048 if (KNOB_SINGLE_THREADED) 1049 { 1050 std::vector<Type*> args{ 1051 PointerType::get(mInt32Ty, 0), // pBucketMgr 1052 mInt32Ty // id 1053 }; 1054 1055 FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false); 1056 Function* pFunc = cast<Function>( 1057 #if LLVM_VERSION_MAJOR >= 9 1058 JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy).getCallee()); 1059 #else 1060 JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy)); 1061 #endif 1062 
if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == 1063 nullptr) 1064 { 1065 sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", 1066 (void*)&BucketManager_StopBucket); 1067 } 1068 1069 CALL(pFunc, {pBucketMgr, pId}); 1070 } 1071 } 1072 GetTypeSize(Type * pType)1073 uint32_t Builder::GetTypeSize(Type* pType) 1074 { 1075 if (pType->isStructTy()) 1076 { 1077 uint32_t numElems = pType->getStructNumElements(); 1078 Type* pElemTy = pType->getStructElementType(0); 1079 return numElems * GetTypeSize(pElemTy); 1080 } 1081 1082 if (pType->isArrayTy()) 1083 { 1084 uint32_t numElems = pType->getArrayNumElements(); 1085 Type* pElemTy = pType->getArrayElementType(); 1086 return numElems * GetTypeSize(pElemTy); 1087 } 1088 1089 if (pType->isIntegerTy()) 1090 { 1091 uint32_t bitSize = pType->getIntegerBitWidth(); 1092 return bitSize / 8; 1093 } 1094 1095 if (pType->isFloatTy()) 1096 { 1097 return 4; 1098 } 1099 1100 if (pType->isHalfTy()) 1101 { 1102 return 2; 1103 } 1104 1105 if (pType->isDoubleTy()) 1106 { 1107 return 8; 1108 } 1109 1110 SWR_ASSERT(false, "Unimplemented type."); 1111 return 0; 1112 } 1113 } // namespace SwrJit 1114