Lines matching refs: B (the IRBuilder<> &B threaded through AMDGPUAtomicOptimizer: buildNonAtomicBinOp, buildScan, buildShiftRight, buildMul and optimizeAtomic)
50 Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
52 Value *buildShiftRight(IRBuilder<> &B, Value *V, Value *const Identity) const;
246 static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, in buildNonAtomicBinOp() argument
254 return B.CreateBinOp(Instruction::Add, LHS, RHS); in buildNonAtomicBinOp()
256 return B.CreateBinOp(Instruction::Sub, LHS, RHS); in buildNonAtomicBinOp()
258 return B.CreateBinOp(Instruction::And, LHS, RHS); in buildNonAtomicBinOp()
260 return B.CreateBinOp(Instruction::Or, LHS, RHS); in buildNonAtomicBinOp()
262 return B.CreateBinOp(Instruction::Xor, LHS, RHS); in buildNonAtomicBinOp()
277 Value *Cond = B.CreateICmp(Pred, LHS, RHS); in buildNonAtomicBinOp()
278 return B.CreateSelect(Cond, LHS, RHS); in buildNonAtomicBinOp()
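The matches above cover buildNonAtomicBinOp, which maps an AtomicRMW opcode to the equivalent plain IR: the arithmetic and bitwise cases go straight to CreateBinOp, while the min/max variants fall through to the ICmp/Select pair on lines 277-278. A minimal scalar model of that mapping (the enum and function names below are mine, not the pass's):

    #include <cstdint>

    // Hedged scalar model of buildNonAtomicBinOp: a plain binop for the
    // arithmetic and bitwise cases, an icmp feeding a select for min/max.
    enum class BinOp { Add, Sub, And, Or, Xor, Max, Min, UMax, UMin };

    static int64_t nonAtomicBinOp(BinOp Op, int64_t LHS, int64_t RHS) {
      switch (Op) {
      case BinOp::Add:  return LHS + RHS;
      case BinOp::Sub:  return LHS - RHS;
      case BinOp::And:  return LHS & RHS;
      case BinOp::Or:   return LHS | RHS;
      case BinOp::Xor:  return LHS ^ RHS;
      // Lines 277-278: icmp with the matching predicate, then select.
      case BinOp::Max:  return LHS > RHS ? LHS : RHS;                     // ICMP_SGT
      case BinOp::Min:  return LHS < RHS ? LHS : RHS;                     // ICMP_SLT
      case BinOp::UMax: return uint64_t(LHS) > uint64_t(RHS) ? LHS : RHS; // ICMP_UGT
      case BinOp::UMin: return uint64_t(LHS) < uint64_t(RHS) ? LHS : RHS; // ICMP_ULT
      }
      return RHS; // not reached
    }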
283 Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, in buildScan() argument
286 Module *M = B.GetInsertBlock()->getModule(); in buildScan()
296 B, Op, V, in buildScan()
297 B.CreateCall(UpdateDPP, in buildScan()
298 {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx), in buildScan()
299 B.getInt32(0xf), B.getInt32(0xf), B.getFalse()})); in buildScan()
304 B, Op, V, in buildScan()
305 B.CreateCall(UpdateDPP, in buildScan()
306 {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa), in buildScan()
307 B.getInt32(0xf), B.getFalse()})); in buildScan()
309 B, Op, V, in buildScan()
310 B.CreateCall(UpdateDPP, in buildScan()
311 {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc), in buildScan()
312 B.getInt32(0xf), B.getFalse()})); in buildScan()
320 B.CreateCall(PermLaneX16, {V, V, B.getInt32(-1), B.getInt32(-1), in buildScan()
321 B.getFalse(), B.getFalse()}); in buildScan()
323 B, Op, V, in buildScan()
324 B.CreateCall(UpdateDPP, in buildScan()
325 {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID), in buildScan()
326 B.getInt32(0xa), B.getInt32(0xf), B.getFalse()})); in buildScan()
329 Value *const Lane31 = B.CreateCall(ReadLane, {V, B.getInt32(31)}); in buildScan()
331 B, Op, V, in buildScan()
332 B.CreateCall(UpdateDPP, in buildScan()
333 {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID), in buildScan()
334 B.getInt32(0xc), B.getInt32(0xf), B.getFalse()})); in buildScan()
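buildScan assembles a wave-wide inclusive scan from DPP moves: four row_shr steps with offsets 1, 2, 4 and 8 combine values within each row of 16 lanes (substituting the identity when the source lane falls outside the row), and the cross-row carries are then added with row_bcast:15 (row mask 0xa) and row_bcast:31 (row mask 0xc) on pre-GFX10 targets, or with permlanex16 plus a readlane of lane 31 on GFX10, where the row-broadcast DPP modes no longer exist. A hedged lane-by-lane model of the pre-GFX10, wave64 sequence (names are mine; the GFX10 path produces the same net result):

    #include <array>
    #include <cstdint>
    #include <functional>

    // Hedged model of the pre-GFX10 buildScan sequence on a 64-lane wave:
    // four row_shr steps (offsets 1, 2, 4, 8 within each row of 16 lanes,
    // identity when the source lane is outside the row), then row_bcast:15
    // written to rows 1 and 3 (row mask 0xa) and row_bcast:31 written to
    // rows 2 and 3 (row mask 0xc).  The result is an inclusive scan.
    using Wave = std::array<uint64_t, 64>;

    static Wave inclusiveScan(Wave V, uint64_t Identity,
                              const std::function<uint64_t(uint64_t, uint64_t)> &Op) {
      for (int Shift : {1, 2, 4, 8}) {                     // row_shr:1/2/4/8
        Wave Old = V;
        for (int L = 0; L < 64; ++L)
          V[L] = Op(Old[L], (L % 16 >= Shift) ? Old[L - Shift] : Identity);
      }
      Wave Old = V;
      for (int L = 16; L < 32; ++L) V[L] = Op(Old[L], Old[15]);  // row_bcast:15, row 1
      for (int L = 48; L < 64; ++L) V[L] = Op(Old[L], Old[47]);  // row_bcast:15, row 3
      Old = V;
      for (int L = 32; L < 64; ++L) V[L] = Op(Old[L], Old[31]);  // row_bcast:31, rows 2-3
      return V;
    }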
342 Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V, in buildShiftRight() argument
345 Module *M = B.GetInsertBlock()->getModule(); in buildShiftRight()
355 V = B.CreateCall(UpdateDPP, in buildShiftRight()
356 {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf), in buildShiftRight()
357 B.getInt32(0xf), B.getFalse()}); in buildShiftRight()
362 V = B.CreateCall(UpdateDPP, in buildShiftRight()
363 {Identity, V, B.getInt32(DPP::ROW_SHR0 + 1), in buildShiftRight()
364 B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); in buildShiftRight()
367 V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}), in buildShiftRight()
368 B.getInt32(16), V}); in buildShiftRight()
372 V = B.CreateCall( in buildShiftRight()
374 {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V}); in buildShiftRight()
377 V = B.CreateCall( in buildShiftRight()
379 {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V}); in buildShiftRight()
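buildShiftRight turns that inclusive scan into an exclusive one by moving every lane's value one lane to the right and letting the identity enter lane 0: a single wave_shr:1 on pre-GFX10 (lines 355-357), or row_shr:1 plus the readlane/writelane fix-ups on lines 367-379 that carry lanes 15, 31 and 47 across the row boundaries into lanes 16, 32 and 48 on GFX10. A hedged model of the net effect (names are mine):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Hedged model of buildShiftRight: whichever instruction sequence is
    // emitted, the net effect is a whole-wave shift right by one lane with
    // the identity entering lane 0, i.e. inclusive scan becomes exclusive.
    template <std::size_t N>
    static std::array<uint64_t, N> shiftRightOne(const std::array<uint64_t, N> &Incl,
                                                 uint64_t Identity) {
      std::array<uint64_t, N> Excl{};
      Excl[0] = Identity;
      for (std::size_t L = 1; L < N; ++L)
        Excl[L] = Incl[L - 1];
      return Excl;
    }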
407 static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) { in buildMul() argument
409 return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS); in buildMul()
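buildMul is a small folding helper: a multiply by a constant 1 is dropped and the other operand is returned as-is (line 409). A trivial scalar model (names are mine):

    #include <cstdint>

    // Hedged model of buildMul's fold: multiplying by a known 1 is dropped.
    static uint64_t mulOrFold(uint64_t LHS, bool LHSIsConstantOne, uint64_t RHS) {
      return LHSIsConstantOne ? RHS : LHS * RHS;
    }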
417 IRBuilder<> B(&I); in optimizeAtomic() local
433 Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {}); in optimizeAtomic()
441 B.SetInsertPoint(&I); in optimizeAtomic()
446 auto *const VecTy = FixedVectorType::get(B.getInt32Ty(), 2); in optimizeAtomic()
454 Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize()); in optimizeAtomic()
456 B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue()); in optimizeAtomic()
464 Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, in optimizeAtomic()
465 {Ballot, B.getInt32(0)}); in optimizeAtomic()
467 Value *const BitCast = B.CreateBitCast(Ballot, VecTy); in optimizeAtomic()
468 Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0)); in optimizeAtomic()
469 Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); in optimizeAtomic()
470 Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, in optimizeAtomic()
471 {ExtractLo, B.getInt32(0)}); in optimizeAtomic()
473 B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt}); in optimizeAtomic()
475 Mbcnt = B.CreateIntCast(Mbcnt, Ty, false); in optimizeAtomic()
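The optimizeAtomic matches begin the core rewrite: rather than every active lane issuing its own atomic to the same address, the wave computes one combined update, a single lane performs the atomic, and each lane then reconstructs the value its own atomic would have returned. Lines 454-475 compute each lane's position among the active lanes: amdgcn_ballot over "true" materialises the exec mask as a wave-sized integer, and mbcnt_lo/mbcnt_hi count the set bits strictly below the current lane (on wave64 the mask is split into two 32-bit halves first). A hedged C++20 model of the value Mbcnt ends up holding (function name is mine):

    #include <bit>
    #include <cstdint>

    // Hedged model of the ballot + mbcnt_lo/mbcnt_hi sequence on wave64:
    // Mbcnt is the number of active lanes whose lane id is smaller than the
    // current one, so the first active lane sees Mbcnt == 0.
    static unsigned mbcntForLane(uint64_t ExecMask, unsigned LaneId /* 0..63 */) {
      uint64_t Below = ExecMask & ((uint64_t(1) << LaneId) - 1);
      return std::popcount(Below);
    }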
477 Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth)); in optimizeAtomic()
487 NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); in optimizeAtomic()
491 NewV = buildScan(B, ScanOp, NewV, Identity); in optimizeAtomic()
492 ExclScan = buildShiftRight(B, NewV, Identity); in optimizeAtomic()
497 Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1); in optimizeAtomic()
499 Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty()); in optimizeAtomic()
501 B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty()); in optimizeAtomic()
502 CallInst *const ReadLaneLo = B.CreateIntrinsic( in optimizeAtomic()
504 CallInst *const ReadLaneHi = B.CreateIntrinsic( in optimizeAtomic()
506 Value *const PartialInsert = B.CreateInsertElement( in optimizeAtomic()
507 UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0)); in optimizeAtomic()
509 B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1)); in optimizeAtomic()
510 NewV = B.CreateBitCast(Insert, Ty); in optimizeAtomic()
512 NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, in optimizeAtomic()
519 NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV); in optimizeAtomic()
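Lines 477-519 handle a value that differs per lane: amdgcn_set_inactive forces inactive lanes to the operation's identity, buildScan and buildShiftRight produce the inclusive and exclusive prefix values, the wave total is read back from the last lane (wavefront size minus 1, via two 32-bit readlanes for 64-bit types), and the whole sequence is wrapped in amdgcn_wwm so it runs across the entire wave. A hedged model of what NewV, ExclScan and the read-back total hold afterwards (struct and function names are mine):

    #include <array>
    #include <cstdint>
    #include <functional>

    struct ScanResult {
      std::array<uint64_t, 64> Inclusive; // NewV after buildScan
      std::array<uint64_t, 64> Exclusive; // ExclScan after buildShiftRight
      uint64_t WaveTotal;                 // readlane of the last lane
    };

    // Hedged model of the divergent-value path: inactive lanes contribute the
    // identity (set_inactive), the scan covers the whole wave (wwm), and the
    // wave-wide reduction is the inclusive value of the last lane.
    static ScanResult scanPath(const std::array<uint64_t, 64> &V, uint64_t ExecMask,
                               uint64_t Identity,
                               const std::function<uint64_t(uint64_t, uint64_t)> &Op) {
      ScanResult R{};
      uint64_t Acc = Identity;
      for (unsigned L = 0; L < 64; ++L) {
        uint64_t Lane = ((ExecMask >> L) & 1) ? V[L] : Identity;
        R.Exclusive[L] = Acc;   // prefix over lanes < L
        Acc = Op(Acc, Lane);
        R.Inclusive[L] = Acc;   // prefix over lanes <= L
      }
      R.WaveTotal = R.Inclusive[63];
      return R;
    }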
529 Value *const Ctpop = B.CreateIntCast( in optimizeAtomic()
530 B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); in optimizeAtomic()
531 NewV = buildMul(B, V, Ctpop); in optimizeAtomic()
549 Value *const Ctpop = B.CreateIntCast( in optimizeAtomic()
550 B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); in optimizeAtomic()
551 NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1)); in optimizeAtomic()
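When the value is wave-uniform no scan is needed: for add and sub the combined update is V scaled by the active-lane count (ctpop of the ballot, lines 529-531), for xor only the parity of that count matters (the And with 1 on line 551), and for and/or/min/max the value is used unchanged, since applying a uniform operand once or N times gives the same result. A hedged model of the scaling, again using C++20's std::popcount (function name is mine):

    #include <bit>
    #include <cstdint>

    // Hedged model of the uniform-value reduction: the single atomic issued
    // for the wave uses V scaled by the active-lane count for add/sub, and
    // by that count's parity for xor.
    static uint64_t uniformWaveUpdate(bool IsXor, uint64_t V, uint64_t ExecMask) {
      uint64_t ActiveLanes = std::popcount(ExecMask);
      return IsXor ? V * (ActiveLanes & 1) : V * ActiveLanes;
    }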
559 Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0)); in optimizeAtomic()
573 B.SetInsertPoint(SingleLaneTerminator); in optimizeAtomic()
578 B.Insert(NewI); in optimizeAtomic()
583 B.SetInsertPoint(&I); in optimizeAtomic()
588 PHINode *const PHI = B.CreatePHI(Ty, 2); in optimizeAtomic()
598 Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty()); in optimizeAtomic()
600 B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty()); in optimizeAtomic()
602 B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo); in optimizeAtomic()
604 B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi); in optimizeAtomic()
605 Value *const PartialInsert = B.CreateInsertElement( in optimizeAtomic()
606 UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); in optimizeAtomic()
608 B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); in optimizeAtomic()
609 BroadcastI = B.CreateBitCast(Insert, Ty); in optimizeAtomic()
612 BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI); in optimizeAtomic()
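After the single-lane atomic (cloned and inserted at the single-lane terminator, lines 573-578), lines 588-612 broadcast the old value that atomic returned to every lane: a PHI merges it into the join block and amdgcn_readfirstlane propagates it, split into two 32-bit halves when the type is 64 bits wide (lines 598-609). A hedged bit-level model of the split-and-recombine (function name is mine):

    #include <cstdint>

    // Hedged model of the 64-bit broadcast: readfirstlane moves 32 bits at a
    // time, so the PHI value is split (trunc / lshr+trunc), each half is
    // broadcast, and the halves are reassembled (insertelement + bitcast).
    static uint64_t broadcast64(uint64_t PhiValueInFirstActiveLane) {
      uint32_t Lo = static_cast<uint32_t>(PhiValueInFirstActiveLane);        // ExtractLo
      uint32_t Hi = static_cast<uint32_t>(PhiValueInFirstActiveLane >> 32);  // ExtractHi
      return (uint64_t(Hi) << 32) | Lo;                                      // Insert + BitCast
    }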
623 LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); in optimizeAtomic()
630 LaneOffset = buildMul(B, V, Mbcnt); in optimizeAtomic()
638 LaneOffset = B.CreateSelect(Cond, Identity, V); in optimizeAtomic()
641 LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1)); in optimizeAtomic()
645 Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); in optimizeAtomic()
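Finally, lines 623-645 rebuild each lane's own result: the broadcast old value is combined with a per-lane offset, which is the exclusive scan (re-wrapped in amdgcn_wwm) on the scan path, V times Mbcnt for uniform add/sub, identity-or-V for and/or/min/max depending on whether this is the first active lane, and V times (Mbcnt & 1) for xor; buildNonAtomicBinOp on line 645 then applies the original operation. A hedged model for the uniform add case (function name is mine):

    #include <cstdint>

    // Hedged model of the per-lane result reconstruction for an integer add:
    // the lane's "old value" is what memory held before the wave's combined
    // atomic, plus the contributions of the active lanes ahead of it.
    static uint64_t perLaneOldValue(uint64_t BroadcastOldValue, uint64_t V,
                                    uint64_t Mbcnt /* active lanes below */) {
      uint64_t LaneOffset = V * Mbcnt;        // uniform-add path
      return BroadcastOldValue + LaneOffset;  // buildNonAtomicBinOp(Add, ...)
    }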
649 B.SetInsertPoint(PixelExitBB->getFirstNonPHI()); in optimizeAtomic()
651 PHINode *const PHI = B.CreatePHI(Ty, 2); in optimizeAtomic()