//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the experimental intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//
14
15 #include "llvm/CodeGen/ExpandReductions.h"
16 #include "llvm/Analysis/TargetTransformInfo.h"
17 #include "llvm/CodeGen/Passes.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvm/IR/InstIterator.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Pass.h"
25 #include "llvm/Transforms/Utils/LoopUtils.h"
26
27 using namespace llvm;
28
29 namespace {
30
getOpcode(Intrinsic::ID ID)31 unsigned getOpcode(Intrinsic::ID ID) {
32 switch (ID) {
33 case Intrinsic::experimental_vector_reduce_fadd:
34 return Instruction::FAdd;
35 case Intrinsic::experimental_vector_reduce_fmul:
36 return Instruction::FMul;
37 case Intrinsic::experimental_vector_reduce_add:
38 return Instruction::Add;
39 case Intrinsic::experimental_vector_reduce_mul:
40 return Instruction::Mul;
41 case Intrinsic::experimental_vector_reduce_and:
42 return Instruction::And;
43 case Intrinsic::experimental_vector_reduce_or:
44 return Instruction::Or;
45 case Intrinsic::experimental_vector_reduce_xor:
46 return Instruction::Xor;
47 case Intrinsic::experimental_vector_reduce_smax:
48 case Intrinsic::experimental_vector_reduce_smin:
49 case Intrinsic::experimental_vector_reduce_umax:
50 case Intrinsic::experimental_vector_reduce_umin:
51 return Instruction::ICmp;
52 case Intrinsic::experimental_vector_reduce_fmax:
53 case Intrinsic::experimental_vector_reduce_fmin:
54 return Instruction::FCmp;
55 default:
56 llvm_unreachable("Unexpected ID");
57 }
58 }
59
getMRK(Intrinsic::ID ID)60 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
61 switch (ID) {
62 case Intrinsic::experimental_vector_reduce_smax:
63 return RecurrenceDescriptor::MRK_SIntMax;
64 case Intrinsic::experimental_vector_reduce_smin:
65 return RecurrenceDescriptor::MRK_SIntMin;
66 case Intrinsic::experimental_vector_reduce_umax:
67 return RecurrenceDescriptor::MRK_UIntMax;
68 case Intrinsic::experimental_vector_reduce_umin:
69 return RecurrenceDescriptor::MRK_UIntMin;
70 case Intrinsic::experimental_vector_reduce_fmax:
71 return RecurrenceDescriptor::MRK_FloatMax;
72 case Intrinsic::experimental_vector_reduce_fmin:
73 return RecurrenceDescriptor::MRK_FloatMin;
74 default:
75 return RecurrenceDescriptor::MRK_Invalid;
76 }
77 }
78
expandReductions(Function & F,const TargetTransformInfo * TTI)79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
80 bool Changed = false;
81 SmallVector<IntrinsicInst *, 4> Worklist;
82 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
83 if (auto II = dyn_cast<IntrinsicInst>(&*I))
84 Worklist.push_back(II);
85
86 for (auto *II : Worklist) {
87 IRBuilder<> Builder(II);
88 bool IsOrdered = false;
89 Value *Acc = nullptr;
90 Value *Vec = nullptr;
91 auto ID = II->getIntrinsicID();
92 auto MRK = RecurrenceDescriptor::MRK_Invalid;
93 switch (ID) {
94 case Intrinsic::experimental_vector_reduce_fadd:
95 case Intrinsic::experimental_vector_reduce_fmul:
96 // FMFs must be attached to the call, otherwise it's an ordered reduction
97 // and it can't be handled by generating a shuffle sequence.
98 if (!II->getFastMathFlags().isFast())
99 IsOrdered = true;
100 Acc = II->getArgOperand(0);
101 Vec = II->getArgOperand(1);
102 break;
103 case Intrinsic::experimental_vector_reduce_add:
104 case Intrinsic::experimental_vector_reduce_mul:
105 case Intrinsic::experimental_vector_reduce_and:
106 case Intrinsic::experimental_vector_reduce_or:
107 case Intrinsic::experimental_vector_reduce_xor:
108 case Intrinsic::experimental_vector_reduce_smax:
109 case Intrinsic::experimental_vector_reduce_smin:
110 case Intrinsic::experimental_vector_reduce_umax:
111 case Intrinsic::experimental_vector_reduce_umin:
112 case Intrinsic::experimental_vector_reduce_fmax:
113 case Intrinsic::experimental_vector_reduce_fmin:
114 Vec = II->getArgOperand(0);
115 MRK = getMRK(ID);
116 break;
117 default:
118 continue;
119 }
120 if (!TTI->shouldExpandReduction(II))
121 continue;
122 Value *Rdx =
123 IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
124 : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
125 II->replaceAllUsesWith(Rdx);
126 II->eraseFromParent();
127 Changed = true;
128 }
129 return Changed;
130 }
131
132 class ExpandReductions : public FunctionPass {
133 public:
134 static char ID;
ExpandReductions()135 ExpandReductions() : FunctionPass(ID) {
136 initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
137 }
138
runOnFunction(Function & F)139 bool runOnFunction(Function &F) override {
140 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
141 return expandReductions(F, TTI);
142 }
143
getAnalysisUsage(AnalysisUsage & AU) const144 void getAnalysisUsage(AnalysisUsage &AU) const override {
145 AU.addRequired<TargetTransformInfoWrapperPass>();
146 AU.setPreservesCFG();
147 }
148 };
149 }
150
151 char ExpandReductions::ID;
152 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
153 "Expand reduction intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)154 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
155 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
156 "Expand reduction intrinsics", false, false)
157
158 FunctionPass *llvm::createExpandReductionsPass() {
159 return new ExpandReductions();
160 }
161
run(Function & F,FunctionAnalysisManager & AM)162 PreservedAnalyses ExpandReductionsPass::run(Function &F,
163 FunctionAnalysisManager &AM) {
164 const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
165 if (!expandReductions(F, &TTI))
166 return PreservedAnalyses::all();
167 PreservedAnalyses PA;
168 PA.preserveSet<CFGAnalyses>();
169 return PA;
170 }
171