1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements feature and label extraction for offline supervised learning
11 // of a IR to native size model.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
15
16 #ifdef LLVM_HAVE_TF_API
17 #include "llvm/Analysis/Utils/TFUtils.h"
18 #endif
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Dominators.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/MC/MCAsmLayout.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 #include <algorithm>
33 #include <deque>
34
35 using namespace llvm;
36
37 AnalysisKey InlineSizeEstimatorAnalysis::Key;
38
39 #define DEBUG_TYPE "inline-size-estimator"
40
41 #ifdef LLVM_HAVE_TF_API
42 cl::opt<std::string> TFIR2NativeModelPath(
43 "ml-inliner-ir2native-model", cl::Hidden,
44 cl::desc("Path to saved model evaluating native size from IR."));
45
46 namespace {
getMaxInstructionID()47 unsigned getMaxInstructionID() {
48 #define LAST_OTHER_INST(NR) return NR;
49 #include "llvm/IR/Instruction.def"
50 }
51
52 class IRToNativeSizeLearning {
53 public:
54 enum class NamedFeatureIndex : size_t {
55 InitialSize,
56 Blocks,
57 Calls,
58 IsLocal,
59 IsLinkOnceODR,
60 IsLinkOnce,
61 Loops,
62 MaxLoopDepth,
63 MaxDomTreeLevel,
64
65 NumNamedFeatures
66 };
67 static const size_t NumNamedFeatures =
68 static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
69 struct FunctionFeatures {
70 static const size_t FeatureCount;
71
72 std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
73 std::vector<int32_t> InstructionHistogram;
74 std::vector<int32_t> InstructionPairHistogram;
75
76 void fillTensor(int32_t *Ptr) const;
operator []__anon3819aff00111::IRToNativeSizeLearning::FunctionFeatures77 int32_t &operator[](NamedFeatureIndex Pos) {
78 return NamedFeatures[static_cast<size_t>(Pos)];
79 }
80 };
81 IRToNativeSizeLearning() = default;
82
83 static FunctionFeatures getFunctionFeatures(Function &F,
84 FunctionAnalysisManager &FAM);
85 };
86
87 // This is a point in time - we determined including these pairs of
88 // consecutive instructions (in the IR layout available at inline time) as
89 // features improves the model performance. We want to move away from manual
90 // feature selection.
91 // The array is given in opcode pairs rather than labels because 1) labels
92 // weren't readily available, and 2) the successions were hand - extracted.
93 //
94 // This array must be sorted.
95 static const std::array<std::pair<size_t, size_t>, 137>
96 ImportantInstructionSuccessions{
97 {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
98 {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
99 {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
100 {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
101 {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
102 {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
103 {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
104 {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
105 {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
106 {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
107 {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
108 {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
109 {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
110 {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
111 {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
112 {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
113 {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
114 {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
115 {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
116 {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
117
118 // We have: 9 calculated features (the features here); 1 feature for each
119 // instruction opcode; and 1 feature for each manually-identified sequence.
120 // For the latter 2, we build a histogram: we count the number of
121 // occurrences of each instruction opcode or succession of instructions,
122 // respectively.
123 // Note that instruction opcodes start from 1. For convenience, we also have an
124 // always 0 feature for the '0' opcode, hence the extra 1.
125 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
126 ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
127 IRToNativeSizeLearning::NumNamedFeatures;
128
getSize(Function & F,TargetTransformInfo & TTI)129 size_t getSize(Function &F, TargetTransformInfo &TTI) {
130 size_t Ret = 0;
131 for (const auto &BB : F)
132 for (const auto &I : BB)
133 Ret += TTI.getInstructionCost(
134 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
135 return Ret;
136 }
137
getSize(Function & F,FunctionAnalysisManager & FAM)138 size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
139 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
140 return getSize(F, TTI);
141 }
142
getMaxDominatorTreeDepth(const Function & F,const DominatorTree & Tree)143 unsigned getMaxDominatorTreeDepth(const Function &F,
144 const DominatorTree &Tree) {
145 unsigned Ret = 0;
146 for (const auto &BB : F)
147 if (const auto *TN = Tree.getNode(&BB))
148 Ret = std::max(Ret, TN->getLevel());
149 return Ret;
150 }
151 } // namespace
152
153 IRToNativeSizeLearning::FunctionFeatures
getFunctionFeatures(Function & F,FunctionAnalysisManager & FAM)154 IRToNativeSizeLearning::getFunctionFeatures(Function &F,
155 FunctionAnalysisManager &FAM) {
156 assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
157 "expected function features are sorted");
158
159 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
160 FunctionFeatures FF;
161 size_t InstrCount = getMaxInstructionID() + 1;
162 FF.InstructionHistogram.resize(InstrCount);
163
164 FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
165
166 int StartID = 0;
167 int LastID = StartID;
168 auto getPairIndex = [](size_t a, size_t b) {
169 auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
170 if (I == ImportantInstructionSuccessions.end())
171 return -1;
172 return static_cast<int>(
173 std::distance(ImportantInstructionSuccessions.begin(), I));
174 };
175
176 // We don't want debug calls, because they'd just add noise.
177 for (const auto &BB : F) {
178 for (const auto &I : BB.instructionsWithoutDebug()) {
179 auto ID = I.getOpcode();
180
181 ++FF.InstructionHistogram[ID];
182 int PairIndex = getPairIndex(LastID, ID);
183 if (PairIndex >= 0)
184 ++FF.InstructionPairHistogram[PairIndex];
185 LastID = ID;
186 if (isa<CallBase>(I))
187 ++FF[NamedFeatureIndex::Calls];
188 }
189 }
190
191 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
192 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
193 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
194 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
195 FF[NamedFeatureIndex::Blocks] =
196 std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
197 auto &LI = FAM.getResult<LoopAnalysis>(F);
198 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
199 for (auto &L : LI)
200 FF[NamedFeatureIndex::MaxLoopDepth] =
201 std::max(FF[NamedFeatureIndex::MaxLoopDepth],
202 static_cast<int32_t>(L->getLoopDepth()));
203 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
204 return FF;
205 }
206
fillTensor(int32_t * Ptr) const207 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
208 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
209 Ptr += NamedFeatures.size();
210 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
211 Ptr += InstructionHistogram.size();
212 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
213 Ptr);
214 }
215
isEvaluatorRequested()216 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
217 return !TFIR2NativeModelPath.empty();
218 }
219
InlineSizeEstimatorAnalysis()220 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
221 if (!isEvaluatorRequested()) {
222 return;
223 }
224 std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
225 "serving_default_input_1",
226 {1, static_cast<int64_t>(
227 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
228 std::vector<TensorSpec> OutputSpecs{
229 TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
230 Evaluator = std::make_unique<TFModelEvaluator>(
231 TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
232 if (!Evaluator || !Evaluator->isValid()) {
233 Evaluator.reset();
234 return;
235 }
236 }
237
238 InlineSizeEstimatorAnalysis::Result
run(const Function & F,FunctionAnalysisManager & FAM)239 InlineSizeEstimatorAnalysis::run(const Function &F,
240 FunctionAnalysisManager &FAM) {
241 if (!Evaluator)
242 return None;
243 auto Features = IRToNativeSizeLearning::getFunctionFeatures(
244 const_cast<Function &>(F), FAM);
245 int32_t *V = Evaluator->getInput<int32_t>(0);
246 Features.fillTensor(V);
247 auto ER = Evaluator->evaluate();
248 if (!ER)
249 return None;
250 float Ret = *ER->getTensorValue<float>(0);
251 if (Ret < 0.0)
252 Ret = 0.0;
253 return static_cast<size_t>(Ret);
254 }
255
~InlineSizeEstimatorAnalysis()256 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis && Other)257 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
258 InlineSizeEstimatorAnalysis &&Other)
259 : Evaluator(std::move(Other.Evaluator)) {}
260
261 #else
262 namespace llvm {
263 class TFModelEvaluator {};
264 } // namespace llvm
InlineSizeEstimatorAnalysis()265 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&)266 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
267 InlineSizeEstimatorAnalysis &&) {}
~InlineSizeEstimatorAnalysis()268 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
269 InlineSizeEstimatorAnalysis::Result
run(const Function & F,FunctionAnalysisManager & FAM)270 InlineSizeEstimatorAnalysis::run(const Function &F,
271 FunctionAnalysisManager &FAM) {
272 return None;
273 }
isEvaluatorRequested()274 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
275 #endif
276
277 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)278 InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
279 FunctionAnalysisManager &AM) {
280 OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
281 << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
282 return PreservedAnalyses::all();
283 }
284