1 //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the skeleton of the TargetLowering class.
12 ///
13 /// Specifically, this invokes the appropriate lowering method for a given
14 /// instruction kind and drives global register allocation. It also implements
15 /// the non-deleted instruction iteration in LoweringContext.
16 ///
17 //===----------------------------------------------------------------------===//
18
19 #include "IceTargetLowering.h"
20
21 #include "IceBitVector.h"
22 #include "IceCfg.h" // setError()
23 #include "IceCfgNode.h"
24 #include "IceGlobalContext.h"
25 #include "IceGlobalInits.h"
26 #include "IceInstVarIter.h"
27 #include "IceLiveness.h"
28 #include "IceOperand.h"
29 #include "IceRegAlloc.h"
30
31 #include <string>
32 #include <vector>
33
34 #define TARGET_LOWERING_CLASS_FOR(t) Target_##t
35
36 // We prevent target-specific implementation details from leaking outside their
37 // implementations by forbidding #include of target-specific header files
38 // anywhere outside their own files. To create target-specific objects
39 // (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
40 // following named constructors. For reference, each target Foo needs to
41 // implement the following named constructors and initializer:
42 //
43 // namespace Foo {
44 // unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
45 // unique_ptr<Ice::TargetDataLowering>
46 // createTargetDataLowering(Ice::GlobalContext*);
47 // unique_ptr<Ice::TargetHeaderLowering>
48 // createTargetHeaderLowering(Ice::GlobalContext *);
49 // void staticInit(::Ice::GlobalContext *);
50 // }
51 #define SUBZERO_TARGET(X) \
52 namespace X { \
53 std::unique_ptr<::Ice::TargetLowering> \
54 createTargetLowering(::Ice::Cfg *Func); \
55 std::unique_ptr<::Ice::TargetDataLowering> \
56 createTargetDataLowering(::Ice::GlobalContext *Ctx); \
57 std::unique_ptr<::Ice::TargetHeaderLowering> \
58 createTargetHeaderLowering(::Ice::GlobalContext *Ctx); \
59 void staticInit(::Ice::GlobalContext *Ctx); \
60 bool shouldBePooled(const ::Ice::Constant *C); \
61 ::Ice::Type getPointerType(); \
62 } // end of namespace X
63 #include "SZTargets.def"
64 #undef SUBZERO_TARGET
65
66 namespace Ice {
67 void LoweringContext::init(CfgNode *N) {
68 Node = N;
69 End = getNode()->getInsts().end();
70 rewind();
71 advanceForward(Next);
72 }
73
74 void LoweringContext::rewind() {
75 Begin = getNode()->getInsts().begin();
76 Cur = Begin;
77 skipDeleted(Cur);
78 Next = Cur;
79 availabilityReset();
80 }
81
82 void LoweringContext::insert(Inst *Instr) {
83 getNode()->getInsts().insert(Next, Instr);
84 LastInserted = Instr;
85 }
86
87 void LoweringContext::skipDeleted(InstList::iterator &I) const {
88 while (I != End && I->isDeleted())
89 ++I;
90 }
91
92 void LoweringContext::advanceForward(InstList::iterator &I) const {
93 if (I != End) {
94 ++I;
95 skipDeleted(I);
96 }
97 }
98
99 Inst *LoweringContext::getLastInserted() const {
100 assert(LastInserted);
101 return LastInserted;
102 }
103
104 void LoweringContext::availabilityReset() {
105 LastDest = nullptr;
106 LastSrc = nullptr;
107 }
108
109 void LoweringContext::availabilityUpdate() {
110 availabilityReset();
111 Inst *Instr = LastInserted;
112 if (Instr == nullptr)
113 return;
114 if (!Instr->isVarAssign())
115 return;
116 // Since isVarAssign() is true, the source operand must be a Variable.
117 LastDest = Instr->getDest();
118 LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
119 }
120
121 Variable *LoweringContext::availabilityGet(Operand *Src) const {
122 assert(Src);
123 if (Src == LastDest)
124 return LastSrc;
125 return nullptr;
126 }
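// A possible use in a target lowering (an illustrative sketch, not code from
// this file): after the context has just inserted a plain assignment "B = A",
// a later lowering step that reads B can query the context and forward A
// instead of going through the extra copy:
//
//   if (Variable *Src = Context.availabilityGet(B))
//     B = Src; // B was last defined by "B = A", so read A directly.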
127
128 namespace {
129
130 void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
131 std::function<std::string(RegNumT)> getRegName,
132 const std::string &LineIndentString) {
133 constexpr size_t RegistersPerLine = 16;
134 size_t Count = 0;
135 for (RegNumT RegNum : RegNumBVIter(Bitset)) {
136 if (Count == 0) {
137 Str << LineIndentString;
138 } else {
139 Str << ",";
140 }
141 if (Count > 0 && Count % RegistersPerLine == 0)
142 Str << "\n" << LineIndentString;
143 ++Count;
144 Str << getRegName(RegNum);
145 }
146 if (Count)
147 Str << "\n";
148 }
149
150 // Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class>
151 // component, the result is "" plus "<reg>".
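// For example, "i32:eax" yields the class "i32" and the register "eax", while
// a bare "eax" yields "" and "eax" (the class and register names here are only
// illustrative; real names are target-specific).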
152 void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
153 std::string *SplitRegName) {
154 constexpr const char Separator[] = ":";
155 constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
156 size_t Pos = RegName.find(Separator);
157 if (Pos == std::string::npos) {
158 *SplitRegClass = "";
159 *SplitRegName = RegName;
160 } else {
161 *SplitRegClass = RegName.substr(0, Pos);
162 *SplitRegName = RegName.substr(Pos + SeparatorWidth);
163 }
164 }
165
166 LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
167 llvm::report_fatal_error("Unsupported target: " +
168 std::string(targetArchString(Target)));
169 }
170
171 } // end of anonymous namespace
172
173 void TargetLowering::filterTypeToRegisterSet(
174 GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
175 size_t TypeToRegisterSetSize,
176 std::function<std::string(RegNumT)> getRegName,
177 std::function<const char *(RegClass)> getRegClassName) {
178 std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
179 SmallBitVector(NumRegs));
180 std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
181 SmallBitVector(NumRegs));
182
183 std::unordered_map<std::string, RegNumT> RegNameToIndex;
184 for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
185 const auto RegNum = RegNumT::fromInt(RegIndex);
186 RegNameToIndex[getRegName(RegNum)] = RegNum;
187 }
188
189 std::vector<std::string> BadRegNames;
190
191 // The processRegList function iterates across the RegNames vector. Each
192 // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
193 // The register class and register number are computed, and the corresponding
194 // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
195 // for all classes.
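// For example (register names are illustrative, not tied to a particular
// target), a use list of {"eax", "i32:ecx"} keeps "eax" usable for every
// register class but "ecx" only for the "i32" class; once a non-empty use set
// exists for a class, any register not mentioned for it is filtered out below.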
196 auto processRegList = [&](const std::vector<std::string> &RegNames,
197 std::vector<SmallBitVector> &RegSet) {
198 for (const std::string &RegClassAndName : RegNames) {
199 std::string RClass;
200 std::string RName;
201 splitToClassAndName(RegClassAndName, &RClass, &RName);
202 if (!RegNameToIndex.count(RName)) {
203 BadRegNames.push_back(RName);
204 continue;
205 }
206 const int32_t RegIndex = RegNameToIndex.at(RName);
207 for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
208 ++TypeIndex) {
209 if (RClass.empty() ||
210 RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
211 RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
212 }
213 }
214 }
215 };
216
217 processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
218 processRegList(getFlags().getExcludedRegisters(), ExcludeSet);
219
220 if (!BadRegNames.empty()) {
221 std::string Buffer;
222 llvm::raw_string_ostream StrBuf(Buffer);
223 StrBuf << "Unrecognized use/exclude registers:";
224 for (const auto &RegName : BadRegNames)
225 StrBuf << " " << RegName;
226 llvm::report_fatal_error(StrBuf.str());
227 }
228
229 // Apply filters.
230 for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
231 SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
232 SmallBitVector *UseBitSet = &UseSet[TypeIndex];
233 SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
234 if (UseBitSet->any())
235 *TypeBitSet = *UseBitSet;
236 (*TypeBitSet).reset(*ExcludeBitSet);
237 }
238
239 // Display filtered register sets, if requested.
240 if (BuildDefs::dump() && NumRegs &&
241 (getFlags().getVerbose() & IceV_AvailableRegs)) {
242 Ostream &Str = Ctx->getStrDump();
243 const std::string Indent = " ";
244 const std::string IndentTwice = Indent + Indent;
245 Str << "Registers available for register allocation:\n";
246 for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
247 Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
248 << ":\n";
249 printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
250 IndentTwice);
251 }
252 Str << "\n";
253 }
254 }
255
256 std::unique_ptr<TargetLowering>
257 TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
258 switch (Target) {
259 default:
260 badTargetFatalError(Target);
261 #define SUBZERO_TARGET(X) \
262 case TARGET_LOWERING_CLASS_FOR(X): \
263 return ::X::createTargetLowering(Func);
264 #include "SZTargets.def"
265 #undef SUBZERO_TARGET
266 }
267 }
268
269 void TargetLowering::staticInit(GlobalContext *Ctx) {
270 const TargetArch Target = getFlags().getTargetArch();
271 // Call the specified target's static initializer.
272 switch (Target) {
273 default:
274 badTargetFatalError(Target);
275 #define SUBZERO_TARGET(X) \
276 case TARGET_LOWERING_CLASS_FOR(X): { \
277 static bool InitGuard##X = false; \
278 if (InitGuard##X) { \
279 return; \
280 } \
281 InitGuard##X = true; \
282 ::X::staticInit(Ctx); \
283 } break;
284 #include "SZTargets.def"
285 #undef SUBZERO_TARGET
286 }
287 }
288
289 bool TargetLowering::shouldBePooled(const Constant *C) {
290 const TargetArch Target = getFlags().getTargetArch();
291 switch (Target) {
292 default:
293 return false;
294 #define SUBZERO_TARGET(X) \
295 case TARGET_LOWERING_CLASS_FOR(X): \
296 return ::X::shouldBePooled(C);
297 #include "SZTargets.def"
298 #undef SUBZERO_TARGET
299 }
300 }
301
302 ::Ice::Type TargetLowering::getPointerType() {
303 const TargetArch Target = getFlags().getTargetArch();
304 switch (Target) {
305 default:
306 return ::Ice::IceType_void;
307 #define SUBZERO_TARGET(X) \
308 case TARGET_LOWERING_CLASS_FOR(X): \
309 return ::X::getPointerType();
310 #include "SZTargets.def"
311 #undef SUBZERO_TARGET
312 }
313 }
314
315 TargetLowering::SandboxType
316 TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
317 assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
318 if (Flags.getUseNonsfi()) {
319 return TargetLowering::ST_Nonsfi;
320 }
321 if (Flags.getUseSandboxing()) {
322 return TargetLowering::ST_NaCl;
323 }
324 return TargetLowering::ST_None;
325 }
326
327 TargetLowering::TargetLowering(Cfg *Func)
328 : Func(Func), Ctx(Func->getContext()),
329 SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}
330
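// A minimal usage sketch for AutoBundle (the bundle-lock option shown is just
// one possibility): constructing the guard emits _bundle_lock when sandboxing
// is enabled, and the matching _bundle_unlock is emitted when it goes out of
// scope.
//
//   {
//     AutoBundle _(this, InstBundleLock::Opt_AlignToEnd);
//     // ... emit the instructions that must stay within one bundle ...
//   }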
331 TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
332 InstBundleLock::Option Option)
333 : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
334 assert(!Target->AutoBundling);
335 Target->AutoBundling = true;
336 if (NeedSandboxing) {
337 Target->_bundle_lock(Option);
338 }
339 }
340
341 TargetLowering::AutoBundle::~AutoBundle() {
342 assert(Target->AutoBundling);
343 Target->AutoBundling = false;
344 if (NeedSandboxing) {
345 Target->_bundle_unlock();
346 }
347 }
348
349 void TargetLowering::genTargetHelperCalls() {
350 TimerMarker T(TimerStack::TT_genHelpers, Func);
351 Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
352 for (CfgNode *Node : Func->getNodes()) {
353 Context.init(Node);
354 while (!Context.atEnd()) {
355 PostIncrLoweringContext _(Context);
356 genTargetHelperCallFor(iteratorToInst(Context.getCur()));
357 }
358 }
359 }
360
361 void TargetLowering::doAddressOpt() {
362 doAddressOptOther();
363 if (llvm::isa<InstLoad>(*Context.getCur()))
364 doAddressOptLoad();
365 else if (llvm::isa<InstStore>(*Context.getCur()))
366 doAddressOptStore();
367 else if (auto *Intrinsic =
368 llvm::dyn_cast<InstIntrinsic>(&*Context.getCur())) {
369 if (Intrinsic->getIntrinsicID() == Intrinsics::LoadSubVector)
370 doAddressOptLoadSubVector();
371 else if (Intrinsic->getIntrinsicID() == Intrinsics::StoreSubVector)
372 doAddressOptStoreSubVector();
373 }
374 Context.advanceCur();
375 Context.advanceNext();
376 }
377
378 // Lowers a single instruction according to the information in Context, by
379 // checking the Context.Cur instruction kind and calling the appropriate
380 // lowering method. The lowering method should insert target instructions at
381 // the Cur.Next insertion point, and should not delete the Context.Cur
382 // instruction or advance Context.Cur.
383 //
384 // The lowering method may look ahead in the instruction stream as desired, and
385 // lower additional instructions in conjunction with the current one, for
386 // example fusing a compare and branch. If it does, it should advance
387 // Context.Cur to point to the next non-deleted instruction to process, and it
388 // should delete any additional instructions it consumes.
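// For example (an illustrative sketch, not tied to a particular backend), a
// lowering of an icmp whose only use is the immediately following br may emit
// a single compare-and-branch sequence, mark the br as deleted, and advance
// Context.Cur past it.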
389 void TargetLowering::lower() {
390 assert(!Context.atEnd());
391 Inst *Instr = iteratorToInst(Context.getCur());
392 Instr->deleteIfDead();
393 if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
394 !llvm::isa<InstFakeUse>(Instr)) {
395 // Mark the current instruction as deleted before lowering; otherwise the
396 // Dest variable will likely get marked as non-SSA. See
397 // Variable::setDefinition(). However, just pass through FakeDef and
398 // FakeUse instructions that might have been inserted prior to lowering.
399 Instr->setDeleted();
400 switch (Instr->getKind()) {
401 case Inst::Alloca:
402 lowerAlloca(llvm::cast<InstAlloca>(Instr));
403 break;
404 case Inst::Arithmetic:
405 lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
406 break;
407 case Inst::Assign:
408 lowerAssign(llvm::cast<InstAssign>(Instr));
409 break;
410 case Inst::Br:
411 lowerBr(llvm::cast<InstBr>(Instr));
412 break;
413 case Inst::Breakpoint:
414 lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
415 break;
416 case Inst::Call:
417 lowerCall(llvm::cast<InstCall>(Instr));
418 break;
419 case Inst::Cast:
420 lowerCast(llvm::cast<InstCast>(Instr));
421 break;
422 case Inst::ExtractElement:
423 lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
424 break;
425 case Inst::Fcmp:
426 lowerFcmp(llvm::cast<InstFcmp>(Instr));
427 break;
428 case Inst::Icmp:
429 lowerIcmp(llvm::cast<InstIcmp>(Instr));
430 break;
431 case Inst::InsertElement:
432 lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
433 break;
434 case Inst::Intrinsic: {
435 auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
436 if (Intrinsic->getIntrinsicInfo().ReturnsTwice)
437 setCallsReturnsTwice(true);
438 lowerIntrinsic(Intrinsic);
439 break;
440 }
441 case Inst::Load:
442 lowerLoad(llvm::cast<InstLoad>(Instr));
443 break;
444 case Inst::Phi:
445 lowerPhi(llvm::cast<InstPhi>(Instr));
446 break;
447 case Inst::Ret:
448 lowerRet(llvm::cast<InstRet>(Instr));
449 break;
450 case Inst::Select:
451 lowerSelect(llvm::cast<InstSelect>(Instr));
452 break;
453 case Inst::ShuffleVector:
454 lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
455 break;
456 case Inst::Store:
457 lowerStore(llvm::cast<InstStore>(Instr));
458 break;
459 case Inst::Switch:
460 lowerSwitch(llvm::cast<InstSwitch>(Instr));
461 break;
462 case Inst::Unreachable:
463 lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
464 break;
465 default:
466 lowerOther(Instr);
467 break;
468 }
469
470 postLower();
471 }
472
473 Context.advanceCur();
474 Context.advanceNext();
475 }
476
477 void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
478 InstHighLevel *Instr) {
479 // TODO(stichnot): Consider modifying the design/implementation to avoid
480 // multiple init() calls when using lowerInst() to lower several instructions
481 // in the same node.
482 Context.init(Node);
483 Context.setNext(Next);
484 Context.insert(Instr);
485 --Next;
486 assert(iteratorToInst(Next) == Instr);
487 Context.setCur(Next);
488 lower();
489 }
490
491 void TargetLowering::lowerOther(const Inst *Instr) {
492 (void)Instr;
493 Func->setError("Can't lower unsupported instruction type");
494 }
495
496 // Drives register allocation, allowing all physical registers (except perhaps
497 // for the frame pointer) to be allocated. This set of registers could
498 // potentially be parameterized if we want to restrict registers e.g. for
499 // performance testing.
500 void TargetLowering::regAlloc(RegAllocKind Kind) {
501 TimerMarker T(TimerStack::TT_regAlloc, Func);
502 LinearScan LinearScan(Func);
503 RegSetMask RegInclude = RegSet_None;
504 RegSetMask RegExclude = RegSet_None;
505 RegInclude |= RegSet_CallerSave;
506 RegInclude |= RegSet_CalleeSave;
507 if (hasFramePointer())
508 RegExclude |= RegSet_FramePointer;
509 SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
510 bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
511 CfgSet<Variable *> EmptySet;
512 do {
513 LinearScan.init(Kind, EmptySet);
514 LinearScan.scan(RegMask);
515 if (!LinearScan.hasEvictions())
516 Repeat = false;
517 Kind = RAK_SecondChance;
518 } while (Repeat);
519 // TODO(stichnot): Run the register allocator one more time to do stack slot
520 // coalescing. The idea would be to initialize the Unhandled list with the
521 // set of Variables that have no register and a non-empty live range, and
522 // model an infinite number of registers. Maybe use the register aliasing
523 // mechanism to get better packing of narrower slots.
524 if (getFlags().getSplitGlobalVars())
525 postRegallocSplitting(RegMask);
526 }
527
528 namespace {
529 CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
530 InstNumberT End) {
531 CfgVector<Inst *> Result;
532 bool Started = false;
533 auto Process = [&](InstList &Insts) {
534 for (auto &Instr : Insts) {
535 if (Instr.isDeleted()) {
536 continue;
537 }
538 if (Instr.getNumber() == Start) {
539 Started = true;
540 }
541 if (Started) {
542 Result.emplace_back(&Instr);
543 }
544 if (Instr.getNumber() == End) {
545 break;
546 }
547 }
548 };
549 Process(Node->getPhis());
550 Process(Node->getInsts());
551 // TODO(manasijm): Investigate why checking >= End significantly changes
552 // output. Should not happen when renumbering produces monotonically
553 // increasing instruction numbers and live ranges begin and end on non-deleted
554 // instructions.
555 return Result;
556 }
557 } // namespace
558
559 void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
560 // Splits the live ranges of global(/multi block) variables and runs the
561 // register allocator to find registers for as many of the new variables as
562 // possible.
563 // TODO(manasijm): Merge the small liveranges back into multi-block ones when
564 // the variables get the same register. This will reduce the amount of new
565 // instructions inserted. This might involve a full dataflow analysis.
566 // Also, modify the preference mechanism in the register allocator to match.
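// In outline: (1) collect spilled variables whose live ranges have more than
// one segment, (2) make a new single-segment variable for each segment with
// enough uses, (3) rerun the register allocator over the new variables, and
// (4) for each new variable that received a register, rewrite the uses and
// defs inside its segment and insert a copy from the original variable before
// the first use and a copy back to it after the last def.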
567
568 TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
569 CfgSet<Variable *> SplitCandidates;
570
571 // Find variables that do not have registers but are allowed to. Also skip
572 // variables with single segment live ranges as they are not split further in
573 // this function.
574 for (Variable *Var : Func->getVariables()) {
575 if (!Var->mustNotHaveReg() && !Var->hasReg()) {
576 if (Var->getLiveRange().getNumSegments() > 1)
577 SplitCandidates.insert(Var);
578 }
579 }
580 if (SplitCandidates.empty())
581 return;
582
583 CfgSet<Variable *> ExtraVars;
584
585 struct UseInfo {
586 Variable *Replacing = nullptr;
587 Inst *FirstUse = nullptr;
588 Inst *LastDef = nullptr;
589 SizeT UseCount = 0;
590 };
591 CfgUnorderedMap<Variable *, UseInfo> VarInfo;
592 // Split the live ranges of the viable variables by node.
593 // Compute metadata (UseInfo) for each of the resulting variables.
594 for (auto *Var : SplitCandidates) {
595 for (auto &Segment : Var->getLiveRange().getSegments()) {
596 UseInfo Info;
597 Info.Replacing = Var;
598 auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);
599
600 for (auto *Instr :
601 getInstructionsInRange(Node, Segment.first, Segment.second)) {
602 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
603 // It's safe to iterate over the top-level src operands rather than
604 // using FOREACH_VAR_IN_INST(), because any variables inside e.g.
605 // mem operands should already have registers.
606 if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
607 if (Var == Info.Replacing) {
608 if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
609 Info.FirstUse = Instr;
610 }
611 Info.UseCount++;
612 }
613 }
614 }
615 if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
616 Info.LastDef = Instr;
617 }
618 }
619
620 static constexpr SizeT MinUseThreshold = 3;
621 // Skip if the variable has fewer than `MinUseThreshold` uses in the segment.
622 if (Info.UseCount < MinUseThreshold)
623 continue;
624
625 if (!Info.FirstUse && !Info.LastDef) {
626 continue;
627 }
628
629 LiveRange LR;
630 LR.addSegment(Segment);
631 Variable *NewVar = Func->makeVariable(Var->getType());
632
633 NewVar->setLiveRange(LR);
634
635 VarInfo[NewVar] = Info;
636
637 ExtraVars.insert(NewVar);
638 }
639 }
640 // Run the register allocator with all these new variables included
641 LinearScan RegAlloc(Func);
642 RegAlloc.init(RAK_Global, SplitCandidates);
643 RegAlloc.scan(RegMask);
644
645 // Modify the Cfg to use the new variables that now have registers.
646 for (auto *ExtraVar : ExtraVars) {
647 if (!ExtraVar->hasReg()) {
648 continue;
649 }
650
651 auto &Info = VarInfo[ExtraVar];
652
653 assert(ExtraVar->getLiveRange().getSegments().size() == 1);
654 auto Segment = ExtraVar->getLiveRange().getSegments()[0];
655
656 auto *Node =
657 Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);
658
659 auto RelevantInsts =
660 getInstructionsInRange(Node, Segment.first, Segment.second);
661
662 if (RelevantInsts.empty())
663 continue;
664
665 // Replace old variables
666 for (auto *Instr : RelevantInsts) {
667 if (llvm::isa<InstPhi>(Instr))
668 continue;
669 // TODO(manasijm): Figure out how to safely enable replacing phi dest
670 // variables. The issue is that we can not insert low level mov
671 // instructions into the PhiList.
672 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
673 // FOREACH_VAR_IN_INST() not needed. Same logic as above.
674 if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
675 if (Var == Info.Replacing) {
676 Instr->replaceSource(i, ExtraVar);
677 }
678 }
679 }
680 if (Instr->getDest() == Info.Replacing) {
681 Instr->replaceDest(ExtraVar);
682 }
683 }
684
685 assert(Info.FirstUse != Info.LastDef);
686 assert(Info.FirstUse || Info.LastDef);
687
688 // Insert spill code
689 if (Info.FirstUse != nullptr) {
690 auto *NewInst =
691 Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
692 Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
693 }
694 if (Info.LastDef != nullptr) {
695 auto *NewInst =
696 Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
697 Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
698 }
699 }
700 }
701
702 void TargetLowering::markRedefinitions() {
703 // Find (non-SSA) instructions where the Dest variable appears in some source
704 // operand, and set the IsDestRedefined flag to keep liveness analysis
705 // consistent.
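// For example (sketch): if a two-address lowering produces "t = t + 1", with
// "t" appearing both as Dest and as a source operand, the def of "t" is marked
// as a redefinition so liveness does not treat it as the start of a brand-new
// live range.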
706 for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
707 ++Instr) {
708 if (Instr->isDeleted())
709 continue;
710 Variable *Dest = Instr->getDest();
711 if (Dest == nullptr)
712 continue;
713 FOREACH_VAR_IN_INST(Var, *Instr) {
714 if (Var == Dest) {
715 Instr->setDestRedefined();
716 break;
717 }
718 }
719 }
720 }
721
722 void TargetLowering::addFakeDefUses(const Inst *Instr) {
723 FOREACH_VAR_IN_INST(Var, *Instr) {
724 if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
725 Context.insert<InstFakeUse>(Var64->getLo());
726 Context.insert<InstFakeUse>(Var64->getHi());
727 } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
728 for (Variable *Var : VarVec->getContainers()) {
729 Context.insert<InstFakeUse>(Var);
730 }
731 } else {
732 Context.insert<InstFakeUse>(Var);
733 }
734 }
735 Variable *Dest = Instr->getDest();
736 if (Dest == nullptr)
737 return;
738 if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
739 Context.insert<InstFakeDef>(Var64->getLo());
740 Context.insert<InstFakeDef>(Var64->getHi());
741 } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
742 for (Variable *Var : VarVec->getContainers()) {
743 Context.insert<InstFakeDef>(Var);
744 }
745 } else {
746 Context.insert<InstFakeDef>(Dest);
747 }
748 }
749
750 void TargetLowering::sortVarsByAlignment(VarList &Dest,
751 const VarList &Source) const {
752 Dest = Source;
753 // Instead of std::sort, we could do a bucket sort with log2(alignment) as
754 // the buckets, if performance is an issue.
755 std::sort(Dest.begin(), Dest.end(),
756 [this](const Variable *V1, const Variable *V2) {
757 const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType());
758 const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType());
759 if (WidthV1 == WidthV2)
760 return V1->getIndex() < V2->getIndex();
761 return WidthV1 > WidthV2;
762 });
763 }
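// Example for sortVarsByAlignment() (sketch): variables with on-stack widths
// {4, 8, 1, 8} come out ordered by width as {8, 8, 4, 1}, and the two 8-byte
// variables are ordered by their variable index, keeping the result
// deterministic.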
764
765 void TargetLowering::getVarStackSlotParams(
766 VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
767 size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
768 uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
769 std::function<bool(Variable *)> TargetVarHook) {
770 const VariablesMetadata *VMetadata = Func->getVMetadata();
771 BitVector IsVarReferenced(Func->getNumVariables());
772 for (CfgNode *Node : Func->getNodes()) {
773 for (Inst &Instr : Node->getInsts()) {
774 if (Instr.isDeleted())
775 continue;
776 if (const Variable *Var = Instr.getDest())
777 IsVarReferenced[Var->getIndex()] = true;
778 FOREACH_VAR_IN_INST(Var, Instr) {
779 IsVarReferenced[Var->getIndex()] = true;
780 }
781 }
782 }
783
784 // If SimpleCoalescing is false, each variable without a register gets its
785 // own unique stack slot, which leads to large stack frames. If
786 // SimpleCoalescing is true, then each "global" variable without a register
787 // gets its own slot, but "local" variable slots are reused across basic
788 // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
789 // then C may share a slot with A or B.
790 //
791 // We cannot coalesce stack slots if this function calls a "returns twice"
792 // function. In that case, basic blocks may be revisited, and variables local
793 // to those basic blocks are actually live until after the called function
794 // returns a second time.
795 const bool SimpleCoalescing = !callsReturnsTwice();
796
797 CfgVector<size_t> LocalsSize(Func->getNumNodes());
798 const VarList &Variables = Func->getVariables();
799 VarList SpilledVariables;
800 for (Variable *Var : Variables) {
801 if (Var->hasReg()) {
802 // Don't consider a rematerializable variable to be an actual register use
803 // (specifically of the frame pointer). Otherwise, the prolog may decide
804 // to save the frame pointer twice - once because of the explicit need for
805 // a frame pointer, and once because of an active use of a callee-save
806 // register.
807 if (!Var->isRematerializable())
808 RegsUsed[Var->getRegNum()] = true;
809 continue;
810 }
811 // An argument either does not need a stack slot (if passed in a register)
812 // or already has one (if passed on the stack).
813 if (Var->getIsArg()) {
814 if (!Var->hasReg()) {
815 assert(!Var->hasStackOffset());
816 Var->setHasStackOffset();
817 }
818 continue;
819 }
820 // An unreferenced variable doesn't need a stack slot.
821 if (!IsVarReferenced[Var->getIndex()])
822 continue;
823 // Let the target-specific hook check the variable; it may end up sharing
824 // stack slots and not need accounting here.
825 if (TargetVarHook(Var))
826 continue;
827 assert(!Var->hasStackOffset());
828 Var->setHasStackOffset();
829 SpilledVariables.push_back(Var);
830 }
831
832 SortedSpilledVariables.reserve(SpilledVariables.size());
833 sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);
834
835 for (Variable *Var : SortedSpilledVariables) {
836 size_t Increment = typeWidthInBytesOnStack(Var->getType());
837 // We have sorted by alignment, so the first variable we encounter that is
838 // located in each area determines the max alignment for the area.
839 if (!*SpillAreaAlignmentBytes)
840 *SpillAreaAlignmentBytes = Increment;
841 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
842 if (VMetadata->isMultiBlock(Var)) {
843 *GlobalsSize += Increment;
844 } else {
845 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
846 LocalsSize[NodeIndex] += Increment;
847 if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
848 *SpillAreaSizeBytes = LocalsSize[NodeIndex];
849 if (!*LocalsSlotsAlignmentBytes)
850 *LocalsSlotsAlignmentBytes = Increment;
851 }
852 } else {
853 *SpillAreaSizeBytes += Increment;
854 }
855 }
856 // For testing legalization of large stack offsets on targets with limited
857 // offset bits in instruction encodings, add some padding.
858 *SpillAreaSizeBytes += getFlags().getTestStackExtra();
859 }
860
861 void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
862 uint32_t SpillAreaAlignmentBytes,
863 size_t GlobalsSize,
864 uint32_t LocalsSlotsAlignmentBytes,
865 uint32_t *SpillAreaPaddingBytes,
866 uint32_t *LocalsSlotsPaddingBytes) {
867 if (SpillAreaAlignmentBytes) {
868 uint32_t PaddingStart = SpillAreaStartOffset;
869 uint32_t SpillAreaStart =
870 Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
871 *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
872 }
873
874 // If there are separate globals and locals areas, make sure the locals area
875 // is aligned by padding the end of the globals area.
876 if (LocalsSlotsAlignmentBytes) {
877 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
878 GlobalsAndSubsequentPaddingSize =
879 Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
880 *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
881 }
882 }
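// Worked example for alignStackSpillAreas() (sketch): with
// SpillAreaStartOffset = 12 and SpillAreaAlignmentBytes = 16, the spill area
// starts at 16, so *SpillAreaPaddingBytes = 4; with GlobalsSize = 20 and
// LocalsSlotsAlignmentBytes = 8, the globals area is padded up to 24, so
// *LocalsSlotsPaddingBytes = 4.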
883
884 void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
885 size_t SpillAreaPaddingBytes,
886 size_t SpillAreaSizeBytes,
887 size_t GlobalsAndSubsequentPaddingSize,
888 bool UsesFramePointer) {
889 const VariablesMetadata *VMetadata = Func->getVMetadata();
890 // For testing legalization of large stack offsets on targets with limited
891 // offset bits in instruction encodings, add some padding. This assumes that
892 // SpillAreaSizeBytes has accounted for the extra test padding. When
893 // UsesFramePointer is true, the offset depends on the padding, not just the
894 // SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
895 // offsets depend on the gap between SpillAreaSizeBytes and
896 // SpillAreaPaddingBytes, so we don't increment that.
897 size_t TestPadding = getFlags().getTestStackExtra();
898 if (UsesFramePointer)
899 SpillAreaPaddingBytes += TestPadding;
900 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
901 size_t NextStackOffset = SpillAreaPaddingBytes;
902 CfgVector<size_t> LocalsSize(Func->getNumNodes());
903 const bool SimpleCoalescing = !callsReturnsTwice();
904
905 for (Variable *Var : SortedSpilledVariables) {
906 size_t Increment = typeWidthInBytesOnStack(Var->getType());
907 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
908 if (VMetadata->isMultiBlock(Var)) {
909 GlobalsSpaceUsed += Increment;
910 NextStackOffset = GlobalsSpaceUsed;
911 } else {
912 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
913 LocalsSize[NodeIndex] += Increment;
914 NextStackOffset = SpillAreaPaddingBytes +
915 GlobalsAndSubsequentPaddingSize +
916 LocalsSize[NodeIndex];
917 }
918 } else {
919 NextStackOffset += Increment;
920 }
921 if (UsesFramePointer)
922 Var->setStackOffset(-NextStackOffset);
923 else
924 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
925 }
926 }
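// Worked example for assignVarStackSlots() (sketch): with
// SpillAreaSizeBytes = 32 and a variable whose slot ends at
// NextStackOffset = 8, the variable gets stack offset -8 when a frame pointer
// is used, and 32 - 8 = 24 relative to the stack pointer otherwise.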
927
928 InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
929 SizeT MaxSrcs) {
930 constexpr bool HasTailCall = false;
931 Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
932 InstCall *Call =
933 InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
934 return Call;
935 }
936
937 bool TargetLowering::shouldOptimizeMemIntrins() {
938 return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
939 }
940
941 void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
942 Variable *Dest, Operand *Src0,
943 Operand *Src1) {
944 scalarizeInstruction(
945 Dest,
946 [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
947 return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
948 },
949 Src0, Src1);
950 }
951
952 void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
953 const char *Suffix) const {
954 if (!BuildDefs::dump())
955 return;
956 Ostream &Str = Ctx->getStrEmit();
957 const std::string &EmitStr = C->getEmitString();
958 if (!EmitStr.empty()) {
959 // C has a custom emit string, so we use it instead of the canonical
960 // Name + Offset form.
961 Str << EmitStr;
962 return;
963 }
964 Str << C->getName() << Suffix;
965 RelocOffsetT Offset = C->getOffset();
966 if (Offset) {
967 if (Offset > 0)
968 Str << "+";
969 Str << Offset;
970 }
971 }
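// Example output for emitWithoutPrefix() (sketch; the suffix shown is just
// illustrative): a relocatable named "foo" with Suffix "@GOTOFF" and Offset 8
// is emitted as "foo@GOTOFF+8", and with Offset -4 as "foo@GOTOFF-4".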
972
973 std::unique_ptr<TargetDataLowering>
974 TargetDataLowering::createLowering(GlobalContext *Ctx) {
975 TargetArch Target = getFlags().getTargetArch();
976 switch (Target) {
977 default:
978 badTargetFatalError(Target);
979 #define SUBZERO_TARGET(X) \
980 case TARGET_LOWERING_CLASS_FOR(X): \
981 return ::X::createTargetDataLowering(Ctx);
982 #include "SZTargets.def"
983 #undef SUBZERO_TARGET
984 }
985 }
986
987 TargetDataLowering::~TargetDataLowering() = default;
988
989 namespace {
990
991 // dataSectionSuffix decides whether to use SectionSuffix or VarName as the
992 // data section suffix. Essentially, when using separate data sections for
993 // globals, SectionSuffix is not necessary.
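// For example: with data sections enabled, a variable named "foo" produces the
// suffix ".foo" (so a constant ends up in ".rodata.foo"); without data
// sections, a non-empty SectionSuffix such as "mysect" produces ".mysect";
// with neither, the suffix is empty.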
994 std::string dataSectionSuffix(const std::string &SectionSuffix,
995 const std::string &VarName,
996 const bool DataSections) {
997 if (SectionSuffix.empty() && !DataSections) {
998 return "";
999 }
1000
1001 if (DataSections) {
1002 // With data sections we don't need to use the SectionSuffix.
1003 return "." + VarName;
1004 }
1005
1006 assert(!SectionSuffix.empty());
1007 return "." + SectionSuffix;
1008 }
1009
1010 } // end of anonymous namespace
1011
1012 void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
1013 const std::string &SectionSuffix) {
1014 if (!BuildDefs::dump())
1015 return;
1016
1017 // If external and not initialized, this must be a cross test. Don't generate
1018 // a declaration for such cases.
1019 const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
1020 if (IsExternal && !Var.hasInitializer())
1021 return;
1022
1023 Ostream &Str = Ctx->getStrEmit();
1024 const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
1025 const bool IsConstant = Var.getIsConstant();
1026 const SizeT Size = Var.getNumBytes();
1027 const std::string Name = Var.getName().toString();
1028
1029 Str << "\t.type\t" << Name << ",%object\n";
1030
1031 const bool UseDataSections = getFlags().getDataSections();
1032 const bool UseNonsfi = getFlags().getUseNonsfi();
1033 const std::string Suffix =
1034 dataSectionSuffix(SectionSuffix, Name, UseDataSections);
1035 if (IsConstant && UseNonsfi)
1036 Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
1037 else if (IsConstant)
1038 Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
1039 else if (HasNonzeroInitializer)
1040 Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
1041 else
1042 Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";
1043
1044 if (IsExternal)
1045 Str << "\t.globl\t" << Name << "\n";
1046
1047 const uint32_t Align = Var.getAlignment();
1048 if (Align > 1) {
1049 assert(llvm::isPowerOf2_32(Align));
1050 // Use the .p2align directive, since the .align N directive can interpret N
1051 // either as bytes or as a power of two of bytes, depending on the target.
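// For example, an Align of 16 emits ".p2align 4".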
1052 Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
1053 }
1054
1055 Str << Name << ":\n";
1056
1057 if (HasNonzeroInitializer) {
1058 for (const auto *Init : Var.getInitializers()) {
1059 switch (Init->getKind()) {
1060 case VariableDeclaration::Initializer::DataInitializerKind: {
1061 const auto &Data =
1062 llvm::cast<VariableDeclaration::DataInitializer>(Init)
1063 ->getContents();
1064 for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
1065 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
1066 }
1067 break;
1068 }
1069 case VariableDeclaration::Initializer::ZeroInitializerKind:
1070 Str << "\t.zero\t" << Init->getNumBytes() << "\n";
1071 break;
1072 case VariableDeclaration::Initializer::RelocInitializerKind: {
1073 const auto *Reloc =
1074 llvm::cast<VariableDeclaration::RelocInitializer>(Init);
1075 Str << "\t" << getEmit32Directive() << "\t";
1076 Str << Reloc->getDeclaration()->getName();
1077 if (Reloc->hasFixup()) {
1078 // TODO(jpp): this is ARM32 specific.
1079 Str << "(GOTOFF)";
1080 }
1081 if (RelocOffsetT Offset = Reloc->getOffset()) {
1082 if (Offset >= 0 || (Offset == INT32_MIN))
1083 Str << " + " << Offset;
1084 else
1085 Str << " - " << -Offset;
1086 }
1087 Str << "\n";
1088 break;
1089 }
1090 }
1091 }
1092 } else {
1093 // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
1094 // ELF writer would not write to the file, and only track virtual offsets,
1095 // but the .s writer still needs this .zero and cannot simply use the .size
1096 // to advance offsets.
1097 Str << "\t.zero\t" << Size << "\n";
1098 }
1099
1100 Str << "\t.size\t" << Name << ", " << Size << "\n";
1101 }
1102
1103 std::unique_ptr<TargetHeaderLowering>
1104 TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
1105 TargetArch Target = getFlags().getTargetArch();
1106 switch (Target) {
1107 default:
1108 badTargetFatalError(Target);
1109 #define SUBZERO_TARGET(X) \
1110 case TARGET_LOWERING_CLASS_FOR(X): \
1111 return ::X::createTargetHeaderLowering(Ctx);
1112 #include "SZTargets.def"
1113 #undef SUBZERO_TARGET
1114 }
1115 }
1116
1117 TargetHeaderLowering::~TargetHeaderLowering() = default;
1118
1119 } // end of namespace Ice
1120