1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "aarch64_color_ra.h"
17 #include <iostream>
18 #include <fstream>
19 #include "aarch64_cg.h"
20 #include "mir_lower.h"
21 #include "securec.h"
22 /*
23  * Based on concepts from Chow and Hennessy.
24  * Phases are as follows:
25  *   Prepass to collect local BB information.
26  *     Compute local register allocation demands for global RA.
27  *   Compute live ranges.
28  *     Each live range (LR) is represented by a bit vector of size #BBs.
29  *     For each vreg that crosses BBs, a bit is set in the vector.
30  *   Build interference graph with basic block as granularity.
31  *     When intersection of two LRs is not null, they interfere.
32  *   Separate unconstrained and constrained LRs.
33  *     unconstrained - LR with fewer interference edges than available colors.
34  *                     These LRs can always be colored.
35  *     constrained - not unconstrained.
36  *   Split LR based on priority cost
37  *     Repeatedly add BBs from the original LR to a new LR until it becomes constrained.
38  *     Update all LRs the new LR interferes with.
39  *   Color the new LR
40  *     Each LR has a forbidden list of registers that cannot be assigned to it.
41  *     Coalesce moves, trying the preferred color first.
42  *   Mark the remaining LRs that are still uncolorable after splitting as spills.
43  *   Perform local register allocation.
44  *   Emit and insert spills.
45  */
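/*
 * A rough sketch of the phase ordering implied above. The function names here are
 * illustrative only and may not match the actual entry points of this allocator:
 *
 *   ComputeLiveRanges();        // build per-vreg LR bit vectors, one bit per BB
 *   BuildInterferenceGraph();   // mark LRs whose live units overlap as conflicting
 *   SplitAndColor();            // separate constrained LRs, split by priority, assign colors
 *   SpillAndLocalAlloc();       // spill uncolorable LRs, run local RA, insert spill code
 */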
46 namespace maplebe {
47 #define JAVALANG (cgFunc->GetMirModule().IsJavaModule())
48 #define CLANG (cgFunc->GetMirModule().IsCModule())
49 
50 /*
51  * for physical regOpnd phyOpnd,
52  * R0->GetRegisterNumber() == 1
53  * V0->GetRegisterNumber() == 33
54  */
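/* Weights and thresholds used below when computing live-range priorities for splitting and spilling. */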
55 constexpr uint32 kLoopWeight = 20;
56 constexpr uint32 kAdjustWeight = 2;
57 constexpr uint32 kInsnStep = 2;
58 constexpr uint32 kMaxSplitCount = 3;
59 constexpr uint32 kRematWeight = 3;
60 constexpr uint32 kPriorityDefThreashold = 1;
61 constexpr uint32 kPriorityUseThreashold = 5;
62 constexpr uint32 kPriorityBBThreashold = 1000;
63 constexpr float kPriorityRatioThreashold = 0.9;
64 
65 void LiveUnit::PrintLiveUnit() const
66 {
67     LogInfo::MapleLogger() << "[" << begin << "," << end << "]"
68                            << "<D" << defNum << "U" << useNum << ">";
69     if (!hasCall) {
70         /* Too many calls, so only print when there is no call. */
71         LogInfo::MapleLogger() << " nc";
72     }
73     if (needReload) {
74         LogInfo::MapleLogger() << " rlod";
75     }
76     if (needRestore) {
77         LogInfo::MapleLogger() << " rstr";
78     }
79 }
80 
81 bool LiveRange::IsRematerializable(AArch64CGFunc &cgFunc, uint8 rematLev) const
82 {
83     if (rematLev == kRematOff)
84         return false;
85 
86     switch (op) {
87         case OP_undef:
88             return false;
89         case OP_constval: {
90             const MIRConst *mirConst = rematInfo.mirConst;
91             if (mirConst->GetKind() != kConstInt) {
92                 return false;
93             }
94             const MIRIntConst *intConst = static_cast<const MIRIntConst *>(rematInfo.mirConst);
95             int64 val = intConst->GetExtValue();
96             if (val >= -kMax16UnsignedImm && val <= kMax16UnsignedImm) {
97                 return true;
98             }
99             auto uval = static_cast<uint64>(val);
100             if (IsMoveWidableImmediate(uval, GetSpillSize())) {
101                 return true;
102             }
103             return IsBitmaskImmediate(uval, GetSpillSize());
104         }
105         case OP_addrof: {
106             if (rematLev < kRematAddr) {
107                 return false;
108             }
109             const MIRSymbol *symbol = rematInfo.sym;
110             if (symbol->IsDeleted()) {
111                 return false;
112             }
113             /* cost too much to remat */
114             if ((symbol->GetStorageClass() == kScFormal) && (symbol->GetSKind() == kStVar) &&
115                 ((fieldID != 0) ||
116                  (cgFunc.GetBecommon().GetTypeSize(symbol->GetType()->GetTypeIndex().GetIdx()) > k16ByteSize))) {
117                 return false;
118             }
119             if (!addrUpper && CGOptions::IsPIC() &&
120                 ((symbol->GetStorageClass() == kScGlobal) || (symbol->GetStorageClass() == kScExtern))) {
121                 /* check if in loop  */
122                 bool useInLoop = false;
123                 bool defOutLoop = false;
124                 for (auto luIt : luMap) {
125                     BB *bb = cgFunc.GetBBFromID(luIt.first);
126                     LiveUnit *curLu = luIt.second;
127                     if (bb->GetLoop() != nullptr && curLu->GetUseNum() != 0) {
128                         useInLoop = true;
129                     }
130                     if (bb->GetLoop() == nullptr && curLu->GetDefNum() != 0) {
131                         defOutLoop = true;
132                     }
133                 }
134                 return !(useInLoop && defOutLoop);
135             }
136             return true;
137         }
138         case OP_dread: {
139             if (rematLev < kRematDreadLocal) {
140                 return false;
141             }
142             const MIRSymbol *symbol = rematInfo.sym;
143             if (symbol->IsDeleted()) {
144                 return false;
145             }
146             MIRStorageClass storageClass = symbol->GetStorageClass();
147             if ((storageClass == kScAuto) || (storageClass == kScFormal)) {
148                 /* cost too much to remat. */
149                 return false;
150             }
151             PrimType symType = symbol->GetType()->GetPrimType();
152             int32 offset = 0;
153             if (fieldID != 0) {
154                 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
155                 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
156                 symType = structType->GetFieldType(fieldID)->GetPrimType();
157                 offset = cgFunc.GetBecommon().GetFieldOffset(*structType, fieldID).first;
158             }
159             /* check stImm.GetOffset() is in addri12 */
160             StImmOperand &stOpnd = cgFunc.CreateStImmOperand(*symbol, offset, 0);
161             uint32 dataSize = GetPrimTypeBitSize(symType);
162             ImmOperand &immOpnd = cgFunc.CreateImmOperand(stOpnd.GetOffset(), dataSize, false);
163             if (!immOpnd.IsInBitSize(kMaxImmVal12Bits, 0)) {
164                 return false;
165             }
166             if (rematLev < kRematDreadGlobal && !symbol->IsLocal()) {
167                 return false;
168             }
169             return true;
170         }
171         default:
172             return false;
173     }
174 }
175 
176 std::vector<Insn *> LiveRange::Rematerialize(AArch64CGFunc *cgFunc, RegOperand &regOp)
177 {
178     std::vector<Insn *> insns;
179     switch (op) {
180         case OP_constval:
181             switch (rematInfo.mirConst->GetKind()) {
182                 case kConstInt: {
183                     MIRIntConst *intConst =
184                         const_cast<MIRIntConst *>(static_cast<const MIRIntConst *>(rematInfo.mirConst));
185 
186                     Operand *immOp = cgFunc->SelectIntConst(*intConst);
187                     MOperator movOp = (GetSpillSize() == k32BitSize) ? MOP_wmovri32 : MOP_xmovri64;
188                     insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(movOp, regOp, *immOp));
189                     break;
190                 }
191                 default:
192                     DEBUG_ASSERT(false, "Unsupported constant for rematerialization");
193             }
194             break;
195         case OP_dread: {
196             const MIRSymbol *symbol = rematInfo.sym;
197             PrimType symType = symbol->GetType()->GetPrimType();
198             RegOperand *regOp64 = &cgFunc->GetOrCreatePhysicalRegisterOperand(
199                 static_cast<AArch64reg>(regOp.GetRegisterNumber()), k64BitSize, regOp.GetRegisterType());
200             int32 offset = 0;
201             if (fieldID != 0) {
202                 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
203                 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
204                 symType = structType->GetFieldType(fieldID)->GetPrimType();
205                 offset = cgFunc->GetBecommon().GetFieldOffset(*structType, fieldID).first;
206             }
207 
208             uint32 dataSize = GetPrimTypeBitSize(symType);
209             MemOperand *spillMemOp =
210                 &cgFunc->GetOrCreateMemOpndAfterRa(*symbol, offset, dataSize, false, regOp64, insns);
211             MOperator mOp = cgFunc->PickLdInsn(spillMemOp->GetSize(), symType);
212             insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(mOp, regOp, *spillMemOp));
213             break;
214         }
215         case OP_addrof: {
216             const MIRSymbol *symbol = rematInfo.sym;
217             int32 offset = 0;
218             if (fieldID != 0) {
219                 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
220                 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
221                 offset = cgFunc->GetBecommon().GetFieldOffset(*structType, fieldID).first;
222             }
223             StImmOperand &stImm = cgFunc->CreateStImmOperand(*symbol, offset, 0);
224             if ((symbol->GetStorageClass() == kScAuto) || (symbol->GetStorageClass() == kScFormal)) {
225                 AArch64SymbolAlloc *symLoc =
226                     static_cast<AArch64SymbolAlloc *>(cgFunc->GetMemlayout()->GetSymAllocInfo(symbol->GetStIndex()));
227                 ImmOperand *offsetOp = nullptr;
228                 offsetOp = &cgFunc->CreateImmOperand(cgFunc->GetBaseOffset(*symLoc) + offset, k64BitSize, false);
229 
230                 Insn *insn =
231                     &cgFunc->GetInsnBuilder()->BuildInsn(MOP_xaddrri12, regOp, *cgFunc->GetBaseReg(*symLoc), *offsetOp);
232                 if (CGOptions::kVerboseCG) {
233                     std::string comm = "local/formal var: ";
234                     comm.append(symbol->GetName());
235                     insn->SetComment(comm);
236                 }
237                 insns.push_back(insn);
238             } else {
239                 Insn *insn = &cgFunc->GetInsnBuilder()->BuildInsn(MOP_xadrp, regOp, stImm);
240                 insns.push_back(insn);
241                 if (!addrUpper && CGOptions::IsPIC() &&
242                     ((symbol->GetStorageClass() == kScGlobal) || (symbol->GetStorageClass() == kScExtern))) {
243                     /* ldr     x0, [x0, #:got_lo12:Ljava_2Flang_2FSystem_3B_7Cout] */
244                     OfstOperand &offsetOp = cgFunc->CreateOfstOpnd(*symbol, offset, 0);
245                     MemOperand &memOpnd =
246                         cgFunc->GetOrCreateMemOpnd(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte,
247                                                    static_cast<RegOperand *>(&regOp), nullptr, &offsetOp, nullptr);
248                     MOperator ldOp = (memOpnd.GetSize() == k64BitSize) ? MOP_xldr : MOP_wldr;
249                     insn = &cgFunc->GetInsnBuilder()->BuildInsn(ldOp, regOp, memOpnd);
250                     insns.push_back(insn);
251                     if (offset > 0) {
252                         OfstOperand &ofstOpnd =
253                             cgFunc->GetOrCreateOfstOpnd(static_cast<uint64>(static_cast<int64>(offset)), k32BitSize);
254                         insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(MOP_xaddrri12, regOp, regOp, ofstOpnd));
255                     }
256                 } else if (!addrUpper) {
257                     insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(MOP_xadrpl12, regOp, regOp, stImm));
258                 }
259             }
260             break;
261         }
262         default:
263             DEBUG_ASSERT(false, "Unexpected op in live range");
264     }
265 
266     return insns;
267 }
268 
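/*
 * Helpers that iterate over bucketed bit vectors: each bucket holds kU64 bits,
 * so bit b of bucket i corresponds to index (i * kU64 + b), and functor is
 * invoked once for every set bit.
 */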
269 template <typename Func>
270 void GraphColorRegAllocator::ForEachBBArrElem(const uint64 *vec, Func functor) const
271 {
272     for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) {
273         for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
274             if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
275                 functor(iBBArrElem * kU64 + bBBArrElem);
276             }
277         }
278     }
279 }
280 
281 template <typename Func>
282 void GraphColorRegAllocator::ForEachBBArrElemWithInterrupt(const uint64 *vec, Func functor) const
283 {
284     for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) {
285         for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
286             if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
287                 if (functor(iBBArrElem * kU64 + bBBArrElem)) {
288                     return;
289                 }
290             }
291         }
292     }
293 }
294 
295 template <typename Func>
296 void GraphColorRegAllocator::ForEachRegArrElem(const uint64 *vec, Func functor) const
297 {
298     for (uint32 iBBArrElem = 0; iBBArrElem < regBuckets; ++iBBArrElem) {
299         for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
300             if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
301                 functor(iBBArrElem * kU64 + bBBArrElem);
302             }
303         }
304     }
305 }
306 
307 void GraphColorRegAllocator::PrintLiveUnitMap(const LiveRange &lr) const
308 {
309     LogInfo::MapleLogger() << "\n\tlu:";
310     for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) {
311         if (!IsBitArrElemSet(lr.GetBBMember(), i)) {
312             continue;
313         }
314         auto lu = lr.GetLuMap().find(i);
315         if (lu != lr.GetLuMap().end() && (lu->second->GetDefNum() || lu->second->GetUseNum())) {
316             LogInfo::MapleLogger() << "(" << i << " ";
317             lu->second->PrintLiveUnit();
318             LogInfo::MapleLogger() << ")";
319         }
320     }
321     LogInfo::MapleLogger() << "\n";
322 }
323 
324 void GraphColorRegAllocator::PrintLiveRangeConflicts(const LiveRange &lr) const
325 {
326     LogInfo::MapleLogger() << "\n\tinterfere(" << lr.GetNumBBConflicts() << "): ";
327     for (uint32 i = 0; i < regBuckets; ++i) {
328         uint64 chunk = lr.GetBBConflictElem(i);
329         for (uint64 bit = 0; bit < kU64; ++bit) {
330             if (chunk & (1ULL << bit)) {
331                 regno_t newNO = i * kU64 + bit;
332                 LogInfo::MapleLogger() << newNO << ",";
333             }
334         }
335     }
336     LogInfo::MapleLogger() << "\n";
337 }
338 
339 void GraphColorRegAllocator::PrintLiveBBBit(const LiveRange &lr) const
340 {
341     LogInfo::MapleLogger() << "live_bb(" << lr.GetNumBBMembers() << "): ";
342     for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) {
343         if (IsBitArrElemSet(lr.GetBBMember(), i)) {
344             LogInfo::MapleLogger() << i << " ";
345         }
346     }
347     LogInfo::MapleLogger() << "\n";
348 }
349 
350 void GraphColorRegAllocator::PrintLiveRange(const LiveRange &lr, const std::string &str) const
351 {
352     LogInfo::MapleLogger() << str << "\n";
353 
354     LogInfo::MapleLogger() << "R" << lr.GetRegNO();
355     if (lr.GetRegType() == kRegTyInt) {
356         LogInfo::MapleLogger() << "(I)";
357     } else if (lr.GetRegType() == kRegTyFloat) {
358         LogInfo::MapleLogger() << "(F)";
359     } else {
360         LogInfo::MapleLogger() << "(U)";
361     }
362     if (lr.GetSpillSize() == k32) {
363         LogInfo::MapleLogger() << "S32";
364     } else if (lr.GetSpillSize() == k64) {
365         LogInfo::MapleLogger() << "S64";
366     } else {
367         LogInfo::MapleLogger() << "S0(nodef)";
368     }
369     LogInfo::MapleLogger() << "\tnumCall " << lr.GetNumCall();
370     LogInfo::MapleLogger() << "\tpriority " << lr.GetPriority();
371     LogInfo::MapleLogger() << "\tforbidden: ";
372     for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) {
373         if (lr.GetForbidden(preg)) {
374             LogInfo::MapleLogger() << preg << ",";
375         }
376     }
377     LogInfo::MapleLogger() << "\tpregveto: ";
378     for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) {
379         if (lr.GetPregveto(preg)) {
380             LogInfo::MapleLogger() << preg << ",";
381         }
382     }
383     if (lr.IsSpilled()) {
384         LogInfo::MapleLogger() << " spilled";
385     }
386     if (lr.GetSplitLr()) {
387         LogInfo::MapleLogger() << " split";
388     }
389     LogInfo::MapleLogger() << "\top: " << kOpcodeInfo.GetName(lr.GetOp());
390     LogInfo::MapleLogger() << "\n";
391     PrintLiveBBBit(lr);
392     PrintLiveRangeConflicts(lr);
393     PrintLiveUnitMap(lr);
394     if (lr.GetSplitLr()) {
395         PrintLiveRange(*lr.GetSplitLr(), "===>Split LR");
396     }
397 }
398 
399 void GraphColorRegAllocator::PrintLiveRanges() const
400 {
401     LogInfo::MapleLogger() << "PrintLiveRanges: size = " << lrMap.size() << "\n";
402     for (auto it : lrMap) {
403         PrintLiveRange(*it.second, "");
404     }
405     LogInfo::MapleLogger() << "\n";
406 }
407 
408 void GraphColorRegAllocator::PrintLocalRAInfo(const std::string &str) const
409 {
410     LogInfo::MapleLogger() << str << "\n";
411     for (uint32 id = 0; id < cgFunc->NumBBs(); ++id) {
412         LocalRaInfo *lraInfo = localRegVec[id];
413         if (lraInfo == nullptr) {
414             continue;
415         }
416         LogInfo::MapleLogger() << "bb " << id << " def ";
417         for (const auto &defCntPair : lraInfo->GetDefCnt()) {
418             LogInfo::MapleLogger() << "[" << defCntPair.first << ":" << defCntPair.second << "],";
419         }
420         LogInfo::MapleLogger() << "\n";
421         LogInfo::MapleLogger() << "use ";
422         for (const auto &useCntPair : lraInfo->GetUseCnt()) {
423             LogInfo::MapleLogger() << "[" << useCntPair.first << ":" << useCntPair.second << "],";
424         }
425         LogInfo::MapleLogger() << "\n";
426     }
427 }
428 
429 void GraphColorRegAllocator::PrintBBAssignInfo() const
430 {
431     for (size_t id = 0; id < bfs->sortedBBs.size(); ++id) {
432         uint32 bbID = bfs->sortedBBs[id]->GetId();
433         BBAssignInfo *bbInfo = bbRegInfo[bbID];
434         if (bbInfo == nullptr) {
435             continue;
436         }
437         LogInfo::MapleLogger() << "BBinfo(" << id << ")";
438         LogInfo::MapleLogger() << " lra-needed int " << bbInfo->GetIntLocalRegsNeeded();
439         LogInfo::MapleLogger() << " fp " << bbInfo->GetFpLocalRegsNeeded();
440         LogInfo::MapleLogger() << " greg-used ";
441         for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
442             if (bbInfo->GetGlobalsAssigned(regNO)) {
443                 LogInfo::MapleLogger() << regNO << ",";
444             }
445         }
446         LogInfo::MapleLogger() << "\n";
447     }
448 }
449 
450 void GraphColorRegAllocator::CalculatePriority(LiveRange &lr) const
451 {
452 #ifdef RANDOM_PRIORITY
453     unsigned long seed = 0;
454     size_t size = sizeof(seed);
455     std::ifstream randomNum("/dev/random", std::ios::in | std::ios::binary);
456     if (randomNum) {
457         randomNum.read(reinterpret_cast<char *>(&seed), size);
458         if (randomNum) {
459             lr.SetPriority(1 / (seed + 1));
460         }
461         randomNum.close();
462     } else {
463         std::cerr << "Failed to open /dev/random" << '\n';
464     }
465     return;
466 #endif /* RANDOM_PRIORITY */
467     float pri = 0.0;
468     uint32 bbNum = 0;
469     uint32 numDefs = 0;
470     uint32 numUses = 0;
471     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
472     CG *cg = a64CGFunc->GetCG();
473 
474     if (cg->GetRematLevel() >= kRematConst && lr.IsRematerializable(*a64CGFunc, kRematConst)) {
475         lr.SetRematLevel(kRematConst);
476     } else if (cg->GetRematLevel() >= kRematAddr && lr.IsRematerializable(*a64CGFunc, kRematAddr)) {
477         lr.SetRematLevel(kRematAddr);
478     } else if (cg->GetRematLevel() >= kRematDreadLocal && lr.IsRematerializable(*a64CGFunc, kRematDreadLocal)) {
479         lr.SetRematLevel(kRematDreadLocal);
480     } else if (cg->GetRematLevel() >= kRematDreadGlobal && lr.IsRematerializable(*a64CGFunc, kRematDreadGlobal)) {
481         lr.SetRematLevel(kRematDreadGlobal);
482     }
483 
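    /*
     * Priority accumulates (defNum + useNum) per member BB, weighted by loop depth;
     * rematerializable LRs are scaled down afterwards since respilling them is cheap.
     */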
484     auto calculatePriorityFunc = [&lr, &bbNum, &numDefs, &numUses, &pri, this](uint32 bbID) {
485         auto lu = lr.FindInLuMap(bbID);
486         DEBUG_ASSERT(lu != lr.EndOfLuMap(), "can not find live unit");
487         BB *bb = bbVec[bbID];
488         if (bb->GetFirstInsn() != nullptr && !bb->IsSoloGoto()) {
489             ++bbNum;
490             numDefs += lu->second->GetDefNum();
491             numUses += lu->second->GetUseNum();
492             uint32 useCnt = lu->second->GetDefNum() + lu->second->GetUseNum();
493             uint32 mult;
494 #ifdef USE_BB_FREQUENCY
495             mult = bb->GetFrequency();
496 #else  /* USE_BB_FREQUENCY */
497             if (bb->GetLoop() != nullptr) {
498                 uint32 loopFactor;
499                 if (lr.GetNumCall() > 0 && lr.GetRematLevel() == kRematOff) {
500                     loopFactor = bb->GetLoop()->GetLoopLevel() * kAdjustWeight;
501                 } else {
502                     loopFactor = bb->GetLoop()->GetLoopLevel() / kAdjustWeight;
503                 }
504                 mult = static_cast<uint32>(pow(kLoopWeight, loopFactor));
505             } else {
506                 mult = 1;
507             }
508 #endif /* USE_BB_FREQUENCY */
509             pri += useCnt * mult;
510         }
511     };
512     ForEachBBArrElem(lr.GetBBMember(), calculatePriorityFunc);
513 
514     if (lr.GetRematLevel() == kRematAddr || lr.GetRematLevel() == kRematConst) {
515         if (numDefs <= 1 && numUses <= 1) {
516             pri = -0xFFFF;
517         } else {
518             pri /= kRematWeight;
519         }
520     } else if (lr.GetRematLevel() == kRematDreadLocal) {
521         pri /= 4; // divide by 4 to lower priority
522     } else if (lr.GetRematLevel() == kRematDreadGlobal) {
523         pri /= 2; // divide by 2 to lower priority
524     }
525 
526     lr.SetPriority(pri);
527     lr.SetNumDefs(numDefs);
528     lr.SetNumUses(numUses);
529     if (lr.GetPriority() > 0 && numDefs <= kPriorityDefThreashold && numUses <= kPriorityUseThreashold &&
530         cgFunc->NumBBs() > kPriorityBBThreashold &&
531         (static_cast<float>(lr.GetNumBBMembers()) / cgFunc->NumBBs()) > kPriorityRatioThreashold) {
532         /* for large functions, delay allocating long LR with few defs and uses */
533         lr.SetPriority(0.0);
534     }
535 }
536 
537 void GraphColorRegAllocator::PrintBBs() const
538 {
539     for (auto *bb : bfs->sortedBBs) {
540         LogInfo::MapleLogger() << "\n< === > ";
541         LogInfo::MapleLogger() << bb->GetId();
542         LogInfo::MapleLogger() << " succs:";
543         for (auto *succBB : bb->GetSuccs()) {
544             LogInfo::MapleLogger() << " " << succBB->GetId();
545         }
546         LogInfo::MapleLogger() << " eh_succs:";
547         for (auto *succBB : bb->GetEhSuccs()) {
548             LogInfo::MapleLogger() << " " << succBB->GetId();
549         }
550     }
551     LogInfo::MapleLogger() << "\n";
552 }
553 
554 uint32 GraphColorRegAllocator::MaxIntPhysRegNum() const
555 {
556     return (R28 - R0);
557 }
558 
559 uint32 GraphColorRegAllocator::MaxFloatPhysRegNum() const
560 {
561     return (V31 - V0);
562 }
563 
564 bool GraphColorRegAllocator::IsReservedReg(AArch64reg regNO) const
565 {
566     if (!doMultiPass || cgFunc->GetMirModule().GetSrcLang() != kSrcLangC) {
567         return (regNO == R16) || (regNO == R17);
568     } else {
569         return (regNO == R16);
570     }
571 }
572 
573 void GraphColorRegAllocator::InitFreeRegPool()
574 {
575     /*
576      *  ==== int regs ====
577      *  FP 29, LR 30, SP 31, 0 to 7 parameters
578 
579      *  MapleCG defines 32 as ZR (zero register)
580      *  use 8 if callee does not return large struct ? No
581      *  16 and 17 are intra-procedure call temp, can be caller saved
582      *  18 is platform reg, still use it
583      */
584     uint32 intNum = 0;
585     uint32 fpNum = 0;
586     for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) {
587         if (!AArch64Abi::IsAvailableReg(static_cast<AArch64reg>(regNO))) {
588             continue;
589         }
590 
591         /*
592          * Because of the try-catch scenario in JAVALANG,
593          * we should use specialized spill register to prevent register changes when exceptions occur.
594          */
595         if (JAVALANG && AArch64Abi::IsSpillRegInRA(static_cast<AArch64reg>(regNO), needExtraSpillReg)) {
596             if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
597                 /* Preset int spill registers */
598                 (void)intSpillRegSet.insert(regNO - R0);
599             } else {
600                 /* Preset float spill registers */
601                 (void)fpSpillRegSet.insert(regNO - V0);
602             }
603             continue;
604         }
605 
606 #ifdef RESERVED_REGS
607         /* r16 and r17 are also used outside of RA, so keep them reserved. */
608         if (IsReservedReg(static_cast<AArch64reg>(regNO))) {
609             continue;
610         }
611 #endif /* RESERVED_REGS */
612 
613         if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
614             /* when yieldpoint is enabled, x19 is reserved. */
615             if (IsYieldPointReg(static_cast<AArch64reg>(regNO))) {
616                 continue;
617             }
618             if (regNO == R29) {
619                 if (!cgFunc->UseFP()) {
620                     (void)intCalleeRegSet.insert(regNO - R0);
621                     ++intNum;
622                 }
623                 continue;
624             }
625             if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO))) {
626                 (void)intCalleeRegSet.insert(regNO - R0);
627             } else {
628                 (void)intCallerRegSet.insert(regNO - R0);
629             }
630             ++intNum;
631         } else {
632             if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO))) {
633                 (void)fpCalleeRegSet.insert(regNO - V0);
634             } else {
635                 (void)fpCallerRegSet.insert(regNO - V0);
636             }
637             ++fpNum;
638         }
639     }
640     intRegNum = intNum;
641     fpRegNum = fpNum;
642 }
643 
644 void GraphColorRegAllocator::InitCCReg()
645 {
646     Operand &opnd = cgFunc->GetOrCreateRflag();
647     auto &tmpRegOp = static_cast<RegOperand &>(opnd);
648     ccReg = tmpRegOp.GetRegisterNumber();
649 }
650 
651 bool GraphColorRegAllocator::IsYieldPointReg(regno_t regNO) const
652 {
653     if (cgFunc->GetCG()->GenYieldPoint()) {
654         return (regNO == RYP);
655     }
656     return false;
657 }
658 
659 bool GraphColorRegAllocator::IsUnconcernedReg(regno_t regNO) const
660 {
661     /* RFP = 32, RLR = 31, RSP = 33, RZR = 34 */
662     if ((regNO >= RLR && regNO <= RZR) || regNO == RFP || regNO == ccReg) {
663         return true;
664     }
665 
666     /* when yieldpoint is enabled, the RYP(x19) can not be used. */
667     if (IsYieldPointReg(static_cast<AArch64reg>(regNO))) {
668         return true;
669     }
670 
671     return false;
672 }
673 
674 bool GraphColorRegAllocator::IsUnconcernedReg(const RegOperand &regOpnd) const
675 {
676     RegType regType = regOpnd.GetRegisterType();
677     if (regType == kRegTyCc || regType == kRegTyVary) {
678         return true;
679     }
680     uint32 regNO = regOpnd.GetRegisterNumber();
681     if (regNO == RZR) {
682         return true;
683     }
684     return IsUnconcernedReg(regNO);
685 }
686 
687 /*
688  *  Based on live analysis, the live-in and live-out set determines
689  *  the bit to be set in the LR vector, which is of size #BBs.
690  *  If a vreg is in the live-in and live-out set, it is live in the BB.
691  *
692  *  Also keep track if a LR crosses a call.  If a LR crosses a call, it
693  *  interferes with all caller saved registers.  Add all caller registers
694  *  to the LR's forbidden list.
695  *
696  *  Return created LiveRange object
697  *
698  *  maybe need extra info:
699  *  Add info for setjmp.
700  *  Add info for defBB, useBB, index in BB for def and use
701  *  Add info for startingBB and endingBB
702  */
703 LiveRange *GraphColorRegAllocator::NewLiveRange()
704 {
705     LiveRange *lr = memPool->New<LiveRange>(alloc);
706 
707     if (bbBuckets == 0) {
708         bbBuckets = (cgFunc->NumBBs() / kU64) + 1;
709     }
710     lr->SetBBBuckets(bbBuckets);
711     lr->InitBBMember(*memPool, bbBuckets);
712     if (regBuckets == 0) {
713         regBuckets = (cgFunc->GetMaxRegNum() / kU64) + 1;
714     }
715     lr->SetRegBuckets(regBuckets);
716     lr->InitBBConflict(*memPool, regBuckets);
717     lr->InitPregveto();
718     lr->InitForbidden();
719     return lr;
720 }
721 
722 /* Create local info for LR.  return true if reg is not local. */
723 bool GraphColorRegAllocator::CreateLiveRangeHandleLocal(regno_t regNO, const BB &bb, bool isDef)
724 {
725     if (FindIn(bb.GetLiveInRegNO(), regNO) || FindIn(bb.GetLiveOutRegNO(), regNO)) {
726         return true;
727     }
728     /*
729      *  register not in globals for the bb, so it is local.
730      *  Compute local RA info.
731      */
732     LocalRaInfo *lraInfo = localRegVec[bb.GetId()];
733     if (lraInfo == nullptr) {
734         lraInfo = memPool->New<LocalRaInfo>(alloc);
735         localRegVec[bb.GetId()] = lraInfo;
736     }
737     if (isDef) {
738         /* movk is handled by different id for use/def in the same insn. */
739         lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1);
740     } else {
741         lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1);
742     }
743     /* LR info is still useful for LRA, so keep collecting it. */
744     return false;
745 }
746 
747 LiveRange *GraphColorRegAllocator::CreateLiveRangeAllocateAndUpdate(regno_t regNO, const BB &bb, bool isDef,
748                                                                     uint32 currId)
749 {
750     LiveRange *lr = GetLiveRange(regNO);
751     if (lr == nullptr) {
752         lr = NewLiveRange();
753         lr->SetID(currId);
754 
755         LiveUnit *lu = memPool->New<LiveUnit>();
756         lr->SetElemToLuMap(bb.GetId(), *lu);
757         lu->SetBegin(currId);
758         lu->SetEnd(currId);
759         if (isDef) {
760             /* means no use after def for reg, chances for ebo opt */
761             for (const auto &pregNO : pregLive) {
762                 lr->InsertElemToPregveto(pregNO);
763             }
764         }
765     } else {
766         LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb.GetId());
767         if (lu == nullptr) {
768             lu = memPool->New<LiveUnit>();
769             lr->SetElemToLuMap(bb.GetId(), *lu);
770             lu->SetBegin(currId);
771             lu->SetEnd(currId);
772         }
773         if (lu->GetBegin() > currId) {
774             lu->SetBegin(currId);
775         }
776     }
777 
778     if (CLANG) {
779         auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
780         MIRPreg *preg = a64CGFunc->GetPseudoRegFromVirtualRegNO(regNO, CGOptions::DoCGSSA());
781         if (preg) {
782             switch (preg->GetOp()) {
783                 case OP_constval:
784                     lr->SetRematerializable(preg->rematInfo.mirConst);
785                     break;
786                 case OP_addrof:
787                 case OP_dread:
788                     lr->SetRematerializable(preg->GetOp(), preg->rematInfo.sym, preg->fieldID, preg->addrUpper);
789                     break;
790                 case OP_undef:
791                     break;
792                 default:
793                     DEBUG_ASSERT(false, "Unexpected op in Preg");
794             }
795         }
796     }
797 
798     return lr;
799 }
800 
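/*
 * Create or extend the live range of regNO for bb at point currId, update the
 * live vreg set, and veto every physical register currently live against this LR.
 */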
801 void GraphColorRegAllocator::CreateLiveRange(regno_t regNO, const BB &bb, bool isDef, uint32 currId, bool updateCount)
802 {
803     bool isNonLocal = CreateLiveRangeHandleLocal(regNO, bb, isDef);
804 
805     if (!isDef) {
806         --currId;
807     }
808 
809     LiveRange *lr = CreateLiveRangeAllocateAndUpdate(regNO, bb, isDef, currId);
810     lr->SetRegNO(regNO);
811     lr->SetIsNonLocal(isNonLocal);
812     if (isDef) {
813         (void)vregLive.erase(regNO);
814 #ifdef OPTIMIZE_FOR_PROLOG
815         if (doOptProlog && updateCount) {
816             if (lr->GetNumDefs() == 0) {
817                 lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency());
818             }
819             lr->IncNumDefs();
820         }
821 #endif /* OPTIMIZE_FOR_PROLOG */
822     } else {
823         (void)vregLive.insert(regNO);
824 #ifdef OPTIMIZE_FOR_PROLOG
825         if (doOptProlog && updateCount) {
826             if (lr->GetNumUses() == 0) {
827                 lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency());
828             }
829             lr->IncNumUses();
830         }
831 #endif /* OPTIMIZE_FOR_PROLOG */
832     }
833     for (const auto &pregNO : pregLive) {
834         lr->InsertElemToPregveto(pregNO);
835     }
836 
837     /* only handle it in live_in and def point? */
838     uint32 bbID = bb.GetId();
839     lr->SetMemberBitArrElem(bbID);
840 
841     lrMap[regNO] = lr;
842 }
843 
844 bool GraphColorRegAllocator::SetupLiveRangeByOpHandlePhysicalReg(const RegOperand &regOpnd, Insn &insn, regno_t regNO,
845                                                                  bool isDef)
846 {
847     if (!regOpnd.IsPhysicalRegister()) {
848         return false;
849     }
850     LocalRaInfo *lraInfo = localRegVec[insn.GetBB()->GetId()];
851     if (lraInfo == nullptr) {
852         lraInfo = memPool->New<LocalRaInfo>(alloc);
853         localRegVec[insn.GetBB()->GetId()] = lraInfo;
854     }
855 
856     if (isDef) {
857         if (FindNotIn(pregLive, regNO)) {
858             for (const auto &vRegNO : vregLive) {
859                 if (IsUnconcernedReg(vRegNO)) {
860                     continue;
861                 }
862                 lrMap[vRegNO]->InsertElemToPregveto(regNO);
863             }
864         }
865         pregLive.erase(regNO);
866         if (lraInfo != nullptr) {
867             lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1);
868         }
869     } else {
870         (void)pregLive.insert(regNO);
871         for (const auto &vregNO : vregLive) {
872             if (IsUnconcernedReg(vregNO)) {
873                 continue;
874             }
875             LiveRange *lr = lrMap[vregNO];
876             lr->InsertElemToPregveto(regNO);
877         }
878 
879         if (lraInfo != nullptr) {
880             lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1);
881         }
882     }
883     return true;
884 }
885 
886 /*
887  *  add pregs to forbidden list of lr. If preg is in
888  *  the live list, then it is forbidden for other vreg on the list.
889  */
890 void GraphColorRegAllocator::SetupLiveRangeByOp(Operand &op, Insn &insn, bool isDef, uint32 &numUses)
891 {
892     if (!op.IsRegister()) {
893         return;
894     }
895     auto &regOpnd = static_cast<RegOperand &>(op);
896     uint32 regNO = regOpnd.GetRegisterNumber();
897     if (IsUnconcernedReg(regOpnd)) {
898         if (GetLiveRange(regNO) != nullptr) {
899             DEBUG_ASSERT(false, "Unconcerned reg");
900             lrMap.erase(regNO);
901         }
902         return;
903     }
904     if (SetupLiveRangeByOpHandlePhysicalReg(regOpnd, insn, regNO, isDef)) {
905         return;
906     }
907 
908     CreateLiveRange(regNO, *insn.GetBB(), isDef, insn.GetId(), true);
909 
910     LiveRange *lr = GetLiveRange(regNO);
911     DEBUG_ASSERT(lr != nullptr, "lr should not be nullptr");
912     if (isDef) {
913         lr->SetSpillSize((regOpnd.GetSize() <= k32) ? k32 : k64);
914     }
915     if (lr->GetRegType() == kRegTyUndef) {
916         lr->SetRegType(regOpnd.GetRegisterType());
917     }
918     if (isDef) {
919         lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncDefNum();
920         lr->AddRef(insn.GetBB()->GetId(), insn.GetId(), kIsDef);
921     } else {
922         lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncUseNum();
923         lr->AddRef(insn.GetBB()->GetId(), insn.GetId(), kIsUse);
924         ++numUses;
925     }
926 #ifdef MOVE_COALESCE
927     if (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr) {
928         RegOperand &opnd1 = static_cast<RegOperand &>(insn.GetOperand(1));
929         if (opnd1.GetRegisterNumber() < kAllRegNum && !IsUnconcernedReg(opnd1)) {
930             lr->InsertElemToPrefs(opnd1.GetRegisterNumber() - R0);
931         }
932         RegOperand &opnd0 = static_cast<RegOperand &>(insn.GetOperand(0));
933         if (opnd0.GetRegisterNumber() < kAllRegNum) {
934             lr->InsertElemToPrefs(opnd0.GetRegisterNumber() - R0);
935         }
936     }
937 #endif /*  MOVE_COALESCE */
938     if (!insn.IsSpecialIntrinsic() && insn.GetBothDefUseOpnd() != kInsnMaxOpnd) {
939         lr->SetDefUse();
940     }
941 }
942 
943 /* handle live range for bb->live_out */
944 void GraphColorRegAllocator::SetupLiveRangeByRegNO(regno_t liveOut, BB &bb, uint32 currPoint)
945 {
946     if (IsUnconcernedReg(liveOut)) {
947         return;
948     }
949     if (liveOut >= kAllRegNum) {
950         (void)vregLive.insert(liveOut);
951         CreateLiveRange(liveOut, bb, false, currPoint, false);
952         return;
953     }
954 
955     (void)pregLive.insert(liveOut);
956     for (const auto &vregNO : vregLive) {
957         LiveRange *lr = lrMap[vregNO];
958         lr->InsertElemToPregveto(liveOut);
959     }
960 
961     /* See if the phys reg is also live-in; if so, assume it spans the entire bb. */
962     if (!FindIn(bb.GetLiveInRegNO(), liveOut)) {
963         return;
964     }
965     LocalRaInfo *lraInfo = localRegVec[bb.GetId()];
966     if (lraInfo == nullptr) {
967         lraInfo = memPool->New<LocalRaInfo>(alloc);
968         localRegVec[bb.GetId()] = lraInfo;
969     }
970     /* Make the count large enough so that no locals can be allocated. */
971     lraInfo->SetUseCntElem(liveOut, kMaxUint16);
972 }
973 
974 void GraphColorRegAllocator::ClassifyOperand(std::unordered_set<regno_t> &pregs, std::unordered_set<regno_t> &vregs,
975                                              const Operand &opnd) const
976 {
977     if (!opnd.IsRegister()) {
978         return;
979     }
980     auto &regOpnd = static_cast<const RegOperand &>(opnd);
981     regno_t regNO = regOpnd.GetRegisterNumber();
982     if (IsUnconcernedReg(regNO)) {
983         return;
984     }
985     if (regOpnd.IsPhysicalRegister()) {
986         (void)pregs.insert(regNO);
987     } else {
988         (void)vregs.insert(regNO);
989     }
990 }
991 
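/*
 * Record intra-insn conflicts: every vreg collected from insn's operands conflicts
 * with the other vregs of the same insn, and the collected pregs are added to each
 * vreg's pregveto set.
 */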
992 void GraphColorRegAllocator::SetOpndConflict(const Insn &insn, bool onlyDef)
993 {
994     uint32 opndNum = insn.GetOperandSize();
995     if (opndNum <= 1) {
996         return;
997     }
998     const InsnDesc *md = insn.GetDesc();
999     std::unordered_set<regno_t> pregs;
1000     std::unordered_set<regno_t> vregs;
1001 
1002     for (uint32 i = 0; i < opndNum; ++i) {
1003         Operand &opnd = insn.GetOperand(i);
1004         if (!onlyDef) {
1005             if (opnd.IsList()) {
1006                 auto &listOpnd = static_cast<ListOperand &>(opnd);
1007                 for (auto op : listOpnd.GetOperands()) {
1008                     ClassifyOperand(pregs, vregs, *op);
1009                 }
1010             } else if (opnd.IsMemoryAccessOperand()) {
1011                 auto &memOpnd = static_cast<MemOperand &>(opnd);
1012                 Operand *base = memOpnd.GetBaseRegister();
1013                 Operand *offset = memOpnd.GetIndexRegister();
1014                 if (base != nullptr) {
1015                     ClassifyOperand(pregs, vregs, *base);
1016                 }
1017                 if (offset != nullptr) {
1018                     ClassifyOperand(pregs, vregs, *offset);
1019                 }
1020             } else if (opnd.IsRegister()) {
1021                 ClassifyOperand(pregs, vregs, opnd);
1022             }
1023         } else {
1024             if (md->GetOpndDes(i)->IsRegDef()) {
1025                 ClassifyOperand(pregs, vregs, opnd);
1026             }
1027             if (opnd.IsMemoryAccessOperand()) {
1028                 auto &memOpnd = static_cast<MemOperand &>(opnd);
1029                 Operand *base = memOpnd.GetBaseRegister();
1030                 if (base != nullptr && !memOpnd.IsIntactIndexed()) {
1031                     ClassifyOperand(pregs, vregs, *base);
1032                 }
1033             }
1034         }
1035     }
1036 
1037     if (vregs.empty()) {
1038         return;
1039     }
1040     /* Set BBConflict and Pregveto */
1041     for (regno_t vregNO : vregs) {
1042         for (regno_t conflictVregNO : vregs) {
1043             if (conflictVregNO != vregNO) {
1044                 lrMap[vregNO]->SetConflictBitArrElem(conflictVregNO);
1045             }
1046         }
1047         for (regno_t conflictPregNO : pregs) {
1048             lrMap[vregNO]->InsertElemToPregveto(conflictPregNO);
1049         }
1050     }
1051 }
1052 
1053 void GraphColorRegAllocator::UpdateOpndConflict(const Insn &insn, bool multiDef)
1054 {
1055     /* if IsSpecialIntrinsic or IsAtomicStore, set conflicts for all opnds */
1056     if (insn.IsAtomicStore() || insn.IsSpecialIntrinsic()) {
1057         SetOpndConflict(insn, false);
1058         return;
1059     }
1060     if (multiDef) {
1061         SetOpndConflict(insn, true);
1062     }
1063 }
1064 
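/*
 * Build live ranges for the def operands of insn, including asm output/clobber
 * lists and auto-updated memory base registers; more than one def in a single
 * insn requires an extra spill register.
 */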
1065 void GraphColorRegAllocator::ComputeLiveRangesForEachDefOperand(Insn &insn, bool &multiDef)
1066 {
1067     uint32 numDefs = 0;
1068     uint32 numUses = 0;
1069     const InsnDesc *md = insn.GetDesc();
1070     uint32 opndNum = insn.GetOperandSize();
1071     for (uint32 i = 0; i < opndNum; ++i) {
1072         if (insn.GetMachineOpcode() == MOP_asm && (i == kAsmOutputListOpnd || i == kAsmClobberListOpnd)) {
1073             for (auto opnd : static_cast<ListOperand &>(insn.GetOperand(i)).GetOperands()) {
1074                 SetupLiveRangeByOp(*static_cast<RegOperand *>(opnd), insn, true, numUses);
1075                 ++numDefs;
1076             }
1077             continue;
1078         }
1079         Operand &opnd = insn.GetOperand(i);
1080         if (opnd.IsMemoryAccessOperand()) {
1081             auto &memOpnd = static_cast<MemOperand &>(opnd);
1082             if (!memOpnd.IsIntactIndexed()) {
1083                 SetupLiveRangeByOp(opnd, insn, true, numUses);
1084                 ++numDefs;
1085             }
1086         }
1087         if (!md->GetOpndDes(i)->IsRegDef()) {
1088             continue;
1089         }
1090         SetupLiveRangeByOp(opnd, insn, true, numUses);
1091         ++numDefs;
1092     }
1093     DEBUG_ASSERT(numUses == 0, "should only be def opnd");
1094     if (numDefs > 1) {
1095         multiDef = true;
1096         needExtraSpillReg = true;
1097     }
1098 }
1099 
1100 void GraphColorRegAllocator::ComputeLiveRangesForEachUseOperand(Insn &insn)
1101 {
1102     uint32 numUses = 0;
1103     const InsnDesc *md = insn.GetDesc();
1104     uint32 opndNum = insn.GetOperandSize();
1105     for (uint32 i = 0; i < opndNum; ++i) {
1106         if (insn.GetMachineOpcode() == MOP_asm && i == kAsmInputListOpnd) {
1107             for (auto opnd : static_cast<ListOperand &>(insn.GetOperand(i)).GetOperands()) {
1108                 SetupLiveRangeByOp(*static_cast<RegOperand *>(opnd), insn, false, numUses);
1109             }
1110             continue;
1111         }
1112         if (md->GetOpndDes(i)->IsRegDef() && !md->GetOpndDes(i)->IsRegUse()) {
1113             continue;
1114         }
1115         Operand &opnd = insn.GetOperand(i);
1116         if (opnd.IsList()) {
1117             auto &listOpnd = static_cast<ListOperand &>(opnd);
1118             for (auto op : listOpnd.GetOperands()) {
1119                 SetupLiveRangeByOp(*op, insn, false, numUses);
1120             }
1121         } else if (opnd.IsMemoryAccessOperand()) {
1122             auto &memOpnd = static_cast<MemOperand &>(opnd);
1123             Operand *base = memOpnd.GetBaseRegister();
1124             Operand *offset = memOpnd.GetIndexRegister();
1125             if (base != nullptr) {
1126                 SetupLiveRangeByOp(*base, insn, false, numUses);
1127             }
1128             if (offset != nullptr) {
1129                 SetupLiveRangeByOp(*offset, insn, false, numUses);
1130             }
1131         } else {
1132             SetupLiveRangeByOp(opnd, insn, false, numUses);
1133         }
1134     }
1135     if (numUses >= AArch64Abi::kNormalUseOperandNum || insn.GetMachineOpcode() == MOP_lazy_ldr) {
1136         needExtraSpillReg = true;
1137     }
1138 }
1139 
1140 void GraphColorRegAllocator::ComputeLiveRangesUpdateIfInsnIsCall(const Insn &insn)
1141 {
1142     if (!insn.IsCall()) {
1143         return;
1144     }
1145     /* def the return value */
1146     pregLive.erase(R0);
1147     pregLive.erase(V0);
1148 
1149     /* activate the parameters */
1150     Operand &opnd1 = insn.GetOperand(1);
1151     if (opnd1.IsList()) {
1152         auto &srcOpnds = static_cast<ListOperand &>(opnd1);
1153         for (auto regOpnd : srcOpnds.GetOperands()) {
1154             DEBUG_ASSERT(!regOpnd->IsVirtualRegister(), "not be a virtual register");
1155             auto physicalReg = static_cast<AArch64reg>(regOpnd->GetRegisterNumber());
1156             (void)pregLive.insert(physicalReg);
1157         }
1158     }
1159 }
1160 
1161 void GraphColorRegAllocator::ComputeLiveRangesUpdateLiveUnitInsnRange(BB &bb, uint32 currPoint)
1162 {
1163     for (auto lin : bb.GetLiveInRegNO()) {
1164         if (lin < kAllRegNum) {
1165             continue;
1166         }
1167         LiveRange *lr = GetLiveRange(lin);
1168         if (lr == nullptr) {
1169             continue;
1170         }
1171         auto lu = lr->FindInLuMap(bb.GetId());
1172         DEBUG_ASSERT(lu != lr->EndOfLuMap(), "container empty check");
1173         if (bb.GetFirstInsn()) {
1174             lu->second->SetBegin(bb.GetFirstInsn()->GetId());
1175         } else {
1176             /* bb is empty, so use the current point as is */
1177             lu->second->SetBegin(currPoint);
1178         }
1179         lu->second->SetBegin(lu->second->GetBegin() - 1);
1180     }
1181 }
1182 
1183 bool GraphColorRegAllocator::UpdateInsnCntAndSkipUseless(Insn &insn, uint32 &currPoint) const
1184 {
1185     insn.SetId(currPoint);
1186     if (insn.IsImmaterialInsn() || !insn.IsMachineInstruction()) {
1187         --currPoint;
1188         return true;
1189     }
1190     return false;
1191 }
1192 
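/*
 * At a call site, update every live vreg: when the callee's referenced registers are
 * known via IPA, record only the caller-saved registers it actually touches as call
 * defs; otherwise mark the LR as crossing a call. Each live LR also counts the call
 * and records a call reference in its live unit.
 */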
1193 void GraphColorRegAllocator::UpdateCallInfo(uint32 bbId, uint32 currPoint, const Insn &insn)
1194 {
1195     auto *targetOpnd = insn.GetCallTargetOperand();
1196     CHECK_FATAL(targetOpnd != nullptr, "target is null in Insn::IsCallToFunctionThatNeverReturns");
1197     if (CGOptions::DoIPARA() && targetOpnd->IsFuncNameOpnd()) {
1198         FuncNameOperand *target = static_cast<FuncNameOperand *>(targetOpnd);
1199         const MIRSymbol *funcSt = target->GetFunctionSymbol();
1200         DEBUG_ASSERT(funcSt->GetSKind() == kStFunc, "funcst must be a function name symbol");
1201         MIRFunction *func = funcSt->GetFunction();
1202         if (func != nullptr && func->IsReferedRegsValid()) {
1203             for (auto preg : func->GetReferedRegs()) {
1204                 if (AArch64Abi::IsCallerSaveReg(static_cast<AArch64reg>(preg))) {
1205                     for (auto vregNO : vregLive) {
1206                         LiveRange *lr = lrMap[vregNO];
1207                         lr->InsertElemToCallDef(preg);
1208                     }
1209                 }
1210             }
1211         } else {
1212             for (auto vregNO : vregLive) {
1213                 LiveRange *lr = lrMap[vregNO];
1214                 lr->SetCrossCall();
1215             }
1216         }
1217     } else {
1218         for (auto vregNO : vregLive) {
1219             LiveRange *lr = lrMap[vregNO];
1220             lr->SetCrossCall();
1221         }
1222     }
1223     for (auto vregNO : vregLive) {
1224         LiveRange *lr = lrMap[vregNO];
1225         lr->IncNumCall();
1226         lr->AddRef(bbId, currPoint, kIsCall);
1227 
1228         auto lu = lr->FindInLuMap(bbId);
1229         if (lu != lr->EndOfLuMap()) {
1230             lu->second->SetHasCall(true);
1231         }
1232     }
1233 }
1234 
1235 void GraphColorRegAllocator::SetLrMustAssign(const RegOperand *regOpnd)
1236 {
1237     regno_t regNO = regOpnd->GetRegisterNumber();
1238     LiveRange *lr = GetLiveRange(regNO);
1239     if (lr != nullptr) {
1240         lr->SetMustAssigned();
1241         lr->SetIsNonLocal(true);
1242     }
1243 }
1244 
1245 void GraphColorRegAllocator::SetupMustAssignedLiveRanges(const Insn &insn)
1246 {
1247     if (!insn.IsSpecialIntrinsic()) {
1248         return;
1249     }
1250     if (insn.GetMachineOpcode() == MOP_asm) {
1251         for (auto regOpnd : static_cast<ListOperand &>(insn.GetOperand(kAsmOutputListOpnd)).GetOperands()) {
1252             SetLrMustAssign(regOpnd);
1253         }
1254         for (auto regOpnd : static_cast<ListOperand &>(insn.GetOperand(kAsmInputListOpnd)).GetOperands()) {
1255             SetLrMustAssign(regOpnd);
1256         }
1257         return;
1258     }
1259     uint32 opndNum = insn.GetOperandSize();
1260     for (uint32 i = 0; i < opndNum; ++i) {
1261         Operand *opnd = &insn.GetOperand(i);
1262         if (!opnd->IsRegister()) {
1263             continue;
1264         }
1265         auto regOpnd = static_cast<RegOperand *>(opnd);
1266         SetLrMustAssign(regOpnd);
1267     }
1268 }
1269 
1270 /*
1271  *  For each succ bb->GetSuccs(), if bb->liveout - succ->livein is not empty, the vreg(s) is
1272  *  dead on this path (but alive on the other path as there is some use of it on the
1273  *  other path).  This might be useful for optimization of reload placement later for
1274  *  splits (lr split into lr1 & lr2 and lr2 will need to reload.)
1275  *  Not for now though.
1276  */
1277 void GraphColorRegAllocator::ComputeLiveRanges()
1278 {
1279     bbVec.clear();
1280     bbVec.resize(cgFunc->NumBBs());
1281 
1282     auto currPoint = static_cast<uint32>(cgFunc->GetTotalNumberOfInstructions() + bfs->sortedBBs.size());
1283     /* distinguish use/def */
1284     CHECK_FATAL(currPoint < (INT_MAX >> 2), "integer overflow check"); // multiply by 4 to distinguish def or use reg
1285     currPoint = currPoint << 2; // multiply by 4 to distinguish def or use reg
1286     for (size_t bbIdx = bfs->sortedBBs.size(); bbIdx > 0; --bbIdx) {
1287         BB *bb = bfs->sortedBBs[bbIdx - 1];
1288         bbVec[bb->GetId()] = bb;
1289         bb->SetLevel(bbIdx - 1);
1290 
1291         pregLive.clear();
1292         vregLive.clear();
1293         for (auto liveOut : bb->GetLiveOutRegNO()) {
1294             SetupLiveRangeByRegNO(liveOut, *bb, currPoint);
1295         }
1296         --currPoint;
1297 
1298         if (bb->GetLastInsn() != nullptr && bb->GetLastInsn()->IsMachineInstruction() && bb->GetLastInsn()->IsCall()) {
1299             UpdateCallInfo(bb->GetId(), currPoint, *bb->GetLastInsn());
1300         }
1301 
1302         FOR_BB_INSNS_REV_SAFE(insn, bb, ninsn) {
1303 #ifdef MOVE_COALESCE
1304             if ((insn->GetMachineOpcode() == MOP_xmovrr || insn->GetMachineOpcode() == MOP_wmovrr) &&
1305                 (!AArch64isa::IsPhysicalRegister(static_cast<RegOperand &>(insn->GetOperand(0)).GetRegisterNumber())) &&
1306                 (static_cast<RegOperand &>(insn->GetOperand(0)).GetRegisterNumber() ==
1307                  static_cast<RegOperand &>(insn->GetOperand(1)).GetRegisterNumber())) {
1308                 bb->RemoveInsn(*insn);
1309                 continue;
1310             }
1311 #endif
1312             if (UpdateInsnCntAndSkipUseless(*insn, currPoint)) {
1313                 if (ninsn && ninsn->IsMachineInstruction() && ninsn->IsCall()) {
1314                     UpdateCallInfo(bb->GetId(), currPoint, *ninsn);
1315                 }
1316                 continue;
1317             }
1318 
1319             bool multiDef = false;
1320             ComputeLiveRangesForEachDefOperand(*insn, multiDef);
1321             ComputeLiveRangesForEachUseOperand(*insn);
1322 
1323             UpdateOpndConflict(*insn, multiDef);
1324             SetupMustAssignedLiveRanges(*insn);
1325 
1326             if (ninsn && ninsn->IsMachineInstruction() && ninsn->IsCall()) {
1327                 UpdateCallInfo(bb->GetId(), currPoint - kInsnStep, *ninsn);
1328             }
1329 
1330             ComputeLiveRangesUpdateIfInsnIsCall(*insn);
1331             /* distinguish use/def */
1332             currPoint -= k2BitSize;
1333         }
1334         ComputeLiveRangesUpdateLiveUnitInsnRange(*bb, currPoint);
1335         /* move one more step for each BB */
1336         --currPoint;
1337     }
1338 
1339     if (needDump) {
1340         LogInfo::MapleLogger() << "After ComputeLiveRanges\n";
1341         PrintLiveRanges();
1342 #ifdef USE_LRA
1343         if (doLRA) {
1344             PrintLocalRAInfo("After ComputeLiveRanges");
1345         }
1346 #endif /* USE_LRA */
1347     }
1348 }
1349 
1350 /* Create a common stack space for spilling with need_spill */
1351 MemOperand *GraphColorRegAllocator::CreateSpillMem(uint32 spillIdx, SpillMemCheck check)
1352 {
1353     if (spillIdx >= spillMemOpnds.size()) {
1354         return nullptr;
1355     }
1356 
1357     if (operandSpilled[spillIdx]) {
1358         /* For this insn, spill slot already used, need to find next available slot. */
1359         uint32 i;
1360         for (i = spillIdx + 1; i < kSpillMemOpndNum; ++i) {
1361             if (!operandSpilled[i]) {
1362                 break;
1363             }
1364         }
1365         CHECK_FATAL(i < kSpillMemOpndNum, "no more available spill mem slot");
1366         spillIdx = i;
1367     }
1368     if (check == kSpillMemPost) {
1369         operandSpilled[spillIdx] = true;
1370     }
1371 
1372     if (spillMemOpnds[spillIdx] == nullptr) {
1373         regno_t reg = cgFunc->NewVReg(kRegTyInt, sizeof(int64));
1374         auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
1375         spillMemOpnds[spillIdx] = a64CGFunc->GetOrCreatSpillMem(reg);
1376     }
1377     return spillMemOpnds[spillIdx];
1378 }
1379 
1380 bool GraphColorRegAllocator::IsLocalReg(regno_t regNO) const
1381 {
1382     LiveRange *lr = GetLiveRange(regNO);
1383     if (lr == nullptr) {
1384         LogInfo::MapleLogger() << "unexpected regNO" << regNO;
1385         return true;
1386     }
1387     return IsLocalReg(*lr);
1388 }
1389 
1390 bool GraphColorRegAllocator::IsLocalReg(const LiveRange &lr) const
1391 {
1392     return !lr.GetSplitLr() && (lr.GetNumBBMembers() == 1) && !lr.IsNonLocal();
1393 }
1394 
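/*
 * val is the i-th 64-bit bucket of (lr1.bbMember & lr2.bbMember); every set bit names
 * a BB shared by both LRs, whose live units are then compared to decide whether the
 * two ranges really overlap there.
 */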
1395 bool GraphColorRegAllocator::CheckOverlap(uint64 val, uint32 i, LiveRange &lr1, LiveRange &lr2) const
1396 {
1397     regno_t lr1RegNO = lr1.GetRegNO();
1398     regno_t lr2RegNO = lr2.GetRegNO();
1399     for (uint32 x = 0; x < kU64; ++x) {
1400         if ((val & (1ULL << x)) != 0) {
1401             uint32 lastBitSet = i * kU64 + x;
1402             /*
1403              * begin and end should be in the bb info (LU)
1404              * Need to rethink this if.
1405              * Under some circumstances, lr->begin can occur after lr->end.
1406              */
1407             auto lu1 = lr1.FindInLuMap(lastBitSet);
1408             auto lu2 = lr2.FindInLuMap(lastBitSet);
1409             if (lu1 != lr1.EndOfLuMap() && lu2 != lr2.EndOfLuMap() &&
1410                 !((lu1->second->GetBegin() < lu2->second->GetBegin() &&
1411                    lu1->second->GetEnd() < lu2->second->GetBegin()) ||
1412                   (lu2->second->GetBegin() < lu1->second->GetEnd() &&
1413                    lu2->second->GetEnd() < lu1->second->GetBegin()))) {
1414                 lr1.SetConflictBitArrElem(lr2RegNO);
1415                 lr2.SetConflictBitArrElem(lr1RegNO);
1416                 return true;
1417             }
1418         }
1419     }
1420     return false;
1421 }
1422 
1423 void GraphColorRegAllocator::CheckInterference(LiveRange &lr1, LiveRange &lr2) const
1424 {
1425     uint64 bitArr[bbBuckets];
1426     for (uint32 i = 0; i < bbBuckets; ++i) {
1427         bitArr[i] = lr1.GetBBMember()[i] & lr2.GetBBMember()[i];
1428     }
1429 
1430     for (uint32 i = 0; i < bbBuckets; ++i) {
1431         uint64 val = bitArr[i];
1432         if (val == 0) {
1433             continue;
1434         }
1435         if (CheckOverlap(val, i, lr1, lr2)) {
1436             break;
1437         }
1438     }
1439 }
1440 
1441 void GraphColorRegAllocator::BuildInterferenceGraphSeparateIntFp(std::vector<LiveRange *> &intLrVec,
1442                                                                  std::vector<LiveRange *> &fpLrVec)
1443 {
1444     for (auto it : lrMap) {
1445         LiveRange *lr = it.second;
1446         if (lr->GetRegNO() == 0) {
1447             continue;
1448         }
1449 #ifdef USE_LRA
1450         if (doLRA && IsLocalReg(*lr)) {
1451             continue;
1452         }
1453 #endif /* USE_LRA */
1454         if (lr->GetRegType() == kRegTyInt) {
1455             intLrVec.emplace_back(lr);
1456         } else if (lr->GetRegType() == kRegTyFloat) {
1457             fpLrVec.emplace_back(lr);
1458         } else {
1459             DEBUG_ASSERT(false, "Illegal regType in BuildInterferenceGraph");
1460             LogInfo::MapleLogger() << "error: Illegal regType in BuildInterferenceGraph\n";
1461         }
1462     }
1463 }
1464 
1465 /*
1466  *  Based on intersection of LRs.  When two LRs interfere, add to each other's
1467  *  interference list.
1468  */
1469 void GraphColorRegAllocator::BuildInterferenceGraph()
1470 {
1471     std::vector<LiveRange *> intLrVec;
1472     std::vector<LiveRange *> fpLrVec;
1473     BuildInterferenceGraphSeparateIntFp(intLrVec, fpLrVec);
1474 
1475     /*
1476      * As the number of BBs grows in big functions, checking for interferences
1477      * takes significantly longer. Taking advantage of the unique bucket is one strategy
1478      * to avoid unnecessary computation.
1479      */
1480     auto lrSize = intLrVec.size();
1481     std::vector<int32> uniqueBucketIdx(lrSize);
1482     for (uint32 i = 0; i < lrSize; i++) {
1483         uint32 count = 0;
1484         uint32 uniqueIdx;
1485         LiveRange *lr = intLrVec[i];
1486         for (uint32 j = 0; j < bbBuckets; ++j) {
1487             if (lr->GetBBMember()[j]) {
1488                 count++;
1489                 uniqueIdx = j;
1490             }
1491         }
1492         if (count == 1) {
1493             uniqueBucketIdx[i] = static_cast<int32>(uniqueIdx);
1494         } else {
1495             /* LR spans multiple buckets */
1496             DEBUG_ASSERT(count >= 1, "A live range can not be empty");
1497             uniqueBucketIdx[i] = -1;
1498         }
1499     }
1500 
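    /*
     * Pairwise interference test: if either LR occupies a single bucket, the pair can
     * only interfere when their members overlap inside that bucket, so the full
     * CheckInterference walk is limited to those cases (or to pairs where both LRs
     * span multiple buckets).
     */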
1501     for (auto it1 = intLrVec.begin(); it1 != intLrVec.end(); ++it1) {
1502         LiveRange *lr1 = *it1;
1503         CalculatePriority(*lr1);
1504         int32 lr1UniqueBucketIdx = uniqueBucketIdx[static_cast<uint64>(std::distance(intLrVec.begin(), it1))];
1505         for (auto it2 = it1 + 1; it2 != intLrVec.end(); ++it2) {
1506             LiveRange *lr2 = *it2;
1507             if (lr1->GetRegNO() < lr2->GetRegNO()) {
1508                 int32 lr2UniqueBucketIdx = uniqueBucketIdx[static_cast<uint64>(std::distance(intLrVec.begin(), it2))];
1509                 if (lr1UniqueBucketIdx == -1 && lr2UniqueBucketIdx == -1) {
1510                     CheckInterference(*lr1, *lr2);
1511                 } else if (((lr1UniqueBucketIdx >= 0) &&
1512                             (lr1->GetBBMember()[lr1UniqueBucketIdx] & lr2->GetBBMember()[lr1UniqueBucketIdx])) ||
1513                            ((lr2UniqueBucketIdx >= 0) &&
1514                             (lr1->GetBBMember()[lr2UniqueBucketIdx] & lr2->GetBBMember()[lr2UniqueBucketIdx]))) {
1515                     CheckInterference(*lr1, *lr2);
1516                 }
1517             }
1518         }
1519     }
1520 
1521     // Might need to do the same as for intLrVec
1522     for (auto it1 = fpLrVec.begin(); it1 != fpLrVec.end(); ++it1) {
1523         LiveRange *lr1 = *it1;
1524         CalculatePriority(*lr1);
1525         for (auto it2 = it1 + 1; it2 != fpLrVec.end(); ++it2) {
1526             LiveRange *lr2 = *it2;
1527             if (lr1->GetRegNO() < lr2->GetRegNO()) {
1528                 CheckInterference(*lr1, *lr2);
1529             }
1530         }
1531     }
1532 
1533     if (needDump) {
1534         LogInfo::MapleLogger() << "After BuildInterferenceGraph\n";
1535         PrintLiveRanges();
1536     }
1537 }
1538 
1539 void GraphColorRegAllocator::SetBBInfoGlobalAssigned(uint32 bbID, regno_t regNO)
1540 {
1541     DEBUG_ASSERT(bbID < bbRegInfo.size(), "index out of range in GraphColorRegAllocator::SetBBInfoGlobalAssigned");
1542     BBAssignInfo *bbInfo = bbRegInfo[bbID];
1543     if (bbInfo == nullptr) {
1544         bbInfo = memPool->New<BBAssignInfo>(alloc);
1545         bbRegInfo[bbID] = bbInfo;
1546         bbInfo->InitGlobalAssigned();
1547     }
1548     bbInfo->InsertElemToGlobalsAssigned(regNO);
1549 }
1550 
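/*
 * num counts the entries that may each block one color (conflicting neighbors plus
 * vetoed/forbidden regs at the call sites); a color is still guaranteed while num is
 * below the number of allocatable registers of the LR's class.
 */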
1551 bool GraphColorRegAllocator::HaveAvailableColor(const LiveRange &lr, uint32 num) const
1552 {
1553     return ((lr.GetRegType() == kRegTyInt && num < intRegNum) || (lr.GetRegType() == kRegTyFloat && num < fpRegNum));
1554 }
1555 
1556 /*
1557  * If the number of members on the interference list is less than #colors, then
1558  * the LR can be trivially assigned a register.  Otherwise it is constrained.
1559  * Separate the LRs based on whether they are constrained or not.
1560  *
1561  * The unconstrained LRs are colored last.
1562  *
1563  * Compute a sorted list of constrained LRs based on priority cost.
1564  */
1565 void GraphColorRegAllocator::Separate()
1566 {
1567     for (auto it : lrMap) {
1568         LiveRange *lr = it.second;
1569 #ifdef USE_LRA
1570         if (doLRA && IsLocalReg(*lr)) {
1571             continue;
1572         }
1573 #endif /* USE_LRA */
1574 #ifdef OPTIMIZE_FOR_PROLOG
1575         if (doOptProlog && ((lr->GetNumDefs() <= 1) && (lr->GetNumUses() <= 1) && (lr->GetNumCall() > 0)) &&
1576             (lr->GetFrequency() <= (cgFunc->GetFirstBB()->GetFrequency() << 1))) {
1577             if (lr->GetRegType() == kRegTyInt) {
1578                 intDelayed.emplace_back(lr);
1579             } else {
1580                 fpDelayed.emplace_back(lr);
1581             }
1582             continue;
1583         }
1584 #endif /* OPTIMIZE_FOR_PROLOG */
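        /*
         * Classification: rematerializable LRs are always treated as unconstrained;
         * otherwise an LR is unconstrained when enough colors remain after counting its
         * conflicts, pregveto and forbidden regs, must-assign LRs go to mustAssigned,
         * and the rest become constrained (LRs with preferred regs get the separate
         * unconstrainedPref list).
         */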
1585         if (lr->GetRematLevel() != kRematOff) {
1586             unconstrained.emplace_back(lr);
1587         } else if (HaveAvailableColor(*lr, lr->GetNumBBConflicts() + static_cast<uint32>(lr->GetPregvetoSize()) +
1588                                                static_cast<uint32>(lr->GetForbiddenSize()))) {
1589             if (lr->GetPrefs().size()) {
1590                 unconstrainedPref.emplace_back(lr);
1591             } else {
1592                 unconstrained.emplace_back(lr);
1593             }
1594         } else if (lr->IsMustAssigned()) {
1595             mustAssigned.emplace_back(lr);
1596         } else {
1597             if (lr->GetPrefs().size() && lr->GetNumCall() == 0) {
1598                 unconstrainedPref.emplace_back(lr);
1599             } else {
1600                 constrained.emplace_back(lr);
1601             }
1602         }
1603     }
1604     if (needDump) {
1605         LogInfo::MapleLogger() << "Unconstrained : ";
1606         for (auto lr : unconstrainedPref) {
1607             LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1608         }
1609         for (auto lr : unconstrained) {
1610             LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1611         }
1612         LogInfo::MapleLogger() << "\n";
1613         LogInfo::MapleLogger() << "Constrained : ";
1614         for (auto lr : constrained) {
1615             LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1616         }
1617         LogInfo::MapleLogger() << "\n";
1618         LogInfo::MapleLogger() << "mustAssigned : ";
1619         for (auto lr : mustAssigned) {
1620             LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1621         }
1622         LogInfo::MapleLogger() << "\n";
1623     }
1624 }
1625 
1626 MapleVector<LiveRange *>::iterator GraphColorRegAllocator::GetHighPriorityLr(MapleVector<LiveRange *> &lrSet) const
1627 {
1628     auto it = lrSet.begin();
1629     auto highestIt = it;
1630     LiveRange *startLr = *it;
1631     float maxPrio = startLr->GetPriority();
1632     ++it;
1633     for (; it != lrSet.end(); ++it) {
1634         LiveRange *lr = *it;
1635         if (lr->GetPriority() > maxPrio) {
1636             maxPrio = lr->GetPriority();
1637             highestIt = it;
1638         }
1639     }
1640     return highestIt;
1641 }
1642 
1643 void GraphColorRegAllocator::UpdateForbiddenForNeighbors(const LiveRange &lr) const
1644 {
1645     auto updateForbidden = [&lr, this](regno_t regNO) {
1646         LiveRange *newLr = GetLiveRange(regNO);
1647         DEBUG_ASSERT(newLr != nullptr, "newLr should not be nullptr");
1648         if (!newLr->GetPregveto(lr.GetAssignedRegNO())) {
1649             newLr->InsertElemToForbidden(lr.GetAssignedRegNO());
1650         }
1651     };
1652     ForEachRegArrElem(lr.GetBBConflict(), updateForbidden);
1653 }
1654 
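/*
 * Unlike a forbidden entry, pregveto is the stronger constraint: the neighbor is vetoed
 * for this phys reg outright and any matching forbidden entry is dropped.
 */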
1655 void GraphColorRegAllocator::UpdatePregvetoForNeighbors(const LiveRange &lr) const
1656 {
1657     auto updatePregveto = [&lr, this](regno_t regNO) {
1658         LiveRange *newLr = GetLiveRange(regNO);
1659         DEBUG_ASSERT(newLr != nullptr, "newLr should not be nullptr");
1660         newLr->InsertElemToPregveto(lr.GetAssignedRegNO());
1661         newLr->EraseElemFromForbidden(lr.GetAssignedRegNO());
1662     };
1663     ForEachRegArrElem(lr.GetBBConflict(), updatePregveto);
1664 }
1665 
1666 /*
1667  *  For cases with only one def/use that crosses a call,
1668  *  it might be more beneficial to spill than to save/restore in the prolog/epilog.
1669  *  But if the callee register is already used, then it is ok to reuse it again.
1670  *  Or in certain cases, just use the callee.
1671  */
1672 bool GraphColorRegAllocator::ShouldUseCallee(LiveRange &lr, const MapleSet<regno_t> &calleeUsed,
1673                                              const MapleVector<LiveRange *> &delayed) const
1674 {
1675     if (FindIn(calleeUsed, lr.GetAssignedRegNO())) {
1676         return true;
1677     }
1678     if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr.GetAssignedRegNO())) &&
1679         (calleeUsed.size() % kDivide2) != 0) {
1680         return true;
1681     }
1682     if (delayed.size() > 1 && calleeUsed.empty()) {
1683         /* If more than one vreg can benefit from the callee-saved reg, use it */
1684         return true;
1685     }
1686     lr.SetAssignedRegNO(0);
1687     return false;
1688 }
1689 
1690 void GraphColorRegAllocator::AddCalleeUsed(regno_t regNO, RegType regType)
1691 {
1692     DEBUG_ASSERT(AArch64isa::IsPhysicalRegister(regNO), "regNO should be physical register");
1693     bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO));
1694     if (isCalleeReg) {
1695         if (regType == kRegTyInt) {
1696             (void)intCalleeUsed.insert(regNO);
1697         } else {
1698             (void)fpCalleeUsed.insert(regNO);
1699         }
1700     }
1701 }
1702 
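/*
 * LRs that cross calls prefer callee-saved regs and the others prefer caller-saved
 * regs; the opposite class is kept as a second choice.  With MOVE_COALESCE the
 * preferred (coalesce) regs are tried first.
 */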
1703 regno_t GraphColorRegAllocator::FindColorForLr(const LiveRange &lr) const
1704 {
1705     regno_t reg = 0;
1706     regno_t base;
1707     RegType regType = lr.GetRegType();
1708     const MapleSet<uint32> *currRegSet = nullptr;
1709     const MapleSet<uint32> *nextRegSet = nullptr;
1710     if (regType == kRegTyInt) {
1711         if (lr.GetNumCall() != 0) {
1712             currRegSet = &intCalleeRegSet;
1713             nextRegSet = &intCallerRegSet;
1714         } else {
1715             currRegSet = &intCallerRegSet;
1716             nextRegSet = &intCalleeRegSet;
1717         }
1718         base = R0;
1719     } else {
1720         if (lr.GetNumCall() != 0) {
1721             currRegSet = &fpCalleeRegSet;
1722             nextRegSet = &fpCallerRegSet;
1723         } else {
1724             currRegSet = &fpCallerRegSet;
1725             nextRegSet = &fpCalleeRegSet;
1726         }
1727         base = V0;
1728     }
1729 
1730 #ifdef MOVE_COALESCE
1731     if (lr.GetNumCall() == 0 || (lr.GetNumDefs() + lr.GetNumUses() <= kRegNum2)) {
1732         for (const auto &it : lr.GetPrefs()) {
1733             reg = it + base;
1734             if ((FindIn(*currRegSet, reg) || FindIn(*nextRegSet, reg)) && !lr.GetForbidden(reg) &&
1735                 !lr.GetPregveto(reg)) {
1736                 return reg;
1737             }
1738         }
1739     }
1740 #endif /*  MOVE_COALESCE */
1741     for (const auto &it : *currRegSet) {
1742         reg = it + base;
1743         if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) {
1744             return reg;
1745         }
1746     }
1747     /* Failed to allocate in first choice. Try 2nd choice. */
1748     for (const auto &it : *nextRegSet) {
1749         reg = it + base;
1750         if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) {
1751             return reg;
1752         }
1753     }
1754     DEBUG_ASSERT(false, "Failed to find a register");
1755     return 0;
1756 }
1757 
1758 regno_t GraphColorRegAllocator::TryToAssignCallerSave(const LiveRange &lr) const
1759 {
1760     regno_t base;
1761     RegType regType = lr.GetRegType();
1762     const MapleSet<uint32> *currRegSet = nullptr;
1763     if (regType == kRegTyInt) {
1764         currRegSet = &intCallerRegSet;
1765         base = R0;
1766     } else {
1767         currRegSet = &fpCallerRegSet;
1768         base = V0;
1769     }
1770 
1771     regno_t reg = 0;
1772 #ifdef MOVE_COALESCE
1773     if (lr.GetNumCall() == 0 || (lr.GetNumDefs() + lr.GetNumUses() <= kRegNum2)) {
1774         for (const auto &it : lr.GetPrefs()) {
1775             reg = it + base;
1776             if ((FindIn(*currRegSet, reg)) && !lr.GetForbidden(reg) && !lr.GetPregveto(reg) && !lr.GetCallDef(reg)) {
1777                 return reg;
1778             }
1779         }
1780     }
1781 #endif /*  MOVE_COALESCE */
1782     for (const auto &it : *currRegSet) {
1783         reg = it + base;
1784         if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg) && !lr.GetCallDef(reg)) {
1785             return reg;
1786         }
1787     }
1788     return 0;
1789 }
1790 
1791 /*
1792  * If the forbidden list has more registers than the max of all BBs' local reg
1793  *  requirements, then the LR can be colored.
1794  *  Update the LR's color on success and return true; otherwise return false.
1795  */
1796 bool GraphColorRegAllocator::AssignColorToLr(LiveRange &lr, bool isDelayed)
1797 {
1798     if (lr.GetAssignedRegNO() > 0) {
1799         /* Already assigned. */
1800         return true;
1801     }
1802     if (!HaveAvailableColor(lr, lr.GetForbiddenSize() + lr.GetPregvetoSize())) {
1803         if (needDump) {
1804             LogInfo::MapleLogger() << "assigned fail to R" << lr.GetRegNO() << "\n";
1805         }
1806         return false;
1807     }
1808     regno_t callerSaveReg = 0;
1809     regno_t reg = FindColorForLr(lr);
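    /*
     * An LR that has calls but does not actually cross one may do better with a
     * caller-saved reg than with an otherwise unused callee-saved reg; numCall is then
     * cleared so the LR is no longer treated as call-crossing.
     */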
1810     if (lr.GetNumCall() != 0 && !lr.GetCrossCall()) {
1811         callerSaveReg = TryToAssignCallerSave(lr);
1812         bool prefCaller = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg)) &&
1813                           intCalleeUsed.find(reg) == intCalleeUsed.end() &&
1814                           fpCalleeUsed.find(reg) == fpCalleeUsed.end();
1815         if (callerSaveReg != 0 && (prefCaller || !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg)))) {
1816             reg = callerSaveReg;
1817             lr.SetNumCall(0);
1818         }
1819     }
1820     lr.SetAssignedRegNO(reg);
1821     if (needDump) {
1822         LogInfo::MapleLogger() << "assigned " << lr.GetAssignedRegNO() << " to R" << lr.GetRegNO() << "\n";
1823     }
1824     if (lr.GetAssignedRegNO() == 0) {
1825         return false;
1826     }
1827 #ifdef OPTIMIZE_FOR_PROLOG
1828     if (doOptProlog && isDelayed) {
1829         if ((lr.GetRegType() == kRegTyInt && !ShouldUseCallee(lr, intCalleeUsed, intDelayed)) ||
1830             (lr.GetRegType() == kRegTyFloat && !ShouldUseCallee(lr, fpCalleeUsed, fpDelayed))) {
1831             return false;
1832         }
1833     }
1834 #endif /* OPTIMIZE_FOR_PROLOG */
1835 
1836     AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType());
1837 
1838     UpdateForbiddenForNeighbors(lr);
1839     ForEachBBArrElem(lr.GetBBMember(),
1840                      [&lr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, lr.GetAssignedRegNO()); });
1841     return true;
1842 }
1843 
1844 void GraphColorRegAllocator::PruneLrForSplit(LiveRange &lr, BB &bb, bool remove,
1845                                              std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1846                                              std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1847 {
1848     if (bb.GetInternalFlag1()) {
1849         /* already visited */
1850         return;
1851     }
1852 
1853     bb.SetInternalFlag1(true);
1854     auto lu = lr.FindInLuMap(bb.GetId());
1855     uint32 defNum = 0;
1856     uint32 useNum = 0;
1857     if (lu != lr.EndOfLuMap()) {
1858         defNum = lu->second->GetDefNum();
1859         useNum = lu->second->GetUseNum();
1860     }
1861 
1862     if (remove) {
1863         /* In removal mode, has not encountered a ref yet. */
1864         if (defNum == 0 && useNum == 0) {
1865             if (bb.GetLoop() != nullptr && FindIn(candidateInLoop, bb.GetLoop())) {
1866                 /*
1867                  * Upward search has found a loop.  Regardless of def/use,
1868                  *  the loop members must be included in the new LR.
1869                  */
1870                 remove = false;
1871             } else {
1872                 /* No ref in this bb. mark as potential remove. */
1873                 bb.SetInternalFlag2(true);
1874                 return;
1875             }
1876         } else {
1877             /* found a ref, no more removal of bb and preds. */
1878             remove = false;
1879         }
1880     }
1881 
1882     if (bb.GetLoop() != nullptr) {
1883         /* With a def in loop, cannot prune that loop */
1884         if (defNum > 0) {
1885             (void)defInLoop.insert(bb.GetLoop());
1886         }
1887         /* bb in loop, need to make sure of loop carried dependency */
1888         (void)candidateInLoop.insert(bb.GetLoop());
1889     }
1890     for (auto pred : bb.GetPreds()) {
1891         if (FindNotIn(bb.GetLoopPreds(), pred)) {
1892             PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop);
1893         }
1894     }
1895     for (auto pred : bb.GetEhPreds()) {
1896         if (FindNotIn(bb.GetLoopPreds(), pred)) {
1897             PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop);
1898         }
1899     }
1900 }
1901 
1902 void GraphColorRegAllocator::FindBBSharedInSplit(LiveRange &lr,
1903                                                  const std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1904                                                  std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1905 {
1906     /* A loop might be split into two.  Need to check over the entire LR whether there is a def in the loop. */
1907     auto FindBBSharedFunc = [&lr, &candidateInLoop, &defInLoop, this](uint32 bbID) {
1908         BB *bb = bbVec[bbID];
1909         if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) {
1910             auto lu = lr.FindInLuMap(bb->GetId());
1911             if (lu != lr.EndOfLuMap() && lu->second->GetDefNum() > 0) {
1912                 (void)defInLoop.insert(bb->GetLoop());
1913             }
1914         }
1915     };
1916     ForEachBBArrElem(lr.GetBBMember(), FindBBSharedFunc);
1917 }
1918 
1919 /*
1920  *  Backward traversal of the top part of the split LR.
1921  *  Prune the part of the LR that has no downward exposing references.
1922  *  Take loops and loop-carried dependencies into account.
1923  *  If a candidate bb to be removed is in a loop, store that info.
1924  *  If an LR crosses a loop, even if the loop has no def/use, it must
1925  *  be included in the new LR.
1926  */
1927 void GraphColorRegAllocator::ComputeBBForNewSplit(LiveRange &newLr, LiveRange &origLr)
1928 {
1929     /*
1930      *  If a candidate bb to be removed is in a loop, store that info.
1931      *  If an LR crosses a loop, even if the loop has no def/use, it must
1932      *  be included in the new LR.
1933      */
1934     std::set<CGFuncLoops *, CGFuncLoopCmp> candidateInLoop;
1935     /* If a bb has a def and is in a loop, store that info. */
1936     std::set<CGFuncLoops *, CGFuncLoopCmp> defInLoop;
1937     std::set<BB *, SortedBBCmpFunc> smember;
1938     ForEachBBArrElem(newLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
1939     for (auto bbIt = smember.rbegin(); bbIt != smember.rend(); ++bbIt) {
1940         BB *bb = *bbIt;
1941         if (bb->GetInternalFlag1() != 0) {
1942             continue;
1943         }
1944         PruneLrForSplit(newLr, *bb, true, candidateInLoop, defInLoop);
1945     }
1946     FindBBSharedInSplit(origLr, candidateInLoop, defInLoop);
1947     auto pruneTopLr = [this, &newLr, &candidateInLoop, &defInLoop](uint32 bbID) {
1948         BB *bb = bbVec[bbID];
1949         if (bb->GetInternalFlag2() != 0) {
1950             if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) {
1951                 return;
1952             }
1953             if (bb->GetLoop() != nullptr || FindNotIn(defInLoop, bb->GetLoop())) {
1954                 /* defInLoop should be a subset of candidateInLoop.  remove. */
1955                 newLr.UnsetMemberBitArrElem(bbID);
1956             }
1957         }
1958     };
1959     ForEachBBArrElem(newLr.GetBBMember(), pruneTopLr); /* prune the top LR. */
1960 }
1961 
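/*
 * A use in bb is treated as uncovered by startBB when some predecessor chain from bb
 * reaches a block whose level is not above startBB's, i.e. control can reach bb
 * without first passing through the region headed by startBB.
 */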
1962 bool GraphColorRegAllocator::UseIsUncovered(const BB &bb, const BB &startBB, std::vector<bool> &visitedBB)
1963 {
1964     CHECK_FATAL(bb.GetId() < visitedBB.size(), "index out of range");
1965     visitedBB[bb.GetId()] = true;
1966     for (auto pred : bb.GetPreds()) {
1967         if (visitedBB[pred->GetId()]) {
1968             continue;
1969         }
1970         if (pred->GetLevel() <= startBB.GetLevel()) {
1971             return true;
1972         }
1973         if (UseIsUncovered(*pred, startBB, visitedBB)) {
1974             return true;
1975         }
1976     }
1977     for (auto pred : bb.GetEhPreds()) {
1978         if (visitedBB[pred->GetId()]) {
1979             continue;
1980         }
1981         if (pred->GetLevel() <= startBB.GetLevel()) {
1982             return true;
1983         }
1984         if (UseIsUncovered(*pred, startBB, visitedBB)) {
1985             return true;
1986         }
1987     }
1988     return false;
1989 }
1990 
1991 void GraphColorRegAllocator::FindUseForSplit(LiveRange &lr, SplitBBInfo &bbInfo, bool &remove,
1992                                              std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1993                                              std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1994 {
1995     BB *bb = bbInfo.GetCandidateBB();
1996     const BB *startBB = bbInfo.GetStartBB();
1997     if (bb->GetInternalFlag1() != 0) {
1998         /* already visited */
1999         return;
2000     }
2001     for (auto pred : bb->GetPreds()) {
2002         if (pred->GetInternalFlag1() == 0) {
2003             return;
2004         }
2005     }
2006     for (auto pred : bb->GetEhPreds()) {
2007         if (pred->GetInternalFlag1() == 0) {
2008             return;
2009         }
2010     }
2011 
2012     bb->SetInternalFlag1(true);
2013     auto lu = lr.FindInLuMap(bb->GetId());
2014     uint32 defNum = 0;
2015     uint32 useNum = 0;
2016     if (lu != lr.EndOfLuMap()) {
2017         defNum = lu->second->GetDefNum();
2018         useNum = lu->second->GetUseNum();
2019     }
2020 
2021     std::vector<bool> visitedBB(cgFunc->GetAllBBs().size(), false);
2022     if (remove) {
2023         /* In removal mode, has not encountered a ref yet. */
2024         if (defNum == 0 && useNum == 0) {
2025             /* No ref in this bb. mark as potential remove. */
2026             bb->SetInternalFlag2(true);
2027             if (bb->GetLoop() != nullptr) {
2028                 /* bb in loop, need to make sure of loop carried dependency */
2029                 (void)candidateInLoop.insert(bb->GetLoop());
2030             }
2031         } else {
2032             /* found a ref, no more removal of bb and preds. */
2033             remove = false;
2034             /* A potential point for an upward exposing use (might be a def). */
2035             lu->second->SetNeedReload(true);
2036         }
2037     } else if ((defNum > 0 || useNum > 0) && UseIsUncovered(*bb, *startBB, visitedBB)) {
2038         lu->second->SetNeedReload(true);
2039     }
2040 
2041     /* With a def in loop, cannot prune that loop */
2042     if (bb->GetLoop() != nullptr && defNum > 0) {
2043         (void)defInLoop.insert(bb->GetLoop());
2044     }
2045 
2046     for (auto succ : bb->GetSuccs()) {
2047         if (FindNotIn(bb->GetLoopSuccs(), succ)) {
2048             bbInfo.SetCandidateBB(*succ);
2049             FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop);
2050         }
2051     }
2052     for (auto succ : bb->GetEhSuccs()) {
2053         if (FindNotIn(bb->GetLoopSuccs(), succ)) {
2054             bbInfo.SetCandidateBB(*succ);
2055             FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop);
2056         }
2057     }
2058 }
2059 
2060 void GraphColorRegAllocator::ClearLrBBFlags(const std::set<BB *, SortedBBCmpFunc> &member) const
2061 {
2062     for (auto bb : member) {
2063         bb->SetInternalFlag1(0);
2064         bb->SetInternalFlag2(0);
2065         for (auto pred : bb->GetPreds()) {
2066             pred->SetInternalFlag1(0);
2067             pred->SetInternalFlag2(0);
2068         }
2069         for (auto pred : bb->GetEhPreds()) {
2070             pred->SetInternalFlag1(0);
2071             pred->SetInternalFlag2(0);
2072         }
2073     }
2074 }
2075 
2076 /*
2077  *  Downward traversal of the bottom part of the split LR.
2078  *  Prune the part of the LR that has no upward exposing references.
2079  *  Take loops and loop-carried dependencies into account.
2080  */
2081 void GraphColorRegAllocator::ComputeBBForOldSplit(LiveRange &newLr, LiveRange &origLr)
2082 {
2083     /* The candidate bb to be removed, if in a loop, store that info. */
2084     std::set<CGFuncLoops *, CGFuncLoopCmp> candidateInLoop;
2085     /* If a bb has a def and is in a loop, store that info. */
2086     std::set<CGFuncLoops *, CGFuncLoopCmp> defInLoop;
2087     SplitBBInfo bbInfo;
2088     bool remove = true;
2089 
2090     std::set<BB *, SortedBBCmpFunc> smember;
2091     ForEachBBArrElem(origLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
2092     ClearLrBBFlags(smember);
2093     for (auto bb : smember) {
2094         if (bb->GetInternalFlag1() != 0) {
2095             continue;
2096         }
2097         for (auto pred : bb->GetPreds()) {
2098             pred->SetInternalFlag1(true);
2099         }
2100         for (auto pred : bb->GetEhPreds()) {
2101             pred->SetInternalFlag1(true);
2102         }
2103         bbInfo.SetCandidateBB(*bb);
2104         bbInfo.SetStartBB(*bb);
2105         FindUseForSplit(origLr, bbInfo, remove, candidateInLoop, defInLoop);
2106     }
2107     FindBBSharedInSplit(newLr, candidateInLoop, defInLoop);
2108     auto pruneLrFunc = [&origLr, &defInLoop, this](uint32 bbID) {
2109         BB *bb = bbVec[bbID];
2110         if (bb->GetInternalFlag2() != 0) {
2111             if (bb->GetLoop() != nullptr && FindNotIn(defInLoop, bb->GetLoop())) {
2112                 origLr.UnsetMemberBitArrElem(bbID);
2113             }
2114         }
2115     };
2116     ForEachBBArrElem(origLr.GetBBMember(), pruneLrFunc);
2117 }
2118 
2119 /*
2120  *  There is at least one available color for this BB from the neighbors
2121  *  minus the ones reserved for local allocation.
2122  *  bbAdded : The new BB to be added into the split LR if color is available.
2123  *  conflictRegs : Represents the LR before adding the bbAdded.  These are the
2124  *                 forbidden regs before adding the new BBs.
2125  *  Side effect : Adding the new forbidden regs from bbAdded into
2126  *                conflictRegs if the LR can still be colored.
2127  */
2128 bool GraphColorRegAllocator::LrCanBeColored(const LiveRange &lr, const BB &bbAdded,
2129                                             std::unordered_set<regno_t> &conflictRegs)
2130 {
2131     RegType type = lr.GetRegType();
2132 
2133     std::unordered_set<regno_t> newConflict;
2134     auto updateConflictFunc = [&bbAdded, &conflictRegs, &newConflict, &lr, this](regno_t regNO) {
2135         /* check the real conflict in current bb */
2136         LiveRange *conflictLr = lrMap[regNO];
2137         /*
2138          *  If the bb to be added to the new LR has an actual
2139          *  conflict with another LR, and if that LR has already
2140          *  assigned a color that is not in the conflictRegs,
2141          *  then add it as a newConflict.
2142          */
2143         if (IsBitArrElemSet(conflictLr->GetBBMember(), bbAdded.GetId())) {
2144             regno_t confReg = conflictLr->GetAssignedRegNO();
2145             if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) {
2146                 (void)newConflict.insert(confReg);
2147             }
2148         } else if (conflictLr->GetSplitLr() != nullptr &&
2149                    IsBitArrElemSet(conflictLr->GetSplitLr()->GetBBMember(), bbAdded.GetId())) {
2150             /*
2151              * The already-split LR is itself split into pieces, and this ensures
2152              * that split LR's color is taken into consideration.
2153              */
2154             regno_t confReg = conflictLr->GetSplitLr()->GetAssignedRegNO();
2155             if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) {
2156                 (void)newConflict.insert(confReg);
2157             }
2158         }
2159     };
2160     ForEachRegArrElem(lr.GetBBConflict(), updateConflictFunc);
2161 
2162     size_t numRegs = newConflict.size() + lr.GetPregvetoSize() + conflictRegs.size();
2163 
2164     bool canColor = false;
2165     if (type == kRegTyInt) {
2166         if (numRegs < intRegNum) {
2167             canColor = true;
2168         }
2169     } else if (numRegs < fpRegNum) {
2170         canColor = true;
2171     }
2172 
2173     if (canColor) {
2174         for (auto regNO : newConflict) {
2175             (void)conflictRegs.insert(regNO);
2176         }
2177     }
2178 
2179     /* Update all the conflicting registers when adding the new bb. */
2180     return canColor;
2181 }
2182 
2183 /* Support function for LR split.  Move one BB from LR1 to LR2. */
2184 void GraphColorRegAllocator::MoveLrBBInfo(LiveRange &oldLr, LiveRange &newLr, BB &bb) const
2185 {
2186     /* initialize backward traversal flag for the bb pruning phase */
2187     bb.SetInternalFlag1(false);
2188     /* initialize bb removal marker */
2189     bb.SetInternalFlag2(false);
2190     /* Insert BB into new LR */
2191     uint32 bbID = bb.GetId();
2192     newLr.SetMemberBitArrElem(bbID);
2193 
2194     /* Move LU from old LR to new LR */
2195     auto luIt = oldLr.FindInLuMap(bb.GetId());
2196     if (luIt != oldLr.EndOfLuMap()) {
2197         newLr.SetElemToLuMap(luIt->first, *(luIt->second));
2198         oldLr.EraseLuMap(luIt);
2199     }
2200 
2201     /* Remove BB from old LR */
2202     oldLr.UnsetMemberBitArrElem(bbID);
2203 }
2204 
2205 /* Is any loop in the set nested inside (or equal to) the given loop? */
2206 bool GraphColorRegAllocator::ContainsLoop(const CGFuncLoops &loop,
2207                                           const std::set<CGFuncLoops *, CGFuncLoopCmp> &loops) const
2208 {
2209     for (const CGFuncLoops *lp : loops) {
2210         while (lp != nullptr) {
2211             if (lp == &loop) {
2212                 return true;
2213             }
2214             lp = lp->GetOuterLoop();
2215         }
2216     }
2217     return false;
2218 }
2219 
2220 void GraphColorRegAllocator::GetAllLrMemberLoops(LiveRange &lr, std::set<CGFuncLoops *, CGFuncLoopCmp> &loops)
2221 {
2222     auto GetLrMemberFunc = [&loops, this](uint32 bbID) {
2223         BB *bb = bbVec[bbID];
2224         CGFuncLoops *loop = bb->GetLoop();
2225         if (loop != nullptr) {
2226             (void)loops.insert(loop);
2227         }
2228     };
2229     ForEachBBArrElem(lr.GetBBMember(), GetLrMemberFunc);
2230 }
2231 
2232 bool GraphColorRegAllocator::SplitLrShouldSplit(LiveRange &lr)
2233 {
2234     if (lr.GetSplitLr() != nullptr || lr.GetNumBBMembers() == 1) {
2235         return false;
2236     }
2237     /* Need to split within the same hierarchy */
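    /* i.e. every member BB must belong to a loop with the same header, or all member BBs must be outside loops */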
2238     uint32 loopID = 0xFFFFFFFF; /* loopID is initialized to the maximum value and then assigned below */
2239     bool needSplit = true;
2240     auto setNeedSplit = [&needSplit, &loopID, this](uint32 bbID) -> bool {
2241         BB *bb = bbVec[bbID];
2242         if (loopID == 0xFFFFFFFF) {
2243             if (bb->GetLoop() != nullptr) {
2244                 loopID = static_cast<int32>(bb->GetLoop()->GetHeader()->GetId());
2245             } else {
2246                 loopID = 0;
2247             }
2248         } else if ((bb->GetLoop() != nullptr && bb->GetLoop()->GetHeader()->GetId() != loopID) ||
2249                    (bb->GetLoop() == nullptr && loopID != 0)) {
2250             needSplit = false;
2251             return true;
2252         }
2253         return false;
2254     };
2255     ForEachBBArrElemWithInterrupt(lr.GetBBMember(), setNeedSplit);
2256     return needSplit;
2257 }
2258 
2259 /*
2260  * When a BB in the LR has no def or use in it, there is potentially
2261  * no conflict within these BBs for the new LR, since
2262  * the new LR will need to spill the defs, which terminates the
2263  * new LR unless there is a later use which extends it.
2264  * There is no need to compute the conflicting register set unless
2265  * there is a def or use.
2266  * It is assumed that the new LR is extended to the def or use.
2267  * Initially newLr is empty; then add a bb if the LR can still be colored.
2268  * Return true if there is a split.
2269  */
2270 bool GraphColorRegAllocator::SplitLrFindCandidateLr(LiveRange &lr, LiveRange &newLr,
2271                                                     std::unordered_set<regno_t> &conflictRegs)
2272 {
2273     if (needDump) {
2274         LogInfo::MapleLogger() << "start split lr for vreg " << lr.GetRegNO() << "\n";
2275     }
2276     std::set<BB *, SortedBBCmpFunc> smember;
2277     ForEachBBArrElem(lr.GetBBMember(), [&smember, this](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
2278     for (auto bb : smember) {
2279         if (!LrCanBeColored(lr, *bb, conflictRegs)) {
2280             break;
2281         }
2282         MoveLrBBInfo(lr, newLr, *bb);
2283     }
2284 
2285     /* return true if the split is successful */
2286     return newLr.GetNumBBMembers() != 0;
2287 }
2288 
2289 void GraphColorRegAllocator::SplitLrHandleLoops(LiveRange &lr, LiveRange &newLr,
2290                                                 const std::set<CGFuncLoops *, CGFuncLoopCmp> &origLoops,
2291                                                 const std::set<CGFuncLoops *, CGFuncLoopCmp> &newLoops)
2292 {
2293     /*
2294      * bb in loops might need a reload due to loop carried dependency.
2295      * Compute this before pruning the LRs.
2296      * if there is no re-definition, then reload is not necessary.
2297      * Part of the new LR region after the last reference is
2298      * no longer in the LR.  Remove those bb.
2299      */
2300     ComputeBBForNewSplit(newLr, lr);
2301 
2302     /* With new LR, recompute conflict. */
2303     auto recomputeConflict = [&lr, &newLr, this](uint32 bbID) {
2304         auto lrFunc = [&newLr, &bbID, this](regno_t regNO) {
2305             LiveRange *confLrVec = lrMap[regNO];
2306             if (IsBitArrElemSet(confLrVec->GetBBMember(), bbID) ||
2307                 (confLrVec->GetSplitLr() != nullptr && IsBitArrElemSet(confLrVec->GetSplitLr()->GetBBMember(), bbID))) {
2308                 /*
2309                  * New LR getting the interference does not mean the
2310                  * old LR can remove the interference.
2311                  * Old LR's interference will be handled at the end of split.
2312                  */
2313                 newLr.SetConflictBitArrElem(regNO);
2314             }
2315         };
2316         ForEachRegArrElem(lr.GetBBConflict(), lrFunc);
2317     };
2318     ForEachBBArrElem(newLr.GetBBMember(), recomputeConflict);
2319 
2320     /* update bb/loop same as for new LR. */
2321     ComputeBBForOldSplit(newLr, lr);
2322     /* Update the conflict interference for the original LR later. */
2323     for (auto loop : newLoops) {
2324         if (!ContainsLoop(*loop, origLoops)) {
2325             continue;
2326         }
2327         for (auto bb : loop->GetLoopMembers()) {
2328             if (!IsBitArrElemSet(newLr.GetBBMember(), bb->GetId())) {
2329                 continue;
2330             }
2331             LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bb->GetId());
2332             if (lu->GetUseNum() != 0) {
2333                 lu->SetNeedReload(true);
2334             }
2335         }
2336     }
2337 }
2338 
2339 void GraphColorRegAllocator::SplitLrFixNewLrCallsAndRlod(LiveRange &newLr,
2340                                                          const std::set<CGFuncLoops *, CGFuncLoopCmp> &origLoops)
2341 {
2342     /* If a 2nd split loop is before the bb in 1st split bb. */
2343     newLr.SetNumCall(0);
2344     auto fixCallsAndRlod = [&newLr, &origLoops, this](uint32 bbID) {
2345         BB *bb = bbVec[bbID];
2346         for (auto loop : origLoops) {
2347             if (loop->GetHeader()->GetLevel() >= bb->GetLevel()) {
2348                 continue;
2349             }
2350             LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID);
2351             if (lu->GetUseNum() != 0) {
2352                 lu->SetNeedReload(true);
2353             }
2354         }
2355         LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID);
2356         if (lu->HasCall()) {
2357             newLr.IncNumCall();
2358         }
2359     };
2360     ForEachBBArrElem(newLr.GetBBMember(), fixCallsAndRlod);
2361 }
2362 
2363 void GraphColorRegAllocator::SplitLrFixOrigLrCalls(LiveRange &lr) const
2364 {
2365     lr.SetNumCall(0);
2366     auto fixOrigCalls = [&lr](uint32 bbID) {
2367         LiveUnit *lu = lr.GetLiveUnitFromLuMap(bbID);
2368         if (lu->HasCall()) {
2369             lr.IncNumCall();
2370         }
2371     };
2372     ForEachBBArrElem(lr.GetBBMember(), fixOrigCalls);
2373 }
2374 
2375 void GraphColorRegAllocator::SplitLrUpdateInterference(LiveRange &lr)
2376 {
2377     /*
2378      * newLr is now a separate LR from the original lr.
2379      * Update the interference info.
2380      * Also recompute the forbidden info
2381      */
2382     lr.ClearForbidden();
2383     auto updateInterfrence = [&lr, this](regno_t regNO) {
2384         LiveRange *confLrVec = lrMap[regNO];
2385         if (IsBBsetOverlap(lr.GetBBMember(), confLrVec->GetBBMember(), bbBuckets)) {
2386             /* interfere */
2387             if (confLrVec->GetAssignedRegNO() && !lr.GetPregveto(confLrVec->GetAssignedRegNO())) {
2388                 lr.InsertElemToForbidden(confLrVec->GetAssignedRegNO());
2389             }
2390         } else {
2391             /* no interference */
2392             lr.UnsetConflictBitArrElem(regNO);
2393         }
2394     };
2395     ForEachRegArrElem(lr.GetBBConflict(), updateInterfrence);
2396 }
2397 
2398 void GraphColorRegAllocator::SplitLrUpdateRegInfo(const LiveRange &origLr, LiveRange &newLr,
2399                                                   std::unordered_set<regno_t> &conflictRegs) const
2400 {
2401     for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
2402         if (origLr.GetPregveto(regNO)) {
2403             newLr.InsertElemToPregveto(regNO);
2404         }
2405     }
2406     for (auto regNO : conflictRegs) {
2407         if (!newLr.GetPregveto(regNO)) {
2408             newLr.InsertElemToForbidden(regNO);
2409         }
2410     }
2411 }
2412 
2413 void GraphColorRegAllocator::SplitLrErrorCheckAndDebug(const LiveRange &origLr) const
2414 {
2415     if (origLr.GetNumBBMembers() == 0) {
2416         DEBUG_ASSERT(origLr.GetNumBBConflicts() == 0, "Error: member and conflict not match");
2417     }
2418 }
2419 
2420 /*
2421  * Pick a starting BB, then expand to maximize the new LR.
2422  * Return the new LR.
2423  */
2424 void GraphColorRegAllocator::SplitLr(LiveRange &lr)
2425 {
2426     if (!SplitLrShouldSplit(lr)) {
2427         return;
2428     }
2429     LiveRange *newLr = NewLiveRange();
2430     /*
2431      * For the new LR, whenever a BB with either a def or
2432      * use is added, then add the registers that the neighbor
2433      * is using to the conflict register set indicating that these
2434      * registers cannot be used for the new LR's color.
2435      */
2436     std::unordered_set<regno_t> conflictRegs;
2437     if (!SplitLrFindCandidateLr(lr, *newLr, conflictRegs)) {
2438         return;
2439     }
2440 #ifdef REUSE_SPILLMEM
2441     /* Copy the original conflict vector for spill reuse optimization */
2442     lr.SetOldConflict(memPool->NewArray<uint64>(regBuckets));
2443     for (uint32 i = 0; i < regBuckets; ++i) {
2444         lr.SetBBConflictElem(static_cast<int32>(i), lr.GetBBConflictElem(static_cast<int32>(i)));
2445     }
2446 #endif /* REUSE_SPILLMEM */
2447 
2448     std::set<CGFuncLoops *, CGFuncLoopCmp> newLoops;
2449     std::set<CGFuncLoops *, CGFuncLoopCmp> origLoops;
2450     GetAllLrMemberLoops(*newLr, newLoops);
2451     GetAllLrMemberLoops(lr, origLoops);
2452     SplitLrHandleLoops(lr, *newLr, origLoops, newLoops);
2453     SplitLrFixNewLrCallsAndRlod(*newLr, origLoops);
2454     SplitLrFixOrigLrCalls(lr);
2455 
2456     SplitLrUpdateRegInfo(lr, *newLr, conflictRegs);
2457 
2458     CalculatePriority(lr);
2459     /* At this point, newLr should be unconstrained. */
2460     lr.SetSplitLr(*newLr);
2461 
2462     newLr->SetRegNO(lr.GetRegNO());
2463     newLr->SetRegType(lr.GetRegType());
2464     newLr->SetID(lr.GetID());
2465     newLr->CopyRematerialization(lr);
2466     CalculatePriority(*newLr);
2467     SplitLrUpdateInterference(lr);
2468     newLr->SetAssignedRegNO(FindColorForLr(*newLr));
2469 
2470     AddCalleeUsed(newLr->GetAssignedRegNO(), newLr->GetRegType());
2471 
2472     /* For the new LR, update assignment for local RA */
2473     ForEachBBArrElem(newLr->GetBBMember(),
2474                      [&newLr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, newLr->GetAssignedRegNO()); });
2475 
2476     UpdatePregvetoForNeighbors(*newLr);
2477 
2478     SplitLrErrorCheckAndDebug(lr);
2479 }
2480 
2481 void GraphColorRegAllocator::ColorForOptPrologEpilog()
2482 {
2483 #ifdef OPTIMIZE_FOR_PROLOG
2484     if (!doOptProlog) {
2485         return;
2486     }
2487     for (auto lr : intDelayed) {
2488         if (!AssignColorToLr(*lr, true)) {
2489             lr->SetSpilled(true);
2490         }
2491     }
2492     for (auto lr : fpDelayed) {
2493         if (!AssignColorToLr(*lr, true)) {
2494             lr->SetSpilled(true);
2495         }
2496     }
2497 #endif
2498 }
2499 
2500 /*
2501  *  From the sorted list of constrained LRs, pick the most profitable LR.
2502  *  Split the LR into LRnew1 and LRnew2 where LRnew1 has the maximum number of
2503  *  BBs and is colorable.
2504  *  The starting BB for traversal must have a color available.
2505  *
2506  *  Assign a color, update the neighbors' forbidden lists.
2507  *
2508  *  Update the conflict graph by changing the interference lists.
2509  *  In the case where both LRnew1 and LRnew2 conflict with a BB, this BB's
2510  *  #neighbors is increased.  If this BB was unconstrained, must check whether
2511  *  it is still unconstrained.  Move to constrained if necessary.
2512  *
2513  *  Color the unconstrained LRs.
2514  */
2515 void GraphColorRegAllocator::SplitAndColorForEachLr(MapleVector<LiveRange *> &targetLrVec)
2516 {
2517     while (!targetLrVec.empty()) {
2518         auto highestIt = GetHighPriorityLr(targetLrVec);
2519         LiveRange *lr = *highestIt;
2520         /* check whether those lrs in lr's conflict set that are in unconstrained turn constrained */
2521         if (highestIt != targetLrVec.end()) {
2522             targetLrVec.erase(highestIt);
2523         } else {
2524             DEBUG_ASSERT(false, "Error: not in targetLrVec");
2525         }
2526         if (AssignColorToLr(*lr)) {
2527             continue;
2528         }
2529 #ifdef USE_SPLIT
2530         SplitLr(*lr);
2531 #endif /* USE_SPLIT */
2532         /*
2533          * When LR is spilled, it potentially has no conflicts as
2534          * each def/use is spilled/reloaded.
2535          */
2536 #ifdef COLOR_SPLIT
2537         if (!AssignColorToLr(*lr)) {
2538 #endif /* COLOR_SPLIT */
2539             lr->SetSpilled(true);
2540             hasSpill = true;
2541 #ifdef COLOR_SPLIT
2542         }
2543 #endif /* COLOR_SPLIT */
2544     }
2545 }
2546 
2547 void GraphColorRegAllocator::SplitAndColor()
2548 {
2549     /* handle mustAssigned */
2550     if (needDump) {
2551         LogInfo::MapleLogger() << " starting mustAssigned : \n";
2552     }
2553     SplitAndColorForEachLr(mustAssigned);
2554 
2555     if (needDump) {
2556         LogInfo::MapleLogger() << " starting unconstrainedPref : \n";
2557     }
2558     /* assign color for unconstrained */
2559     SplitAndColorForEachLr(unconstrainedPref);
2560 
2561     if (needDump) {
2562         LogInfo::MapleLogger() << " starting constrained : \n";
2563     }
2564     /* handle constrained */
2565     SplitAndColorForEachLr(constrained);
2566 
2567     if (needDump) {
2568         LogInfo::MapleLogger() << " starting unconstrained : \n";
2569     }
2570     /* assign color for unconstrained */
2571     SplitAndColorForEachLr(unconstrained);
2572 
2573 #ifdef OPTIMIZE_FOR_PROLOG
2574     if (doOptProlog) {
2575         ColorForOptPrologEpilog();
2576     }
2577 #endif /* OPTIMIZE_FOR_PROLOG */
2578 }
2579 
2580 void GraphColorRegAllocator::HandleLocalRegAssignment(regno_t regNO, LocalRegAllocator &localRa, bool isInt)
2581 {
2582     /* vreg, get a reg for it if not assigned already. */
2583     if (!localRa.IsInRegAssigned(regNO, isInt) && !localRa.isInRegSpilled(regNO, isInt)) {
2584         /* find an available phys reg */
2585         bool founded = false;
2586         LiveRange *lr = lrMap[regNO];
2587         regno_t maxIntReg = R0 + MaxIntPhysRegNum();
2588         regno_t maxFpReg = V0 + MaxFloatPhysRegNum();
2589         regno_t startReg = isInt ? R0 : V0;
2590         regno_t endReg = isInt ? maxIntReg : maxFpReg;
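        /*
         * A candidate preg must still be available in this BB, must be callee-saved when
         * the vreg lives across a call, and must not be vetoed (pregveto) for this LR.
         */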
2591         for (uint32 preg = startReg; preg <= endReg; ++preg) {
2592             if (!localRa.IsPregAvailable(preg, isInt)) {
2593                 continue;
2594             }
2595             if (lr->GetNumCall() != 0 && !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(preg))) {
2596                 continue;
2597             }
2598             if (lr->GetPregveto(preg)) {
2599                 continue;
2600             }
2601             regno_t assignedReg = preg;
2602             localRa.ClearPregs(assignedReg, isInt);
2603             localRa.SetPregUsed(assignedReg, isInt);
2604             localRa.SetRegAssigned(regNO, isInt);
2605             localRa.SetRegAssignmentMap(isInt, regNO, assignedReg);
2606             lr->SetAssignedRegNO(assignedReg);
2607             founded = true;
2608             break;
2609         }
2610         if (!founded) {
2611             localRa.SetRegSpilled(regNO, isInt);
2612             lr->SetSpilled(true);
2613         }
2614     }
2615 }
2616 
2617 void GraphColorRegAllocator::UpdateLocalRegDefUseCount(regno_t regNO, LocalRegAllocator &localRa, bool isDef,
2618                                                        bool isInt) const
2619 {
2620     auto usedIt = localRa.GetUseInfo().find(regNO);
2621     if (usedIt != localRa.GetUseInfo().end() && !isDef) {
2622         /* reg use, decrement count */
2623         DEBUG_ASSERT(usedIt->second > 0, "Incorrect local ra info");
2624         localRa.SetUseInfoElem(regNO, usedIt->second - 1);
2625         if (!AArch64isa::IsPhysicalRegister(static_cast<AArch64reg>(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) {
2626             localRa.IncUseInfoElem(localRa.GetRegAssignmentItem(isInt, regNO));
2627         }
2628         if (needDump) {
2629             LogInfo::MapleLogger() << "\t\treg " << regNO << " update #use to " << localRa.GetUseInfoElem(regNO)
2630                                    << "\n";
2631         }
2632     }
2633 
2634     auto defIt = localRa.GetDefInfo().find(regNO);
2635     if (defIt != localRa.GetDefInfo().end() && isDef) {
2636         /* reg def, decrement count */
2637         DEBUG_ASSERT(defIt->second > 0, "Incorrect local ra info");
2638         localRa.SetDefInfoElem(regNO, defIt->second - 1);
2639         if (!AArch64isa::IsPhysicalRegister(static_cast<AArch64reg>(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) {
2640             localRa.IncDefInfoElem(localRa.GetRegAssignmentItem(isInt, regNO));
2641         }
2642         if (needDump) {
2643             LogInfo::MapleLogger() << "\t\treg " << regNO << " update #def to " << localRa.GetDefInfoElem(regNO)
2644                                    << "\n";
2645         }
2646     }
2647 }
2648 
2649 void GraphColorRegAllocator::UpdateLocalRegConflict(regno_t regNO, LocalRegAllocator &localRa, bool isInt)
2650 {
2651     LiveRange *lr = lrMap[regNO];
2652     if (lr->GetNumBBConflicts() == 0) {
2653         return;
2654     }
2655     if (!localRa.IsInRegAssigned(regNO, isInt)) {
2656         return;
2657     }
2658     regno_t preg = localRa.GetRegAssignmentItem(isInt, regNO);
2659     ForEachRegArrElem(lr->GetBBConflict(), [&preg, this](regno_t regNO) { lrMap[regNO]->InsertElemToPregveto(preg); });
2660 }
2661 
2662 void GraphColorRegAllocator::HandleLocalRaDebug(regno_t regNO, const LocalRegAllocator &localRa, bool isInt) const
2663 {
2664     LogInfo::MapleLogger() << "HandleLocalReg " << regNO << "\n";
2665     LogInfo::MapleLogger() << "\tregUsed:";
2666     uint64 regUsed = localRa.GetPregUsed(isInt);
2667     regno_t base = isInt ? R0 : V0;
2668     regno_t end = isInt ? (RLR - R0) : (V31 - V0);
2669 
2670     for (uint32 i = 0; i <= end; ++i) {
2671         if ((regUsed & (1ULL << i)) != 0) {
2672             LogInfo::MapleLogger() << " " << (i + base);
2673         }
2674     }
2675     LogInfo::MapleLogger() << "\n";
2676     LogInfo::MapleLogger() << "\tregs:";
2677     uint64 regs = localRa.GetPregs(isInt);
2678     for (uint32 regnoInLoop = 0; regnoInLoop <= end; ++regnoInLoop) {
2679         if ((regs & (1ULL << regnoInLoop)) != 0) {
2680             LogInfo::MapleLogger() << " " << (regnoInLoop + base);
2681         }
2682     }
2683     LogInfo::MapleLogger() << "\n";
2684 }
2685 
2686 void GraphColorRegAllocator::HandleLocalReg(Operand &op, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo,
2687                                             bool isDef, bool isInt)
2688 {
2689     if (!op.IsRegister()) {
2690         return;
2691     }
2692     auto &regOpnd = static_cast<RegOperand &>(op);
2693     regno_t regNO = regOpnd.GetRegisterNumber();
2694 
2695     if (IsUnconcernedReg(regOpnd)) {
2696         return;
2697     }
2698 
2699     /* is this a local register ? */
2700     if (regNO >= kAllRegNum && !IsLocalReg(regNO)) {
2701         return;
2702     }
2703 
2704     if (needDump) {
2705         HandleLocalRaDebug(regNO, localRa, isInt);
2706     }
2707 
2708     if (regOpnd.IsPhysicalRegister()) {
2709         /* conflicts with pregs are recorded in lr->pregveto and BBAssignInfo->globalsAssigned */
2710         UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt);
2711         /* See if it is needed by global RA */
2712         if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0) {
2713             if (bbInfo && !bbInfo->GetGlobalsAssigned(regNO)) {
2714                 /* This phys reg is now available for assignment for a vreg */
2715                 localRa.SetPregs(regNO, isInt);
2716                 if (needDump) {
2717                     LogInfo::MapleLogger() << "\t\tlast ref, phys-reg " << regNO << " now available\n";
2718                 }
2719             }
2720         }
2721     } else {
2722         HandleLocalRegAssignment(regNO, localRa, isInt);
2723         UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt);
2724         UpdateLocalRegConflict(regNO, localRa, isInt);
2725         if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0 &&
2726             localRa.IsInRegAssigned(regNO, isInt)) {
2727             /* last ref of vreg, release assignment */
2728             localRa.SetPregs(localRa.GetRegAssignmentItem(isInt, regNO), isInt);
2729             if (needDump) {
2730                 LogInfo::MapleLogger() << "\t\tlast ref, release reg " << localRa.GetRegAssignmentItem(isInt, regNO)
2731                                        << " for " << regNO << "\n";
2732             }
2733         }
2734     }
2735 }
2736 
2737 void GraphColorRegAllocator::LocalRaRegSetEraseReg(LocalRegAllocator &localRa, regno_t regNO) const
2738 {
2739     bool isInt = AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO));
2740     if (localRa.IsPregAvailable(regNO, isInt)) {
2741         localRa.ClearPregs(regNO, isInt);
2742     }
2743 }
2744 
2745 bool GraphColorRegAllocator::LocalRaInitRegSet(LocalRegAllocator &localRa, uint32 bbID)
2746 {
2747     bool needLocalRa = false;
2748     /* Note physical regs start from R0, V0. */
2749     localRa.InitPregs(MaxIntPhysRegNum(), MaxFloatPhysRegNum(), cgFunc->GetCG()->GenYieldPoint(), intSpillRegSet,
2750                       fpSpillRegSet);
2751 
2752     localRa.ClearUseInfo();
2753     localRa.ClearDefInfo();
2754     LocalRaInfo *lraInfo = localRegVec[bbID];
2755     DEBUG_ASSERT(lraInfo != nullptr, "lraInfo must not be nullptr");
2756     for (const auto &useCntPair : lraInfo->GetUseCnt()) {
2757         regno_t regNO = useCntPair.first;
2758         if (regNO >= kAllRegNum) {
2759             needLocalRa = true;
2760         }
2761         localRa.SetUseInfoElem(useCntPair.first, useCntPair.second);
2762     }
2763     for (const auto &defCntPair : lraInfo->GetDefCnt()) {
2764         regno_t regNO = defCntPair.first;
2765         if (regNO >= kAllRegNum) {
2766             needLocalRa = true;
2767         }
2768         localRa.SetDefInfoElem(defCntPair.first, defCntPair.second);
2769     }
2770     return needLocalRa;
2771 }
2772 
2773 void GraphColorRegAllocator::LocalRaInitAllocatableRegs(LocalRegAllocator &localRa, uint32 bbID)
2774 {
2775     BBAssignInfo *bbInfo = bbRegInfo[bbID];
2776     if (bbInfo != nullptr) {
2777         for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
2778             if (bbInfo->GetGlobalsAssigned(regNO)) {
2779                 LocalRaRegSetEraseReg(localRa, regNO);
2780             }
2781         }
2782     }
2783 }
2784 
2785 void GraphColorRegAllocator::LocalRaForEachDefOperand(const Insn &insn, LocalRegAllocator &localRa,
2786                                                       const BBAssignInfo *bbInfo)
2787 {
2788     const InsnDesc *md = insn.GetDesc();
2789     uint32 opndNum = insn.GetOperandSize();
2790     for (uint32 i = 0; i < opndNum; ++i) {
2791         Operand &opnd = insn.GetOperand(i);
2792         /* handle def opnd */
2793         if (!md->GetOpndDes(i)->IsRegDef()) {
2794             continue;
2795         }
2796         auto &regOpnd = static_cast<RegOperand &>(opnd);
2797         bool isInt = (regOpnd.GetRegisterType() == kRegTyInt);
2798         HandleLocalReg(opnd, localRa, bbInfo, true, isInt);
2799     }
2800 }
2801 
2802 void GraphColorRegAllocator::LocalRaForEachUseOperand(const Insn &insn, LocalRegAllocator &localRa,
2803                                                       const BBAssignInfo *bbInfo)
2804 {
2805     const InsnDesc *md = insn.GetDesc();
2806     uint32 opndNum = insn.GetOperandSize();
2807     for (uint32 i = 0; i < opndNum; ++i) {
2808         Operand &opnd = insn.GetOperand(i);
2809         if (opnd.IsList()) {
2810             continue;
2811         } else if (opnd.IsMemoryAccessOperand()) {
2812             auto &memOpnd = static_cast<MemOperand &>(opnd);
2813             Operand *base = memOpnd.GetBaseRegister();
2814             Operand *offset = memOpnd.GetIndexRegister();
2815             if (base != nullptr) {
2816                 HandleLocalReg(*base, localRa, bbInfo, false, true);
2817             }
2818             if (!memOpnd.IsIntactIndexed()) {
2819                 HandleLocalReg(*base, localRa, bbInfo, true, true);
2820             }
2821             if (offset != nullptr) {
2822                 HandleLocalReg(*offset, localRa, bbInfo, false, true);
2823             }
2824         } else if (md->GetOpndDes(i)->IsRegUse()) {
2825             auto &regOpnd = static_cast<RegOperand &>(opnd);
2826             bool isInt = (regOpnd.GetRegisterType() == kRegTyInt);
2827             HandleLocalReg(opnd, localRa, bbInfo, false, isInt);
2828         }
2829     }
2830 }
2831 
2832 void GraphColorRegAllocator::LocalRaPrepareBB(BB &bb, LocalRegAllocator &localRa)
2833 {
2834     BBAssignInfo *bbInfo = bbRegInfo[bb.GetId()];
2835     FOR_BB_INSNS(insn, &bb) {
2836         if (!insn->IsMachineInstruction()) {
2837             continue;
2838         }
2839 
2840         /*
2841          * Use reverse operand order, i.e. handle uses first, then defs, for allocation.
2842          * Need to free the use's register first so it can be reused for the def.
2843          */
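        /*
         * Illustrative sketch (hypothetical vregs, not taken from this function):
         *   add R101, R102, R103    // R101 is a def; R102, R103 are uses
         * Scanning the uses first lets a last use of R102/R103 release its preg,
         * which the def of R101 can then immediately reuse.
         */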
2844         LocalRaForEachUseOperand(*insn, localRa, bbInfo);
2845         LocalRaForEachDefOperand(*insn, localRa, bbInfo);
2846     }
2847 }
2848 
2849 void GraphColorRegAllocator::LocalRaFinalAssignment(const LocalRegAllocator &localRa, BBAssignInfo &bbInfo)
2850 {
2851     for (const auto &intRegAssignmentMapPair : localRa.GetIntRegAssignmentMap()) {
2852         regno_t regNO = intRegAssignmentMapPair.second;
2853         if (needDump) {
2854             LogInfo::MapleLogger() << "[" << intRegAssignmentMapPair.first << "," << regNO << "],";
2855         }
2856         /* Might need to get rid of this copy. */
2857         bbInfo.SetRegMapElem(intRegAssignmentMapPair.first, regNO);
2858         AddCalleeUsed(regNO, kRegTyInt);
2859     }
2860     for (const auto &fpRegAssignmentMapPair : localRa.GetFpRegAssignmentMap()) {
2861         regno_t regNO = fpRegAssignmentMapPair.second;
2862         if (needDump) {
2863             LogInfo::MapleLogger() << "[" << fpRegAssignmentMapPair.first << "," << regNO << "],";
2864         }
2865         /* Might need to get rid of this copy. */
2866         bbInfo.SetRegMapElem(fpRegAssignmentMapPair.first, regNO);
2867         AddCalleeUsed(regNO, kRegTyFloat);
2868     }
2869 }
2870 
2871 void GraphColorRegAllocator::LocalRaDebug(const BB &bb, const LocalRegAllocator &localRa) const
2872 {
2873     LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra INT need " << localRa.GetNumIntPregUsed() << " regs\n";
2874     LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra FP need " << localRa.GetNumFpPregUsed() << " regs\n";
2875     LogInfo::MapleLogger() << "\tpotential assignments:";
2876     for (auto it : localRa.GetIntRegAssignmentMap()) {
2877         LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],";
2878     }
2879     for (auto it : localRa.GetFpRegAssignmentMap()) {
2880         LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],";
2881     }
2882     LogInfo::MapleLogger() << "\n";
2883 }
2884 
2885 /*
2886  * When doAllocate is false, this is the prepass:
2887  * traverse each BB and keep track of the number of registers required
2888  * for local registers in that BB, then communicate this to global RA.
2889  *
2890  * When doAllocate is true:
2891  * allocate local registers for each BB based on the registers left unused
2892  * by global RA, and spill if no register is available.
2893  */
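/*
 * Assumed driver sequence (illustrative only; the actual caller is outside this file):
 *   LocalRegisterAllocator(false);  // prepass: record per-BB local register demand
 *   ... global coloring consults the per-BB counts recorded via
 *       SetIntLocalRegsNeeded / SetFpLocalRegsNeeded below ...
 *   LocalRegisterAllocator(true);   // allocate: bind locals to registers global RA left free
 */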
2894 void GraphColorRegAllocator::LocalRegisterAllocator(bool doAllocate)
2895 {
2896     if (needDump) {
2897         if (doAllocate) {
2898             LogInfo::MapleLogger() << "LRA allocation start\n";
2899             PrintBBAssignInfo();
2900         } else {
2901             LogInfo::MapleLogger() << "LRA preprocessing start\n";
2902         }
2903     }
2904     LocalRegAllocator *localRa = memPool->New<LocalRegAllocator>(*cgFunc, alloc);
2905     for (auto *bb : bfs->sortedBBs) {
2906         uint32 bbID = bb->GetId();
2907 
2908         LocalRaInfo *lraInfo = localRegVec[bb->GetId()];
2909         if (lraInfo == nullptr) {
2910             /* No locals to allocate */
2911             continue;
2912         }
2913 
2914         localRa->ClearLocalRaInfo();
2915         bool needLocalRa = LocalRaInitRegSet(*localRa, bbID);
2916         if (!needLocalRa) {
2917             /* Only physical regs in bb, no local ra needed. */
2918             continue;
2919         }
2920 
2921         if (doAllocate) {
2922             LocalRaInitAllocatableRegs(*localRa, bbID);
2923         }
2924 
2925         LocalRaPrepareBB(*bb, *localRa);
2926 
2927         BBAssignInfo *bbInfo = bbRegInfo[bb->GetId()];
2928         if (bbInfo == nullptr) {
2929             bbInfo = memPool->New<BBAssignInfo>(alloc);
2930             bbRegInfo[bbID] = bbInfo;
2931             bbInfo->InitGlobalAssigned();
2932         }
2933         bbInfo->SetIntLocalRegsNeeded(localRa->GetNumIntPregUsed());
2934         bbInfo->SetFpLocalRegsNeeded(localRa->GetNumFpPregUsed());
2935 
2936         if (doAllocate) {
2937             if (needDump) {
2938                 LogInfo::MapleLogger() << "\tbb(" << bb->GetId() << ")final local ra assignments:";
2939             }
2940             LocalRaFinalAssignment(*localRa, *bbInfo);
2941             if (needDump) {
2942                 LogInfo::MapleLogger() << "\n";
2943             }
2944         } else if (needDump) {
2945             LocalRaDebug(*bb, *localRa);
2946         }
2947     }
2948 }
2949 
2950 MemOperand *GraphColorRegAllocator::GetConsistentReuseMem(const uint64 *conflict,
2951                                                           const std::set<MemOperand *> &usedMemOpnd, uint32 size,
2952                                                           RegType regType)
2953 {
2954     std::set<LiveRange *, SetLiveRangeCmpFunc> sconflict;
2955     regno_t regNO;
2956     for (uint32 i = 0; i < regBuckets; ++i) {
2957         for (uint32 b = 0; b < kU64; ++b) {
2958             if ((conflict[i] & (1ULL << b)) != 0) {
2959                 continue;
2960             }
2961             regNO = i * kU64 + b;
2962             if (regNO >= numVregs) {
2963                 break;
2964             }
2965             if (GetLiveRange(regNO) != nullptr) {
2966                 (void)sconflict.insert(lrMap[regNO]);
2967             }
2968         }
2969     }
2970 
2971     for (auto *noConflictLr : sconflict) {
2972         if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType || noConflictLr->GetSpillSize() != size) {
2973             continue;
2974         }
2975         if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) {
2976             return noConflictLr->GetSpillMem();
2977         }
2978     }
2979     return nullptr;
2980 }
2981 
2982 MemOperand *GraphColorRegAllocator::GetCommonReuseMem(const uint64 *conflict, const std::set<MemOperand *> &usedMemOpnd,
2983                                                       uint32 size, RegType regType)
2984 {
2985     regno_t regNO;
2986     for (uint32 i = 0; i < regBuckets; ++i) {
2987         for (uint32 b = 0; b < kU64; ++b) {
2988             if ((conflict[i] & (1ULL << b)) != 0) {
2989                 continue;
2990             }
2991             regNO = i * kU64 + b;
2992             if (regNO >= numVregs) {
2993                 break;
2994             }
2995             LiveRange *noConflictLr = GetLiveRange(regNO);
2996             if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType ||
2997                 noConflictLr->GetSpillSize() != size) {
2998                 continue;
2999             }
3000             if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) {
3001                 return noConflictLr->GetSpillMem();
3002             }
3003         }
3004     }
3005     return nullptr;
3006 }
3007 
3008 /* See if any of the non-conflict LR is spilled and use its memOpnd. */
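/*
 * Illustrative example (hypothetical vreg numbers): if vreg 100 and vreg 120 never
 * share a BB (no conflict) and vreg 120 was already spilled to a stack slot of the
 * same size and type, vreg 100 may reuse vreg 120's spill slot instead of getting a
 * new one, since the two values are never live in the same block.
 */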
3009 MemOperand *GraphColorRegAllocator::GetReuseMem(uint32 vregNO, uint32 size, RegType regType)
3010 {
3011     if (cgFunc->GetMirModule().GetSrcLang() != kSrcLangC) {
3012         return nullptr;
3013     }
3014     if (IsLocalReg(vregNO)) {
3015         return nullptr;
3016     }
3017 
3018     LiveRange *lr = lrMap[vregNO];
3019     const uint64 *conflict;
3020     if (lr->GetSplitLr() != nullptr) {
3021         /*
3022          * For split LR, the vreg liveness is optimized, but for spill location
3023          * the stack location needs to be maintained for the entire LR.
3024          */
3025         return nullptr;
3026     } else {
3027         conflict = lr->GetBBConflict();
3028     }
3029 
3030     std::set<MemOperand *> usedMemOpnd;
3031     auto updateMemOpnd = [&usedMemOpnd, this](regno_t regNO) {
3032         if (regNO >= numVregs) {
3033             return;
3034         }
3035         LiveRange *lrInner = GetLiveRange(regNO);
3036         if (lrInner && lrInner->GetSpillMem() != nullptr) {
3037             (void)usedMemOpnd.insert(lrInner->GetSpillMem());
3038         }
3039     };
3040     ForEachRegArrElem(conflict, updateMemOpnd);
3041     uint32 regSize = (size <= k32) ? k32 : k64;
3042     /*
3043      * This orders the search so that the memOpnd given out is consistent.
3044      * When vreg numbers do not change while going through the VtableImpl.mpl file,
3045      * this can be simplified.
3046      */
3047 #ifdef CONSISTENT_MEMOPND
3048     return GetConsistentReuseMem(conflict, usedMemOpnd, regSize, regType);
3049 #else  /* CONSISTENT_MEMOPND */
3050     return GetCommonReuseMem(conflict, usedMemOpnd, regSize, regType);
3051 #endif /* CONSISTENT_MEMOPND */
3052 }
3053 
3054 MemOperand *GraphColorRegAllocator::GetSpillMem(uint32 vregNO, bool isDest, Insn &insn, AArch64reg regNO,
3055                                                 bool &isOutOfRange) const
3056 {
3057     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3058     MemOperand *memOpnd = a64CGFunc->GetOrCreatSpillMem(vregNO);
3059     return (a64CGFunc->AdjustMemOperandIfOffsetOutOfRange(memOpnd, vregNO, isDest, insn, regNO, isOutOfRange));
3060 }
3061 
3062 void GraphColorRegAllocator::SpillOperandForSpillPre(Insn &insn, const Operand &opnd, RegOperand &phyOpnd,
3063                                                      uint32 spillIdx, bool needSpill)
3064 {
3065     if (!needSpill) {
3066         return;
3067     }
3068     auto &regOpnd = static_cast<const RegOperand &>(opnd);
3069     uint32 regNO = regOpnd.GetRegisterNumber();
3070     LiveRange *lr = lrMap[regNO];
3071 
3072     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3073 
3074     MemOperand *spillMem = CreateSpillMem(spillIdx, kSpillMemPre);
3075     DEBUG_ASSERT(spillMem != nullptr, "spillMem nullptr check");
3076 
3077     uint32 regSize = regOpnd.GetSize();
3078     PrimType stype;
3079     RegType regType = regOpnd.GetRegisterType();
3080     if (regType == kRegTyInt) {
3081         stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3082     } else {
3083         stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3084     }
3085 
3086     if (a64CGFunc->IsImmediateOffsetOutOfRange(*spillMem, k64)) {
3087         regno_t pregNO = R16;
3088         spillMem =
3089             &a64CGFunc->SplitOffsetWithAddInstruction(*spillMem, k64, static_cast<AArch64reg>(pregNO), false, &insn);
3090     }
3091     Insn &stInsn =
3092         cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickStInsn(spillMem->GetSize(), stype), phyOpnd, *spillMem);
3093     std::string comment = " SPILL for spill vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3094     stInsn.SetComment(comment);
3095     insn.GetBB()->InsertInsnBefore(insn, stInsn);
3096 }
3097 
3098 void GraphColorRegAllocator::SpillOperandForSpillPost(Insn &insn, const Operand &opnd, RegOperand &phyOpnd,
3099                                                       uint32 spillIdx, bool needSpill)
3100 {
3101     if (!needSpill) {
3102         return;
3103     }
3104 
3105     auto &regOpnd = static_cast<const RegOperand &>(opnd);
3106     uint32 regNO = regOpnd.GetRegisterNumber();
3107     LiveRange *lr = lrMap[regNO];
3108     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3109     bool isLastInsn = false;
3110     if (insn.GetBB()->GetKind() == BB::kBBIf && insn.GetBB()->IsLastInsn(&insn)) {
3111         isLastInsn = true;
3112     }
3113 
3114     if (lr->GetRematLevel() != kRematOff) {
3115         std::string comment = " REMATERIALIZE for spill vreg: " + std::to_string(regNO);
3116         if (isLastInsn) {
3117             for (auto tgtBB : insn.GetBB()->GetSuccs()) {
3118                 std::vector<Insn *> rematInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3119                 for (auto &&remat : rematInsns) {
3120                     remat->SetComment(comment);
3121                     tgtBB->InsertInsnBegin(*remat);
3122                 }
3123             }
3124         } else {
3125             std::vector<Insn *> rematInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3126             for (auto &&remat : rematInsns) {
3127                 remat->SetComment(comment);
3128                 insn.GetBB()->InsertInsnAfter(insn, *remat);
3129             }
3130         }
3131         return;
3132     }
3133 
3134     MemOperand *spillMem = CreateSpillMem(spillIdx, kSpillMemPost);
3135     DEBUG_ASSERT(spillMem != nullptr, "spillMem nullptr check");
3136 
3137     uint32 regSize = regOpnd.GetSize();
3138     PrimType stype;
3139     RegType regType = regOpnd.GetRegisterType();
3140     if (regType == kRegTyInt) {
3141         stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3142     } else {
3143         stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3144     }
3145 
3146     bool isOutOfRange = false;
3147     Insn *nextInsn = insn.GetNextMachineInsn();
3148     if (a64CGFunc->IsImmediateOffsetOutOfRange(*spillMem, k64)) {
3149         regno_t pregNO = R16;
3150         spillMem =
3151             &a64CGFunc->SplitOffsetWithAddInstruction(*spillMem, k64, static_cast<AArch64reg>(pregNO), true, &insn);
3152         isOutOfRange = true;
3153     }
3154     std::string comment =
3155         " RELOAD for spill vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3156     if (isLastInsn) {
3157         for (auto tgtBB : insn.GetBB()->GetSuccs()) {
3158             MOperator mOp = a64CGFunc->PickLdInsn(spillMem->GetSize(), stype);
3159             Insn *newLd = &cgFunc->GetInsnBuilder()->BuildInsn(mOp, phyOpnd, *spillMem);
3160             newLd->SetComment(comment);
3161             tgtBB->InsertInsnBegin(*newLd);
3162         }
3163     } else {
3164         MOperator mOp = a64CGFunc->PickLdInsn(spillMem->GetSize(), stype);
3165         Insn &ldrInsn = cgFunc->GetInsnBuilder()->BuildInsn(mOp, phyOpnd, *spillMem);
3166         ldrInsn.SetComment(comment);
3167         if (isOutOfRange) {
3168             if (nextInsn == nullptr) {
3169                 insn.GetBB()->AppendInsn(ldrInsn);
3170             } else {
3171                 insn.GetBB()->InsertInsnBefore(*nextInsn, ldrInsn);
3172             }
3173         } else {
3174             insn.GetBB()->InsertInsnAfter(insn, ldrInsn);
3175         }
3176     }
3177 }
3178 
3179 MemOperand *GraphColorRegAllocator::GetSpillOrReuseMem(LiveRange &lr, uint32 regSize, bool &isOutOfRange, Insn &insn,
3180                                                        bool isDef)
3181 {
3182     (void)regSize;
3183     MemOperand *memOpnd = nullptr;
3184     if (lr.GetSpillMem() != nullptr) {
3185         /* the saved memOpnd cannot be out-of-range */
3186         memOpnd = lr.GetSpillMem();
3187     } else {
3188 #ifdef REUSE_SPILLMEM
3189         memOpnd = GetReuseMem(lr.GetRegNO(), regSize, lr.GetRegType());
3190         if (memOpnd != nullptr) {
3191             lr.SetSpillMem(*memOpnd);
3192             lr.SetSpillSize((regSize <= k32) ? k32 : k64);
3193         } else {
3194 #endif /* REUSE_SPILLMEM */
3195             regno_t baseRegNO;
3196             if (!isDef && lr.GetRegType() == kRegTyInt) {
3197                 /* src will use its spill reg as baseRegister when offset out-of-range
3198                  * add x16, x29, #max-offset  //out-of-range
3199                  * ldr x16, [x16, #offset]    //reload
3200                  * mov xd, x16
3201                  */
3202                 baseRegNO = lr.GetSpillReg();
3203                 if (baseRegNO > RLAST_INT_REG) {
3204                     baseRegNO = R16;
3205                 }
3206             } else {
3207                 /* dest will use R16 as baseRegister when offset out-of-range
3208                  * mov x16, xs
3209                  * add x17, x29, #max-offset  //out-of-range
3210                  * str x16, [x17, #offset]    //spill
3211                  */
3212                 baseRegNO = R16;
3213             }
3214             DEBUG_ASSERT(baseRegNO != kRinvalid, "invalid base register number");
3215             memOpnd = GetSpillMem(lr.GetRegNO(), isDef, insn, static_cast<AArch64reg>(baseRegNO), isOutOfRange);
3216             /* dest's spill reg can only be R15 or R16 */
3217             if (isOutOfRange && isDef) {
3218                 DEBUG_ASSERT(lr.GetSpillReg() != R16, "can not find valid memopnd's base register");
3219             }
3220 #ifdef REUSE_SPILLMEM
3221             if (!isOutOfRange) {
3222                 lr.SetSpillMem(*memOpnd);
3223                 lr.SetSpillSize((regSize <= k32) ? k32 : k64);
3224             }
3225         }
3226 #endif /* REUSE_SPILLMEM */
3227     }
3228     return memOpnd;
3229 }
3230 
3231 /*
3232  * Create spill insn for the operand.
3233  * When need_spill is true, need to spill the spill operand register first
3234  * then use it for the current spill, then reload it again.
3235  */
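/*
 * Rough shape of the emitted code for a spilled use borrowing preg x16
 * (illustrative only; opcodes and offsets depend on size, type and remat level):
 *   str x16, [sp, #preSlot]     // SpillOperandForSpillPre: save x16's live value
 *   ldr x16, [sp, #slotOfVreg]  // SpillOperand (use): reload the spilled vreg
 *   <original insn now reads x16>
 *   ldr x16, [sp, #preSlot]     // SpillOperandForSpillPost: restore x16
 */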
3236 Insn *GraphColorRegAllocator::SpillOperand(Insn &insn, const Operand &opnd, bool isDef, RegOperand &phyOpnd,
3237                                            bool forCall)
3238 {
3239     auto &regOpnd = static_cast<const RegOperand &>(opnd);
3240     uint32 regNO = regOpnd.GetRegisterNumber();
3241     uint32 pregNO = phyOpnd.GetRegisterNumber();
3242     bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(pregNO));
3243     if (needDump) {
3244         LogInfo::MapleLogger() << "SpillOperand " << regNO << "\n";
3245     }
3246     LiveRange *lr = lrMap[regNO];
3247     bool isForCallerSave = lr->GetSplitLr() == nullptr && lr->GetNumCall() && !isCalleeReg;
3248     uint32 regSize = regOpnd.GetSize();
3249     bool isOutOfRange = false;
3250     PrimType stype;
3251     RegType regType = regOpnd.GetRegisterType();
3252     if (regType == kRegTyInt) {
3253         stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3254     } else {
3255         stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3256     }
3257     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3258 
3259     Insn *spillDefInsn = nullptr;
3260     if (isDef) {
3261         if (lr->GetRematLevel() == kRematOff) {
3262             lr->SetSpillReg(pregNO);
3263             Insn *nextInsn = insn.GetNextMachineInsn();
3264             MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, forCall ? false : true);
3265             spillDefInsn =
3266                 &cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickStInsn(regSize, stype), phyOpnd, *memOpnd);
3267             spillDefInsn->SetIsSpill();
3268             std::string comment = " SPILL vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3269             if (isForCallerSave) {
3270                 comment += " for caller save in BB " + std::to_string(insn.GetBB()->GetId());
3271             }
3272             spillDefInsn->SetComment(comment);
3273             if (forCall) {
3274                 insn.GetBB()->InsertInsnBefore(insn, *spillDefInsn);
3275             } else if (isOutOfRange) {
3276                 if (nextInsn == nullptr) {
3277                     insn.GetBB()->AppendInsn(*spillDefInsn);
3278                 } else {
3279                     insn.GetBB()->InsertInsnBefore(*nextInsn, *spillDefInsn);
3280                 }
3281             } else if (insn.GetNext() && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) {
3282                 insn.GetBB()->InsertInsnAfter(*insn.GetNext(), *spillDefInsn);
3283             } else {
3284                 insn.GetBB()->InsertInsnAfter(insn, *spillDefInsn);
3285             }
3286         }
3287 
3288         if ((insn.GetMachineOpcode() != MOP_xmovkri16) && (insn.GetMachineOpcode() != MOP_wmovkri16)) {
3289             return spillDefInsn;
3290         }
3291     }
3292     if (insn.GetMachineOpcode() == MOP_clinit_tail) {
3293         return nullptr;
3294     }
3295     Insn *nextInsn = insn.GetNextMachineInsn();
3296     lr->SetSpillReg(pregNO);
3297 
3298     std::vector<Insn *> spillUseInsns;
3299     std::string comment;
3300     if (lr->GetRematLevel() != kRematOff) {
3301         spillUseInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3302         comment = " REMATERIALIZE vreg: " + std::to_string(regNO);
3303     } else {
3304         MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, forCall ? true : false);
3305         Insn &spillUseInsn =
3306             cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickLdInsn(regSize, stype), phyOpnd, *memOpnd);
3307         spillUseInsn.SetIsReload();
3308         spillUseInsns.push_back(&spillUseInsn);
3309         comment = " RELOAD vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3310     }
3311     if (isForCallerSave) {
3312         comment += " for caller save in BB " + std::to_string(insn.GetBB()->GetId());
3313     }
3314     for (auto &&spillUseInsn : spillUseInsns) {
3315         spillUseInsn->SetComment(comment);
3316         if (forCall) {
3317             if (nextInsn == nullptr) {
3318                 insn.GetBB()->AppendInsn(*spillUseInsn);
3319             } else {
3320                 insn.GetBB()->InsertInsnBefore(*nextInsn, *spillUseInsn);
3321             }
3322         } else {
3323             insn.GetBB()->InsertInsnBefore(insn, *spillUseInsn);
3324         }
3325     }
3326     if (spillDefInsn != nullptr) {
3327         return spillDefInsn;
3328     }
3329     return &insn;
3330 }
3331 
3332 /* Try to find available reg for spill. */
3333 bool GraphColorRegAllocator::SetAvailableSpillReg(std::unordered_set<regno_t> &cannotUseReg, LiveRange &lr,
3334                                                   uint64 &usedRegMask)
3335 {
3336     bool isInt = (lr.GetRegType() == kRegTyInt);
3337     regno_t base = isInt ? R0 : V0;
3338     uint32 pregInterval = isInt ? 0 : (V0 - R30);
3339     MapleSet<uint32> &callerRegSet = isInt ? intCallerRegSet : fpCallerRegSet;
3340     MapleSet<uint32> &calleeRegSet = isInt ? intCalleeRegSet : fpCalleeRegSet;
3341 
3342     for (const auto &it : callerRegSet) {
3343         regno_t spillReg = it + base;
3344         if (cannotUseReg.find(spillReg) == cannotUseReg.end() &&
3345             (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3346             lr.SetAssignedRegNO(spillReg);
3347             usedRegMask |= 1ULL << (spillReg - pregInterval);
3348             return true;
3349         }
3350     }
3351     for (const auto &it : calleeRegSet) {
3352         regno_t spillReg = it + base;
3353         if (cannotUseReg.find(spillReg) == cannotUseReg.end() &&
3354             (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3355             lr.SetAssignedRegNO(spillReg);
3356             usedRegMask |= 1ULL << (spillReg - pregInterval);
3357             return true;
3358         }
3359     }
3360     return false;
3361 }
3362 
3363 void GraphColorRegAllocator::CollectCannotUseReg(std::unordered_set<regno_t> &cannotUseReg, const LiveRange &lr,
3364                                                  Insn &insn)
3365 {
3366     /* Find the bb in the conflict LR that actually conflicts with the current bb. */
3367     for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) {
3368         if (lr.GetPregveto(regNO)) {
3369             (void)cannotUseReg.insert(regNO);
3370         }
3371     }
3372     auto updateCannotUse = [&insn, &cannotUseReg, this](regno_t regNO) {
3373         LiveRange *conflictLr = lrMap[regNO];
3374         /*
3375          * conflictLr->GetAssignedRegNO() might be zero
3376          * caller save will be inserted so the assigned reg can be released actually
3377          */
3378         if ((conflictLr->GetAssignedRegNO() > 0) && IsBitArrElemSet(conflictLr->GetBBMember(), insn.GetBB()->GetId())) {
3379             if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(conflictLr->GetAssignedRegNO())) &&
3380                 conflictLr->GetNumCall() && !conflictLr->GetProcessed()) {
3381                 return;
3382             }
3383             (void)cannotUseReg.insert(conflictLr->GetAssignedRegNO());
3384         }
3385     };
3386     ForEachRegArrElem(lr.GetBBConflict(), updateCannotUse);
3387 #ifdef USE_LRA
3388     if (!doLRA) {
3389         return;
3390     }
3391     BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()];
3392     if (bbInfo != nullptr) {
3393         for (const auto &regMapPair : bbInfo->GetRegMap()) {
3394             (void)cannotUseReg.insert(regMapPair.second);
3395         }
3396     }
3397 #endif /* USE_LRA */
3398 }
3399 
3400 regno_t GraphColorRegAllocator::PickRegForSpill(uint64 &usedRegMask, RegType regType, uint32 spillIdx,
3401                                                 bool &needSpillLr)
3402 {
3403     regno_t base;
3404     regno_t spillReg;
3405     uint32 pregInterval;
3406     bool isIntReg = (regType == kRegTyInt);
3407     if (isIntReg) {
3408         base = R0;
3409         pregInterval = 0;
3410     } else {
3411         base = V0;
3412         pregInterval = V0 - R30;
3413     }
3414 
3415     if (JAVALANG) {
3416         /* Use predetermined spill register */
3417         MapleSet<uint32> &spillRegSet = isIntReg ? intSpillRegSet : fpSpillRegSet;
3418         DEBUG_ASSERT(spillIdx < spillRegSet.size(), "spillIdx out of range of spillRegSet");
3419         auto regNumIt = spillRegSet.begin();
3420         for (; spillIdx > 0; --spillIdx) {
3421             ++regNumIt;
3422         }
3423         spillReg = *regNumIt + base;
3424         return spillReg;
3425     }
3426 
3427     /* Temporarily find an unused reg to spill */
3428     uint32 maxPhysRegNum = isIntReg ? MaxIntPhysRegNum() : MaxFloatPhysRegNum();
3429     for (spillReg = (maxPhysRegNum + base); spillReg > base; --spillReg) {
3430         if (spillReg >= k64BitSize) {
3431             spillReg = k64BitSize - 1;
3432         }
3433         if ((usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3434             usedRegMask |= (1ULL << (spillReg - pregInterval));
3435             needSpillLr = true;
3436             return spillReg;
3437         }
3438     }
3439 
3440     DEBUG_ASSERT(false, "can not find spillReg");
3441     return 0;
3442 }
3443 
3444 /* return true if need extra spill */
3445 bool GraphColorRegAllocator::SetRegForSpill(LiveRange &lr, Insn &insn, uint32 spillIdx, uint64 &usedRegMask, bool isDef)
3446 {
3447     std::unordered_set<regno_t> cannotUseReg;
3448     /* SPILL COALESCE */
3449     if (!isDef && (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr)) {
3450         auto &ropnd = static_cast<RegOperand &>(insn.GetOperand(0));
3451         if (ropnd.IsPhysicalRegister()) {
3452             lr.SetAssignedRegNO(ropnd.GetRegisterNumber());
3453             return false;
3454         }
3455     }
3456 
3457     CollectCannotUseReg(cannotUseReg, lr, insn);
3458 
3459     if (SetAvailableSpillReg(cannotUseReg, lr, usedRegMask)) {
3460         return false;
3461     }
3462 
3463     bool needSpillLr = false;
3464     if (!lr.GetAssignedRegNO()) {
3465         /*
3466          * All regs are assigned and none are free.
3467          * Pick a reg to spill and reuse for this spill.
3468          * Need to make sure the reg picked is not assigned to this insn,
3469          * else there will be conflict.
3470          */
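        /*
         * Sketch of what follows (assumed typical non-Java case): PickRegForSpill walks
         * down from the highest allocatable preg, takes the first one whose bit is clear
         * in usedRegMask, and sets needSpillLr, so the borrowed preg's own value gets
         * saved and restored around this insn by SpillOperandForSpillPre/Post.
         */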
3471         RegType regType = lr.GetRegType();
3472         regno_t spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr);
3473         if (insn.GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) {
3474             CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr");
3475         }
3476         lr.SetAssignedRegNO(spillReg);
3477     }
3478     return needSpillLr;
3479 }
3480 
3481 RegOperand *GraphColorRegAllocator::GetReplaceOpndForLRA(Insn &insn, const Operand &opnd, uint32 &spillIdx,
3482                                                          uint64 &usedRegMask, bool isDef)
3483 {
3484     auto &regOpnd = static_cast<const RegOperand &>(opnd);
3485     uint32 vregNO = regOpnd.GetRegisterNumber();
3486     RegType regType = regOpnd.GetRegisterType();
3487     BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()];
3488     if (bbInfo == nullptr) {
3489         return nullptr;
3490     }
3491     auto regIt = bbInfo->GetRegMap().find(vregNO);
3492     if (regIt != bbInfo->GetRegMap().end()) {
3493         RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3494             static_cast<AArch64reg>(regIt->second), regOpnd.GetSize(), regType);
3495         return &phyOpnd;
3496     }
3497     if (needDump) {
3498         LogInfo::MapleLogger() << "spill vreg " << vregNO << "\n";
3499     }
3500     regno_t spillReg;
3501     bool needSpillLr = false;
3502     if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) ||
3503         (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) {
3504         spillReg = R16;
3505     } else {
3506         /*
3507          * Use a reg that is excluded from livein/liveout/bbInfo->regMap.
3508          * Need to make sure the reg picked is not assigned to this insn,
3509          * else there will be conflict.
3510          */
3511         spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr);
3512         if (insn.GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) {
3513             CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr");
3514         }
3515         AddCalleeUsed(spillReg, regType);
3516         if (needDump) {
3517             LogInfo::MapleLogger() << "\tassigning lra spill reg " << spillReg << "\n";
3518         }
3519     }
3520     RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3521         static_cast<AArch64reg>(spillReg), regOpnd.GetSize(), regType);
3522     SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr);
3523     Insn *spill = SpillOperand(insn, regOpnd, isDef, phyOpnd);
3524     if (spill != nullptr) {
3525         SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr);
3526     }
3527     ++spillIdx;
3528     return &phyOpnd;
3529 }
3530 
3531 /* get spill reg and check if need extra spill */
3532 bool GraphColorRegAllocator::GetSpillReg(Insn &insn, LiveRange &lr, const uint32 &spillIdx, uint64 &usedRegMask,
3533                                          bool isDef)
3534 {
3535     bool needSpillLr = false;
3536     /*
3537      * Find a spill reg for the BB among the interfering LRs.
3538      * Without LRA, this info is very inaccurate: it falsely reports interference
3539      * with all locals that the spill may not actually interfere with.
3540      * For now, every instance of the spill requires a brand new reg assignment.
3541      */
3542     if (needDump) {
3543         LogInfo::MapleLogger() << "LR-regNO " << lr.GetRegNO() << " spilled, finding a spill reg\n";
3544     }
3545     if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) ||
3546         (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) {
3547         /*
3548          * When a cond branch reg is spilled, its value cannot be restored after the
3549          * branch, since the branch target can also be reached from other branches.
3550          * Doing this properly would require creating an intermediate bb for the reload.
3551          * Use x16; it is already excluded from allocation since it is reserved globally.
3552          */
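        /*
         * Illustrative (hypothetical) shape of the issue:
         *   ldr  w16, [sp, #slot]   // reload of the spilled branch operand
         *   cbnz w16, .Ltarget      // last insn of this BB
         * Any restore placed after the cbnz runs only on the fall-through path, and
         * .Ltarget may have other predecessors, so a borrowed preg could not be
         * restored safely; using reserved x16 sidesteps the restore entirely.
         */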
3553         lr.SetAssignedRegNO(R16);
3554     } else {
3555         lr.SetAssignedRegNO(0);
3556         needSpillLr = SetRegForSpill(lr, insn, spillIdx, usedRegMask, isDef);
3557         AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType());
3558     }
3559     return needSpillLr;
3560 }
3561 
3562 // find prev use/def after prev call
3563 bool GraphColorRegAllocator::EncountPrevRef(const BB &pred, LiveRange &lr, bool isDef, std::vector<bool> &visitedMap)
3564 {
3565     if (!visitedMap[pred.GetId()] && lr.FindInLuMap(pred.GetId()) != lr.EndOfLuMap()) {
3566         LiveUnit *lu = lr.GetLiveUnitFromLuMap(pred.GetId());
3567         if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3568             MapleMap<uint32, uint32> refs = lr.GetRefs(pred.GetId());
3569             auto it = refs.rbegin();
3570             bool findPrevRef = (it->second & kIsCall) == 0;
3571             return findPrevRef;
3572         }
3573         if (lu->HasCall()) {
3574             return false;
3575         }
3576     }
3577     visitedMap[pred.GetId()] = true;
3578     bool found = true;
3579     for (auto predBB : pred.GetPreds()) {
3580         if (!visitedMap[predBB->GetId()]) {
3581             found &= EncountPrevRef(*predBB, lr, isDef, visitedMap);
3582         }
3583     }
3584     return found;
3585 }
3586 
3587 bool GraphColorRegAllocator::FoundPrevBeforeCall(Insn &insn, LiveRange &lr, bool isDef)
3588 {
3589     bool hasFind = true;
3590     std::vector<bool> visitedMap(bbVec.size() + 1, false);
3591     for (auto pred : insn.GetBB()->GetPreds()) {
3592         hasFind &= EncountPrevRef(*pred, lr, isDef, visitedMap);
3593         if (!hasFind) {
3594             return false;
3595         }
3596     }
3597     return insn.GetBB()->GetPreds().size() != 0;
3598 }
3599 
3600 // find whether there is a next def before the next call (intervening uses are skipped)
3601 bool GraphColorRegAllocator::EncountNextRef(const BB &succ, LiveRange &lr, bool isDef, std::vector<bool> &visitedMap)
3602 {
3603     if (lr.FindInLuMap(succ.GetId()) != lr.EndOfLuMap()) {
3604         LiveUnit *lu = lr.GetLiveUnitFromLuMap(succ.GetId());
3605         bool findNextDef = false;
3606         if (lu->GetDefNum() || lu->HasCall()) {
3607             MapleMap<uint32, uint32> refs = lr.GetRefs(succ.GetId());
3608             for (auto it = refs.begin(); it != refs.end(); ++it) {
3609                 if ((it->second & kIsDef) != 0) {
3610                     findNextDef = true;
3611                     break;
3612                 }
3613                 if ((it->second & kIsCall) != 0) {
3614                     break;
3615                 }
3616                 if ((it->second & kIsUse) != 0) {
3617                     continue;
3618                 }
3619             }
3620             return findNextDef;
3621         }
3622         if (lu->HasCall()) {
3623             return false;
3624         }
3625     }
3626     visitedMap[succ.GetId()] = true;
3627     bool found = true;
3628     for (auto succBB : succ.GetSuccs()) {
3629         if (!visitedMap[succBB->GetId()]) {
3630             found &= EncountNextRef(*succBB, lr, isDef, visitedMap);
3631             if (!found) {
3632                 return false;
3633             }
3634         }
3635     }
3636     return found;
3637 }
3638 
3639 bool GraphColorRegAllocator::FoundNextBeforeCall(Insn &insn, LiveRange &lr, bool isDef)
3640 {
3641     bool haveFind = true;
3642     std::vector<bool> visitedMap(bbVec.size() + 1, false);
3643     for (auto succ : insn.GetBB()->GetSuccs()) {
3644         haveFind &= EncountNextRef(*succ, lr, isDef, visitedMap);
3645         if (!haveFind) {
3646             return false;
3647         }
3648     }
3649     return insn.GetBB()->GetSuccs().size() > 0;
3650 }
3651 
3652 bool GraphColorRegAllocator::HavePrevRefInCurBB(Insn &insn, LiveRange &lr, bool &contSearch) const
3653 {
3654     LiveUnit *lu = lr.GetLiveUnitFromLuMap(insn.GetBB()->GetId());
3655     bool findPrevRef = false;
3656     if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3657         MapleMap<uint32, uint32> refs = lr.GetRefs(insn.GetBB()->GetId());
3658         for (auto it = refs.rbegin(); it != refs.rend(); ++it) {
3659             if (it->first >= insn.GetId()) {
3660                 continue;
3661             }
3662             if ((it->second & kIsCall) != 0) {
3663                 contSearch = false;
3664                 break;
3665             }
3666             if (((it->second & kIsUse) != 0) || ((it->second & kIsDef) != 0)) {
3667                 findPrevRef = true;
3668                 contSearch = false;
3669                 break;
3670             }
3671         }
3672     }
3673     return findPrevRef;
3674 }
3675 
3676 bool GraphColorRegAllocator::HaveNextDefInCurBB(Insn &insn, LiveRange &lr, bool &contSearch) const
3677 {
3678     LiveUnit *lu = lr.GetLiveUnitFromLuMap(insn.GetBB()->GetId());
3679     bool findNextDef = false;
3680     if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3681         MapleMap<uint32, uint32> refs = lr.GetRefs(insn.GetBB()->GetId());
3682         for (auto it = refs.begin(); it != refs.end(); ++it) {
3683             if (it->first <= insn.GetId()) {
3684                 continue;
3685             }
3686             if ((it->second & kIsCall) != 0) {
3687                 contSearch = false;
3688                 break;
3689             }
3690             if ((it->second & kIsDef) != 0) {
3691                 findNextDef = true;
3692                 contSearch = false;
3693             }
3694         }
3695     }
3696     return findNextDef;
3697 }
3698 
3699 bool GraphColorRegAllocator::NeedCallerSave(Insn &insn, LiveRange &lr, bool isDef)
3700 {
3701     if (doLRA) {
3702         return true;
3703     }
3704     if (lr.HasDefUse()) {
3705         return true;
3706     }
3707 
3708     bool contSearch = true;
3709     bool needed = true;
3710     if (isDef) {
3711         needed = !HaveNextDefInCurBB(insn, lr, contSearch);
3712     } else {
3713         needed = !HavePrevRefInCurBB(insn, lr, contSearch);
3714     }
3715     if (!contSearch) {
3716         return needed;
3717     }
3718 
3719     if (isDef) {
3720         needed = true;
3721     } else {
3722         needed = !FoundPrevBeforeCall(insn, lr, isDef);
3723     }
3724     return needed;
3725 }
3726 
3727 RegOperand *GraphColorRegAllocator::GetReplaceOpnd(Insn &insn, const Operand &opnd, uint32 &spillIdx,
3728                                                    uint64 &usedRegMask, bool isDef)
3729 {
3730     if (!opnd.IsRegister()) {
3731         return nullptr;
3732     }
3733     auto &regOpnd = static_cast<const RegOperand &>(opnd);
3734 
3735     uint32 vregNO = regOpnd.GetRegisterNumber();
3736     if (vregNO == RFP) {
3737         seenFP = true;
3738     }
3739     RegType regType = regOpnd.GetRegisterType();
3740     if (vregNO < kAllRegNum) {
3741         return nullptr;
3742     }
3743     if (IsUnconcernedReg(regOpnd)) {
3744         return nullptr;
3745     }
3746 
3747 #ifdef USE_LRA
3748     if (doLRA && IsLocalReg(vregNO)) {
3749         return GetReplaceOpndForLRA(insn, opnd, spillIdx, usedRegMask, isDef);
3750     }
3751 #endif /* USE_LRA */
3752 
3753     DEBUG_ASSERT(vregNO < numVregs, "index out of range of MapleVector in GraphColorRegAllocator::GetReplaceOpnd");
3754     LiveRange *lr = lrMap[vregNO];
3755 
3756     bool isSplitPart = false;
3757     bool needSpillLr = false;
3758     if (lr->GetSplitLr() && IsBitArrElemSet(lr->GetSplitLr()->GetBBMember(), insn.GetBB()->GetId())) {
3759         isSplitPart = true;
3760     }
3761 
3762     if (lr->IsSpilled() && !isSplitPart) {
3763         needSpillLr = GetSpillReg(insn, *lr, spillIdx, usedRegMask, isDef);
3764     }
3765 
3766     regno_t regNO;
3767     if (isSplitPart) {
3768         regNO = lr->GetSplitLr()->GetAssignedRegNO();
3769     } else {
3770         regNO = lr->GetAssignedRegNO();
3771     }
3772     bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO));
3773     RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3774         static_cast<AArch64reg>(regNO), opnd.GetSize(), regType);
3775     if (needDump) {
3776         LogInfo::MapleLogger() << "replace R" << vregNO << " with R" << (regNO - R0) << "\n";
3777     }
3778 
3779     insn.AppendComment(" [R" + std::to_string(vregNO) + "] ");
3780 
3781     if (isSplitPart && (isCalleeReg || lr->GetSplitLr()->GetNumCall() == 0)) {
3782         if (isDef) {
3783             SpillOperand(insn, opnd, isDef, phyOpnd);
3784             ++spillIdx;
3785         } else {
3786             if (lr->GetSplitLr()->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload()) {
3787                 SpillOperand(insn, opnd, isDef, phyOpnd);
3788                 ++spillIdx;
3789             }
3790         }
3791         return &phyOpnd;
3792     }
3793 
3794     bool needCallerSave = false;
3795     if (lr->GetNumCall() && !isCalleeReg) {
3796         if (isDef) {
3797             needCallerSave = NeedCallerSave(insn, *lr, isDef) && lr->GetRematLevel() == kRematOff;
3798         } else {
3799             needCallerSave = !lr->GetProcessed();
3800         }
3801     }
3802 
3803     if (lr->IsSpilled() || (isSplitPart && (lr->GetSplitLr()->GetNumCall() != 0)) || needCallerSave ||
3804         (!isSplitPart && !(lr->IsSpilled()) && lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload())) {
3805         SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr);
3806         Insn *spill = SpillOperand(insn, opnd, isDef, phyOpnd);
3807         if (spill != nullptr) {
3808             SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr);
3809         }
3810         ++spillIdx;
3811     }
3812 
3813     return &phyOpnd;
3814 }
3815 
3816 void GraphColorRegAllocator::MarkUsedRegs(Operand &opnd, uint64 &usedRegMask)
3817 {
3818     auto &regOpnd = static_cast<RegOperand &>(opnd);
3819     uint32 pregInterval = (regOpnd.GetRegisterType() == kRegTyInt) ? 0 : (V0 - R30);
3820     uint32 vregNO = regOpnd.GetRegisterNumber();
3821     LiveRange *lr = GetLiveRange(vregNO);
3822     if (lr != nullptr) {
3823         if (lr->IsSpilled()) {
3824             lr->SetAssignedRegNO(0);
3825         }
3826         if (lr->GetAssignedRegNO() != 0) {
3827             usedRegMask |= (1ULL << (lr->GetAssignedRegNO() - pregInterval));
3828         }
3829         if (lr->GetSplitLr() && lr->GetSplitLr()->GetAssignedRegNO()) {
3830             usedRegMask |= (1ULL << (lr->GetSplitLr()->GetAssignedRegNO() - pregInterval));
3831         }
3832     }
3833 }
3834 
3835 uint64 GraphColorRegAllocator::FinalizeRegisterPreprocess(FinalizeRegisterInfo &fInfo, const Insn &insn,
3836                                                           bool &needProcess)
3837 {
3838     uint64 usedRegMask = 0;
3839     const InsnDesc *md = insn.GetDesc();
3840     uint32 opndNum = insn.GetOperandSize();
3841     bool hasVirtual = false;
3842     for (uint32 i = 0; i < opndNum; ++i) {
3843         Operand &opnd = insn.GetOperand(i);
3844         DEBUG_ASSERT(md->GetOpndDes(i) != nullptr, "pointer is null in GraphColorRegAllocator::FinalizeRegisters");
3845 
3846         if (opnd.IsList()) {
3847             if (insn.GetMachineOpcode() != MOP_asm) {
3848                 continue;
3849             }
3850             hasVirtual = true;
3851             if (i == kAsmOutputListOpnd) {
3852                 fInfo.SetDefOperand(opnd, static_cast<int32>(i));
3853             }
3854             if (i == kAsmInputListOpnd) {
3855                 fInfo.SetUseOperand(opnd, static_cast<int32>(i));
3856             }
3857         } else if (opnd.IsMemoryAccessOperand()) {
3858             auto &memOpnd = static_cast<MemOperand &>(opnd);
3859             Operand *base = memOpnd.GetBaseRegister();
3860             if (base != nullptr) {
3861                 fInfo.SetBaseOperand(opnd, static_cast<int32>(i));
3862                 MarkUsedRegs(*base, usedRegMask);
3863                 hasVirtual |= static_cast<RegOperand *>(base)->IsVirtualRegister();
3864             }
3865             Operand *offset = memOpnd.GetIndexRegister();
3866             if (offset != nullptr) {
3867                 fInfo.SetOffsetOperand(opnd);
3868                 MarkUsedRegs(*offset, usedRegMask);
3869                 hasVirtual |= static_cast<RegOperand *>(offset)->IsVirtualRegister();
3870             }
3871         } else {
3872             bool isDef = md->GetOpndDes(i)->IsRegDef();
3873             if (isDef) {
3874                 fInfo.SetDefOperand(opnd, static_cast<int32>(i));
3875                 /*
3876                  * Need to exclude def also, since it will clobber the result when the
3877                  * original value is reloaded.
3878                  */
3879                 hasVirtual |= static_cast<RegOperand &>(opnd).IsVirtualRegister();
3880                 MarkUsedRegs(opnd, usedRegMask);
3881             } else {
3882                 fInfo.SetUseOperand(opnd, static_cast<int32>(i));
3883                 if (opnd.IsRegister()) {
3884                     hasVirtual |= static_cast<RegOperand &>(opnd).IsVirtualRegister();
3885                     MarkUsedRegs(opnd, usedRegMask);
3886                 }
3887             }
3888         }
3889     } /* operand */
3890     needProcess = hasVirtual;
3891     return usedRegMask;
3892 }
3893 
3894 void GraphColorRegAllocator::GenerateSpillFillRegs(const Insn &insn)
3895 {
3896     static regno_t intRegs[kSpillMemOpndNum] = {R10, R11, R12, R13};  // R9 is used for large stack offset temp
3897     static regno_t fpRegs[kSpillMemOpndNum] = {V16, V17, V18, V19};
3898     uint32 opndNum = insn.GetOperandSize();
3899     std::set<regno_t> defPregs;
3900     std::set<regno_t> usePregs;
3901     std::vector<LiveRange *> defLrs;
3902     std::vector<LiveRange *> useLrs;
3903     if (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr) {
3904         RegOperand &opnd1 = static_cast<RegOperand &>(insn.GetOperand(1));
3905         RegOperand &opnd0 = static_cast<RegOperand &>(insn.GetOperand(0));
3906         if (opnd1.GetRegisterNumber() < R20 && opnd0.GetRegisterNumber() >= kAllRegNum) {
3907             LiveRange *lr = lrMap[opnd0.GetRegisterNumber()];
3908             if (lr->IsSpilled()) {
3909                 lr->SetSpillReg(opnd1.GetRegisterNumber());
3910                 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no spill reg in GenerateSpillFillRegs");
3911                 return;
3912             }
3913         }
3914         if (opnd0.GetRegisterNumber() < R20 && opnd1.GetRegisterNumber() >= kAllRegNum) {
3915             LiveRange *lr = lrMap[opnd1.GetRegisterNumber()];
3916             if (lr->IsSpilled()) {
3917                 lr->SetSpillReg(opnd0.GetRegisterNumber());
3918                 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no spill reg in GenerateSpillFillRegs");
3919                 return;
3920             }
3921         }
3922     }
3923     const InsnDesc *md = insn.GetDesc();
3924     bool isIndexedMemOp = false;
3925     for (uint32 opndIdx = 0; opndIdx < opndNum; ++opndIdx) {
3926         Operand *opnd = &insn.GetOperand(opndIdx);
3927         if (opnd == nullptr) {
3928             continue;
3929         }
3930         if (opnd->IsList()) {
3931             // call parameters
3932         } else if (opnd->IsMemoryAccessOperand()) {
3933             auto *memopnd = static_cast<MemOperand *>(opnd);
3934             if (memopnd->GetIndexOpt() == MemOperand::kPreIndex || memopnd->GetIndexOpt() == MemOperand::kPostIndex) {
3935                 isIndexedMemOp = true;
3936             }
3937             auto *base = static_cast<RegOperand *>(memopnd->GetBaseRegister());
3938             if (base != nullptr && !IsUnconcernedReg(*base)) {
3939                 if (!memopnd->IsIntactIndexed()) {
3940                     if (base->IsPhysicalRegister()) {
3941                         defPregs.insert(base->GetRegisterNumber());
3942                     } else {
3943                         LiveRange *lr = lrMap[base->GetRegisterNumber()];
3944                         if (lr->IsSpilled()) {
3945                             defLrs.emplace_back(lr);
3946                         }
3947                     }
3948                 }
3949                 if (base->IsPhysicalRegister()) {
3950                     usePregs.insert(base->GetRegisterNumber());
3951                 } else {
3952                     LiveRange *lr = lrMap[base->GetRegisterNumber()];
3953                     if (lr->IsSpilled()) {
3954                         useLrs.emplace_back(lr);
3955                     }
3956                 }
3957             }
3958             RegOperand *offset = static_cast<RegOperand *>(memopnd->GetIndexRegister());
3959             if (offset != nullptr) {
3960                 if (offset->IsPhysicalRegister()) {
3961                     usePregs.insert(offset->GetRegisterNumber());
3962                 } else {
3963                     LiveRange *lr = lrMap[offset->GetRegisterNumber()];
3964                     if (lr->IsSpilled()) {
3965                         useLrs.emplace_back(lr);
3966                     }
3967                 }
3968             }
3969         } else if (opnd->IsRegister()) {
3970             bool isDef = md->GetOpndDes(static_cast<int>(opndIdx))->IsRegDef();
3971             bool isUse = md->GetOpndDes(static_cast<int>(opndIdx))->IsRegUse();
3972             RegOperand *ropnd = static_cast<RegOperand *>(opnd);
3973             if (IsUnconcernedReg(*ropnd)) {
3974                 continue;
3975             }
3976             if (ropnd != nullptr) {
3977                 if (isUse) {
3978                     if (ropnd->IsPhysicalRegister()) {
3979                         usePregs.insert(ropnd->GetRegisterNumber());
3980                     } else {
3981                         LiveRange *lr = lrMap[ropnd->GetRegisterNumber()];
3982                         if (lr->IsSpilled()) {
3983                             useLrs.emplace_back(lr);
3984                         }
3985                     }
3986                 }
3987                 if (isDef) {
3988                     if (ropnd->IsPhysicalRegister()) {
3989                         defPregs.insert(ropnd->GetRegisterNumber());
3990                     } else {
3991                         LiveRange *lr = lrMap[ropnd->GetRegisterNumber()];
3992                         if (lr->IsSpilled()) {
3993                             defLrs.emplace_back(lr);
3994                         }
3995                     }
3996                 }
3997             }
3998         }
3999     }
4000     auto comparator = [=](const LiveRange *lr1, const LiveRange *lr2) -> bool { return lr1->GetID() > lr2->GetID(); };
4001     std::sort(useLrs.begin(), useLrs.end(), comparator);
4002     for (auto lr : useLrs) {
4003         lr->SetID(insn.GetId());
4004         RegType rtype = lr->GetRegType();
4005         regno_t firstSpillReg = rtype == kRegTyInt ? intRegs[0] : fpRegs[0];
4006         if (lr->GetSpillReg() != 0 && lr->GetSpillReg() < firstSpillReg && lr->GetPregveto(lr->GetSpillReg())) {
4007             lr->SetSpillReg(0);
4008         }
4009         if (lr->GetSpillReg() != 0 && lr->GetSpillReg() >= firstSpillReg &&
4010             usePregs.find(lr->GetSpillReg()) == usePregs.end()) {
4011             usePregs.insert(lr->GetSpillReg());
4012             continue;
4013         } else {
4014             lr->SetSpillReg(0);
4015         }
4016         for (uint32 i = 0; i < kSpillMemOpndNum; i++) {
4017             regno_t preg = rtype == kRegTyInt ? intRegs[i] : fpRegs[i];
4018             if (usePregs.find(preg) == usePregs.end()) {
4019                 lr->SetSpillReg(preg);
4020                 usePregs.insert(preg);
4021                 break;
4022             }
4023         }
4024         DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg");
4025     }
4026     size_t spillRegIdx;
4027     if (isIndexedMemOp) {
4028         spillRegIdx = useLrs.size();
4029     } else {
4030         spillRegIdx = 0;
4031     }
4032     for (auto lr : defLrs) {
4033         lr->SetID(insn.GetId());
4034         RegType rtype = lr->GetRegType();
4035         regno_t firstSpillReg = rtype == kRegTyInt ? intRegs[0] : fpRegs[0];
4036         if (lr->GetSpillReg() != 0) {
4037             if (lr->GetSpillReg() < firstSpillReg && lr->GetPregveto(lr->GetSpillReg())) {
4038                 lr->SetSpillReg(0);
4039             }
4040             if (lr->GetSpillReg() >= firstSpillReg && defPregs.find(lr->GetSpillReg()) != defPregs.end()) {
4041                 lr->SetSpillReg(0);
4042             }
4043         }
4044         if (lr->GetSpillReg() != 0) {
4045             continue;
4046         }
4047         for (; spillRegIdx < kSpillMemOpndNum; spillRegIdx++) {
4048             regno_t preg = rtype == kRegTyInt ? intRegs[spillRegIdx] : fpRegs[spillRegIdx];
4049             if (defPregs.find(preg) == defPregs.end()) {
4050                 lr->SetSpillReg(preg);
4051                 defPregs.insert(preg);
4052                 break;
4053             }
4054         }
4055         DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg");
4056     }
4057 }
4058 
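/*
 * Summary of the function below: replace one spilled operand of insn with the
 * scratch register chosen by GenerateSpillFillRegs. Rematerializable values are
 * re-computed before insn; otherwise a store (for defs, inserted after insn) or
 * a load (for uses, inserted before insn) to the spill memory is generated.
 * Returns the physical register operand to substitute, or nullptr if the operand
 * does not belong to a spilled live range.
 */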
4059 RegOperand *GraphColorRegAllocator::CreateSpillFillCode(const RegOperand &opnd, Insn &insn, uint32 spillCnt, bool isdef)
4060 {
4061     regno_t vregno = opnd.GetRegisterNumber();
4062     LiveRange *lr = GetLiveRange(vregno);
4063     if (lr != nullptr && lr->IsSpilled()) {
4064         AArch64CGFunc *a64cgfunc = static_cast<AArch64CGFunc *>(cgFunc);
4065         uint32 bits = opnd.GetSize();
4066         if (bits < k32BitSize) {
4067             bits = k32BitSize;
4068         }
4069         if (cgFunc->IsExtendReg(vregno)) {
4070             bits = k64BitSize;
4071         }
4072         regno_t spreg = 0;
4073         RegType rtype = lr->GetRegType();
4074         spreg = lr->GetSpillReg();
4075         DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg in CreateSpillFillCode");
4076         RegOperand *regopnd =
4077             &a64cgfunc->GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(spreg), opnd.GetSize(), rtype);
4078 
4079         if (lr->GetRematLevel() != kRematOff) {
4080             if (isdef) {
4081                 return nullptr;
4082             } else {
4083                 std::vector<Insn *> rematInsns = lr->Rematerialize(a64cgfunc, *static_cast<RegOperand *>(regopnd));
4084                 for (auto &&remat : rematInsns) {
4085                     std::string comment = " REMATERIALIZE color vreg: " + std::to_string(vregno);
4086                     remat->SetComment(comment);
4087                     insn.GetBB()->InsertInsnBefore(insn, *remat);
4088                 }
4089                 return regopnd;
4090             }
4091         }
4092 
4093         bool isOutOfRange = false;
4094         Insn *nextInsn = insn.GetNextMachineInsn();
4095         MemOperand *loadmem = GetSpillOrReuseMem(*lr, opnd.GetSize(), isOutOfRange, insn, isdef);
4096         PrimType pty = (lr->GetRegType() == kRegTyInt) ? ((bits > k32BitSize) ? PTY_i64 : PTY_i32)
4097                                                        : ((bits > k32BitSize) ? PTY_f64 : PTY_f32);
4098         CHECK_FATAL(spillCnt < kSpillMemOpndNum, "spill count exceeded");
4099         Insn *memInsn;
4100         if (isdef) {
4101             memInsn = &cgFunc->GetInsnBuilder()->BuildInsn(a64cgfunc->PickStInsn(bits, pty), *regopnd, *loadmem);
4102             memInsn->SetIsSpill();
4103             std::string comment =
4104                 " SPILLcolor vreg: " + std::to_string(vregno) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
4105             memInsn->SetComment(comment);
4106             if (nextInsn == nullptr) {
4107                 insn.GetBB()->AppendInsn(*memInsn);
4108             } else {
4109                 insn.GetBB()->InsertInsnBefore(*nextInsn, *memInsn);
4110             }
4111         } else {
4112             memInsn = &cgFunc->GetInsnBuilder()->BuildInsn(a64cgfunc->PickLdInsn(bits, pty), *regopnd, *loadmem);
4113             memInsn->SetIsReload();
4114             std::string comment =
4115                 " RELOADcolor vreg: " + std::to_string(vregno) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
4116             memInsn->SetComment(comment);
4117             insn.GetBB()->InsertInsnBefore(insn, *memInsn);
4118         }
4119         return regopnd;
4120     }
4121     return nullptr;
4122 }
4123 
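/*
 * Summary of the function below: multi-pass spill handling. Walk all instructions
 * and rewrite every operand whose live range is marked spilled, inserting the
 * fill/spill code built by CreateSpillFillCode. Returns true if anything was rewritten.
 */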
4124 bool GraphColorRegAllocator::SpillLiveRangeForSpills()
4125 {
4126     bool done = false;
4127     for (uint32_t bbIdx = 0; bbIdx < bfs->sortedBBs.size(); bbIdx++) {
4128         BB *bb = bfs->sortedBBs[bbIdx];
4129         FOR_BB_INSNS(insn, bb) {
4130             uint32 spillCnt;
4131             if (insn->IsImmaterialInsn() || !insn->IsMachineInstruction() || insn->GetId() == 0) {
4132                 continue;
4133             }
4134             spillCnt = 0;
4135             const InsnDesc *md = insn->GetDesc();
4136             uint32 opndNum = insn->GetOperandSize();
4137             GenerateSpillFillRegs(*insn);
4138             for (uint32 i = 0; i < opndNum; ++i) {
4139                 Operand *opnd = &insn->GetOperand(i);
4140                 if (opnd == nullptr) {
4141                     continue;
4142                 }
4143                 if (opnd->IsList()) {
4144                     // call parameters
4145                 } else if (opnd->IsMemoryAccessOperand()) {
4146                     MemOperand *newmemopnd = nullptr;
4147                     auto *memopnd = static_cast<MemOperand *>(opnd);
4148                     auto *base = static_cast<RegOperand *>(memopnd->GetBaseRegister());
4149                     if (base != nullptr && base->IsVirtualRegister()) {
4150                         RegOperand *replace = CreateSpillFillCode(*base, *insn, spillCnt);
4151                         if (!memopnd->IsIntactIndexed()) {
4152                             (void)CreateSpillFillCode(*base, *insn, spillCnt, true);
4153                         }
4154                         if (replace != nullptr) {
4155                             spillCnt++;
4156                             newmemopnd = (static_cast<MemOperand *>(opnd)->Clone(*cgFunc->GetMemoryPool()));
4157                             newmemopnd->SetBaseRegister(*replace);
4158                             insn->SetOperand(i, *newmemopnd);
4159                             done = true;
4160                         }
4161                     }
4162                     RegOperand *offset = static_cast<RegOperand *>(memopnd->GetIndexRegister());
4163                     if (offset != nullptr && offset->IsVirtualRegister()) {
4164                         RegOperand *replace = CreateSpillFillCode(*offset, *insn, spillCnt);
4165                         if (replace != nullptr) {
4166                             spillCnt++;
4167                             if (newmemopnd == nullptr) {
4168                                 newmemopnd = (static_cast<MemOperand *>(opnd)->Clone(*cgFunc->GetMemoryPool()));
4169                             }
4170                             newmemopnd->SetIndexRegister(*replace);
4171                             insn->SetOperand(i, *newmemopnd);
4172                             done = true;
4173                         }
4174                     }
4175                 } else if (opnd->IsRegister()) {
4176                     bool isdef = md->opndMD[i]->IsRegDef();
4177                     bool isuse = md->opndMD[i]->IsRegUse();
4178                     RegOperand *replace = CreateSpillFillCode(*static_cast<RegOperand *>(opnd), *insn, spillCnt, isdef);
4179                     if (isuse && isdef) {
4180                         (void)CreateSpillFillCode(*static_cast<RegOperand *>(opnd), *insn, spillCnt, false);
4181                     }
4182                     if (replace != nullptr) {
4183                         if (!isdef) {
4184                             spillCnt++;
4185                         }
4186                         insn->SetOperand(i, *replace);
4187                         done = true;
4188                     }
4189                 }
4190             }
4191         }
4192     }
4193     return done;
4194 }
4195 
4196 static bool ReloadAtCallee(CgOccur *occ)
4197 {
4198     auto *defOcc = occ->GetDef();
4199     if (defOcc == nullptr || defOcc->GetOccType() != kOccStore) {
4200         return false;
4201     }
4202     return static_cast<CgStoreOcc *>(defOcc)->Reload();
4203 }
4204 
4205 void CallerSavePre::DumpWorkCandAndOcc()
4206 {
4207     if (workCand->GetTheOperand()->IsRegister()) {
4208         LogInfo::MapleLogger() << "Cand R";
4209         LogInfo::MapleLogger() << static_cast<RegOperand *>(workCand->GetTheOperand())->GetRegisterNumber() << '\n';
4210     } else {
4211         LogInfo::MapleLogger() << "Cand Index" << workCand->GetIndex() << '\n';
4212     }
4213     for (CgOccur *occ : allOccs) {
4214         occ->Dump();
4215         LogInfo::MapleLogger() << '\n';
4216     }
4217 }
4218 
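/*
 * Summary of the function below: apply the SSAPRE decisions. For every use, phi
 * operand, or store occurrence that still requires a reload (and is not already
 * reloaded at a callee site), emit the reload through SpillOperand using the
 * physical register assigned to the current live range.
 */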
4219 void CallerSavePre::CodeMotion()
4220 {
4221     constexpr uint32 limitNum = UINT32_MAX;
4222     uint32 cnt = 0;
4223     for (auto *occ : allOccs) {
4224         if (occ->GetOccType() == kOccUse) {
4225             ++cnt;
4226             beyondLimit |= (cnt == limitNum);
4227             if (!beyondLimit && dump) {
4228                 LogInfo::MapleLogger() << "opt use occur: ";
4229                 occ->Dump();
4230             }
4231         }
4232         if (occ->GetOccType() == kOccUse &&
4233             (beyondLimit || (static_cast<CgUseOcc *>(occ)->Reload() && !ReloadAtCallee(occ)))) {
4234             RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4235                 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4236                 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4237             (void)regAllocator->SpillOperand(*occ->GetInsn(), *occ->GetOperand(), false, phyOpnd);
4238             continue;
4239         }
4240         if (occ->GetOccType() == kOccPhiopnd && static_cast<CgPhiOpndOcc *>(occ)->Reload() && !ReloadAtCallee(occ)) {
4241             RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4242                 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4243                 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4244             Insn *insn = occ->GetBB()->GetLastInsn();
4245             if (insn == nullptr) {
4246                 insn = &(static_cast<AArch64CGFunc *>(func)->CreateCommentInsn("reload caller save register"));
4247                 occ->GetBB()->AppendInsn(*insn);
4248             }
4249             auto defOcc = occ->GetDef();
4250             bool forCall = (defOcc != nullptr && insn == defOcc->GetInsn());
4251             (void)regAllocator->SpillOperand(*insn, *occ->GetOperand(), false, phyOpnd, forCall);
4252             continue;
4253         }
4254         if (occ->GetOccType() == kOccStore && static_cast<CgStoreOcc *>(occ)->Reload()) {
4255             RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4256                 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4257                 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4258             (void)regAllocator->SpillOperand(*occ->GetInsn(), *occ->GetOperand(), false, phyOpnd, true);
4259             continue;
4260         }
4261     }
4262     if (dump) {
4263         PreWorkCand *curCand = workCand;
4264         LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex()
4265                                << " after codemotion ===========\n";
4266         DumpWorkCandAndOcc();
4267         func->DumpCFGToDot("raCodeMotion-");
4268     }
4269 }
4270 
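/*
 * Summary of the function below: propagate reload requirements backwards from a
 * use occurrence to its defining occurrence. If the definition is a store without
 * reload, or a phi that is not fully available, the use (or phi operand) itself
 * must reload; fully available phis recurse into their operands instead.
 */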
4271 void CallerSavePre::UpdateLoadSite(CgOccur *occ)
4272 {
4273     if (occ == nullptr) {
4274         return;
4275     }
4276     auto *defOcc = occ->GetDef();
4277     if (occ->GetOccType() == kOccUse) {
4278         defOcc = static_cast<CgUseOcc *>(occ)->GetPrevVersionOccur();
4279     }
4280     if (defOcc == nullptr) {
4281         return;
4282     }
4283     switch (defOcc->GetOccType()) {
4284         case kOccDef:
4285             break;
4286         case kOccUse:
4287             UpdateLoadSite(defOcc);
4288             return;
4289         case kOccStore: {
4290             auto *storeOcc = static_cast<CgStoreOcc *>(defOcc);
4291             if (storeOcc->Reload()) {
4292                 break;
4293             }
4294             switch (occ->GetOccType()) {
4295                 case kOccUse: {
4296                     static_cast<CgUseOcc *>(occ)->SetReload(true);
4297                     break;
4298                 }
4299                 case kOccPhiopnd: {
4300                     static_cast<CgPhiOpndOcc *>(occ)->SetReload(true);
4301                     break;
4302                 }
4303                 default: {
4304                     CHECK_FATAL(false, "must not be here");
4305                 }
4306             }
4307             return;
4308         }
4309         case kOccPhiocc: {
4310             auto *phiOcc = static_cast<CgPhiOcc *>(defOcc);
4311             if (phiOcc->IsFullyAvailable()) {
4312                 break;
4313             }
4314             if (!phiOcc->IsDownSafe() || phiOcc->IsNotAvailable()) {
4315                 switch (occ->GetOccType()) {
4316                     case kOccUse: {
4317                         static_cast<CgUseOcc *>(occ)->SetReload(true);
4318                         break;
4319                     }
4320                     case kOccPhiopnd: {
4321                         static_cast<CgPhiOpndOcc *>(occ)->SetReload(true);
4322                         break;
4323                     }
4324                     default: {
4325                         CHECK_FATAL(false, "must not be here");
4326                     }
4327                 }
4328                 return;
4329             }
4330 
4331             if (defOcc->Processed()) {
4332                 return;
4333             }
4334             defOcc->SetProcessed(true);
4335             for (auto *opndOcc : phiOcc->GetPhiOpnds()) {
4336                 UpdateLoadSite(opndOcc);
4337             }
4338             return;
4339         }
4340         default: {
4341             CHECK_FATAL(false, "NIY");
4342             break;
4343         }
4344     }
4345 }
4346 
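/*
 * Summary of the function below: decide the final reload sites. First mark reloads
 * via UpdateLoadSite, then scan the occurrences in dominator-tree preorder and drop
 * a reload whenever an available definition of the same class already dominates it.
 */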
4347 void CallerSavePre::CalLoadSites()
4348 {
4349     for (auto *occ : allOccs) {
4350         if (occ->GetOccType() == kOccUse) {
4351             UpdateLoadSite(occ);
4352         }
4353     }
4354     std::vector<CgOccur *> availableDef(classCount, nullptr);
4355     for (auto *occ : allOccs) {
4356         auto classID = static_cast<uint32>(occ->GetClassID());
4357         switch (occ->GetOccType()) {
4358             case kOccDef:
4359                 availableDef[classID] = occ;
4360                 break;
4361             case kOccStore: {
4362                 if (static_cast<CgStoreOcc *>(occ)->Reload()) {
4363                     availableDef[classID] = occ;
4364                 } else {
4365                     availableDef[classID] = nullptr;
4366                 }
4367                 break;
4368             }
4369             case kOccPhiocc: {
4370                 auto *phiOcc = static_cast<CgPhiOcc *>(occ);
4371                 if (!phiOcc->IsNotAvailable() && phiOcc->IsDownSafe()) {
4372                     availableDef[classID] = occ;
4373                 } else {
4374                     availableDef[classID] = nullptr;
4375                 }
4376                 break;
4377             }
4378             case kOccUse: {
4379                 auto *useOcc = static_cast<CgUseOcc *>(occ);
4380                 if (useOcc->Reload()) {
4381                     auto *availDef = availableDef[classID];
4382                     if (availDef != nullptr && dom->Dominate(*availDef->GetBB(), *useOcc->GetBB())) {
4383                         useOcc->SetReload(false);
4384                     } else {
4385                         availableDef[classID] = useOcc;
4386                     }
4387                 }
4388                 break;
4389             }
4390             case kOccPhiopnd: {
4391                 auto *phiOpnd = static_cast<CgPhiOpndOcc *>(occ);
4392                 if (phiOpnd->Reload()) {
4393                     auto *availDef = availableDef[classID];
4394                     if (availDef != nullptr && dom->Dominate(*availDef->GetBB(), *phiOpnd->GetBB())) {
4395                         phiOpnd->SetReload(false);
4396                     } else {
4397                         availableDef[classID] = phiOpnd;
4398                     }
4399                 }
4400                 break;
4401             }
4402             case kOccExit:
4403                 break;
4404             default:
4405                 CHECK_FATAL(false, "not supported occur type");
4406         }
4407     }
4408     if (dump) {
4409         PreWorkCand *curCand = workCand;
4410         LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex()
4411                                << " after CalLoadSite===================\n";
4412         DumpWorkCandAndOcc();
4413         LogInfo::MapleLogger() << "\n";
4414     }
4415 }
4416 
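/*
 * Summary of the function below: iteratively classify each phi occurrence as fully,
 * partially, or not available. A phi operand defined by a store or by a
 * not-fully-available phi counts as killed; if every operand is killed, or a killed
 * operand sits on a back edge, the phi becomes not available.
 */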
4417 void CallerSavePre::ComputeAvail()
4418 {
4419     bool changed = true;
4420     while (changed) {
4421         changed = false;
4422         for (auto *phiOcc : phiOccs) {
4423             if (phiOcc->IsNotAvailable()) {
4424                 continue;
4425             }
4426             size_t killedCnt = 0;
4427             for (auto *opndOcc : phiOcc->GetPhiOpnds()) {
4428                 auto defOcc = opndOcc->GetDef();
4429                 if (defOcc == nullptr) {
4430                     continue;
4431                 }
4432                 // to avoid moving the load too far from the use site, a not-fully-available phi kills the availability of its phiOpnd
4433                 if ((defOcc->GetOccType() == kOccPhiocc && !static_cast<CgPhiOcc *>(defOcc)->IsFullyAvailable()) ||
4434                     defOcc->GetOccType() == kOccStore) {
4435                     ++killedCnt;
4436                     opndOcc->SetHasRealUse(false);
4437                     // opnd at back-edge is killed, set phi not avail
4438                     if (dom->Dominate(*phiOcc->GetBB(), *opndOcc->GetBB())) {
4439                         killedCnt = phiOcc->GetPhiOpnds().size();
4440                         break;
4441                     }
4442                     if (opndOcc->GetBB()->IsSoloGoto() && opndOcc->GetBB()->GetLoop() != nullptr) {
4443                         killedCnt = phiOcc->GetPhiOpnds().size();
4444                         break;
4445                     }
4446                     continue;
4447                 }
4448             }
4449             if (killedCnt == phiOcc->GetPhiOpnds().size()) {
4450                 changed |= !phiOcc->IsNotAvailable();
4451                 phiOcc->SetAvailability(kNotAvailable);
4452             } else if (killedCnt > 0) {
4453                 changed |= !phiOcc->IsPartialAvailable();
4454                 phiOcc->SetAvailability(kPartialAvailable);
4455             } else {
4456             }  // fully available is default state
4457         }
4458     }
4459 }
4460 
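/*
 * Summary of the function below: SSAPRE renaming. Walk the occurrences in
 * dominator-tree preorder with a stack and assign class IDs. Uses inherit the class
 * of the dominating occurrence on the stack; defs, stores, and phis open a new
 * class, and a def/store/exit reaching a phi on top of the stack clears that phi's
 * down-safety.
 */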
4461 void CallerSavePre::Rename1()
4462 {
4463     std::stack<CgOccur *> occStack;
4464     classCount = 1;
4465     // iterate over the occurrences in preorder of the dominator tree
4466     for (CgOccur *occ : allOccs) {
4467         while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) {
4468             occStack.pop();
4469         }
4470         switch (occ->GetOccType()) {
4471             case kOccUse: {
4472                 if (occStack.empty()) {
4473                     // assign new class
4474                     occ->SetClassID(static_cast<int>(classCount++));
4475                     occStack.push(occ);
4476                     break;
4477                 }
4478                 CgOccur *topOccur = occStack.top();
4479                 if (topOccur->GetOccType() == kOccStore || topOccur->GetOccType() == kOccDef ||
4480                     topOccur->GetOccType() == kOccPhiocc) {
4481                     // inherit the class of the dominating occurrence
4482                     occ->SetClassID(topOccur->GetClassID());
4483                     occ->SetPrevVersionOccur(topOccur);
4484                     occStack.push(occ);
4485                     break;
4486                 } else if (topOccur->GetOccType() == kOccUse) {
4487                     occ->SetClassID(topOccur->GetClassID());
4488                     if (topOccur->GetDef() != nullptr) {
4489                         occ->SetDef(topOccur->GetDef());
4490                     } else {
4491                         occ->SetDef(topOccur);
4492                     }
4493                     break;
4494                 }
4495                 CHECK_FATAL(false, "unsupported occur type");
4496                 break;
4497             }
4498             case kOccPhiocc: {
4499                 // assign new class
4500                 occ->SetClassID(static_cast<int>(classCount++));
4501                 occStack.push(occ);
4502                 break;
4503             }
4504             case kOccPhiopnd: {
4505                 if (!occStack.empty()) {
4506                     CgOccur *topOccur = occStack.top();
4507                     auto *phiOpndOcc = static_cast<CgPhiOpndOcc *>(occ);
4508                     phiOpndOcc->SetDef(topOccur);
4509                     phiOpndOcc->SetClassID(topOccur->GetClassID());
4510                     if (topOccur->GetOccType() == kOccUse) {
4511                         phiOpndOcc->SetHasRealUse(true);
4512                     }
4513                 }
4514                 break;
4515             }
4516             case kOccDef: {
4517                 if (!occStack.empty()) {
4518                     CgOccur *topOccur = occStack.top();
4519                     if (topOccur->GetOccType() == kOccPhiocc) {
4520                         auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4521                         phiTopOccur->SetIsDownSafe(false);
4522                     }
4523                 }
4524 
4525                 // assign new class
4526                 occ->SetClassID(static_cast<int>(classCount++));
4527                 occStack.push(occ);
4528                 break;
4529             }
4530             case kOccStore: {
4531                 if (!occStack.empty()) {
4532                     CgOccur *topOccur = occStack.top();
4533                     auto prevVersionOcc = topOccur->GetDef() ? topOccur->GetDef() : topOccur;
4534                     static_cast<CgStoreOcc *>(occ)->SetPrevVersionOccur(prevVersionOcc);
4535                     if (topOccur->GetOccType() == kOccPhiocc) {
4536                         auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4537                         phiTopOccur->SetIsDownSafe(false);
4538                     }
4539                 }
4540 
4541                 // assign new class
4542                 occ->SetClassID(static_cast<int>(classCount++));
4543                 occStack.push(occ);
4544                 break;
4545             }
4546             case kOccExit: {
4547                 if (occStack.empty()) {
4548                     break;
4549                 }
4550                 CgOccur *topOccur = occStack.top();
4551                 if (topOccur->GetOccType() == kOccPhiocc) {
4552                     auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4553                     phiTopOccur->SetIsDownSafe(false);
4554                 }
4555                 break;
4556             }
4557             default:
4558                 DEBUG_ASSERT(false, "should not be here");
4559                 break;
4560         }
4561     }
4562     if (dump) {
4563         PreWorkCand *curCand = workCand;
4564         LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex() << " after rename1============\n";
4565         DumpWorkCandAndOcc();
4566     }
4567 }
4568 
4569 void CallerSavePre::ComputeVarAndDfPhis()
4570 {
4571     dfPhiDfns.clear();
4572     PreWorkCand *workCand = GetWorkCand();
4573     for (auto *realOcc : workCand->GetRealOccs()) {
4574         BB *defBB = realOcc->GetBB();
4575         GetIterDomFrontier(defBB, &dfPhiDfns);
4576     }
4577 }
4578 
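/*
 * Summary of the function below: build the SSAPRE work list. Collect live ranges
 * colored to a caller-saved register that cross a call, then create real occurrences
 * (use, def, and store-at-call) for each of their references, plus exit occurrences
 * at return blocks. Calls whose callee is known (IPARA) and does not touch the
 * assigned register are skipped.
 */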
4579 void CallerSavePre::BuildWorkList()
4580 {
4581     size_t numBBs = dom->GetDtPreOrderSize();
4582     std::vector<LiveRange *> callSaveLrs;
4583     for (auto it : regAllocator->GetLrMap()) {
4584         LiveRange *lr = it.second;
4585         if (lr == nullptr || lr->IsSpilled()) {
4586             continue;
4587         }
4588         bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr->GetAssignedRegNO()));
4589         if (lr->GetSplitLr() == nullptr && lr->GetNumCall() && !isCalleeReg) {
4590             callSaveLrs.emplace_back(lr);
4591         }
4592     }
4593     const MapleVector<uint32> &preOrderDt = dom->GetDtPreOrder();
4594     for (size_t i = 0; i < numBBs; ++i) {
4595         BB *bb = func->GetBBFromID(preOrderDt[i]);
4596         std::map<uint32, Insn *> insnMap;
4597         FOR_BB_INSNS_SAFE(insn, bb, ninsn) {
4598             insnMap.insert(std::make_pair(insn->GetId(), insn));
4599         }
4600         for (auto lr : callSaveLrs) {
4601             LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb->GetId());
4602             RegOperand &opnd = func->GetOrCreateVirtualRegisterOperand(lr->GetRegNO());
4603             if (lu != nullptr && (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall())) {
4604                 MapleMap<uint32, uint32> refs = lr->GetRefs(bb->GetId());
4605                 for (auto it = refs.begin(); it != refs.end(); ++it) {
4606                     if (it->second & kIsUse) {
4607                         (void)CreateRealOcc(*insnMap[it->first], opnd, kOccUse);
4608                     }
4609                     if (it->second & kIsDef) {
4610                         (void)CreateRealOcc(*insnMap[it->first], opnd, kOccDef);
4611                     }
4612                     if (it->second & kIsCall) {
4613                         Insn *callInsn = insnMap[it->first];
4614                         auto *targetOpnd = callInsn->GetCallTargetOperand();
4615                         if (CGOptions::DoIPARA() && targetOpnd->IsFuncNameOpnd()) {
4616                             FuncNameOperand *target = static_cast<FuncNameOperand *>(targetOpnd);
4617                             const MIRSymbol *funcSt = target->GetFunctionSymbol();
4618                             DEBUG_ASSERT(funcSt->GetSKind() == kStFunc, "funcst must be a function name symbol");
4619                             MIRFunction *mirFunc = funcSt->GetFunction();
4620                             if (mirFunc != nullptr && mirFunc->IsReferedRegsValid()) {
4621                                 auto regSet = mirFunc->GetReferedRegs();
4622                                 if (regSet.find(lr->GetAssignedRegNO()) == regSet.end()) {
4623                                     continue;
4624                                 }
4625                             }
4626                         }
4627                         (void)CreateRealOcc(*callInsn, opnd, kOccStore);
4628                     }
4629                 }
4630             }
4631         }
4632         if (bb->GetKind() == BB::kBBReturn) {
4633             CreateExitOcc(*bb);
4634         }
4635     }
4636 }
4637 
4638 void CallerSavePre::ApplySSAPRE()
4639 {
4640     // #0 build worklist
4641     BuildWorkList();
4642     uint32 cnt = 0;
4643     constexpr uint32 preLimit = UINT32_MAX;
4644     while (!workList.empty()) {
4645         ++cnt;
4646         if (cnt == preLimit) {
4647             beyondLimit = true;
4648         }
4649         workCand = workList.front();
4650         workCand->SetIndex(static_cast<int32>(cnt));
4651         workLr = regAllocator->GetLiveRange(static_cast<RegOperand *>(workCand->GetTheOperand())->GetRegisterNumber());
4652         DEBUG_ASSERT(workLr != nullptr, "expected non-null lr");
4653         workList.pop_front();
4654         if (workCand->GetRealOccs().empty()) {
4655             continue;
4656         }
4657 
4658         allOccs.clear();
4659         phiOccs.clear();
4660         // #1 Insert PHI; results in allOccs and phiOccs
4661         ComputeVarAndDfPhis();
4662         CreateSortedOccs();
4663         if (workCand->GetRealOccs().empty()) {
4664             continue;
4665         }
4666         // #2 Rename
4667         Rename1();
4668         ComputeDS();
4669         ComputeAvail();
4670         CalLoadSites();
4671         // #6 Code motion and recompute the worklist based on new occurrences
4672         CodeMotion();
4673         DEBUG_ASSERT(workLr->GetProcessed() == false, "expected unprocessed");
4674         workLr->SetProcessed();
4675     }
4676 }
4677 
4678 void GraphColorRegAllocator::OptCallerSave()
4679 {
4680     CallerSavePre callerSavePre(this, *cgFunc, domInfo, *memPool, *memPool, kLoadPre, UINT32_MAX);
4681     callerSavePre.SetDump(needDump);
4682     callerSavePre.ApplySSAPRE();
4683 }
4684 
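/*
 * Summary of the function below: for live ranges colored to a callee-saved register
 * that are live across the loop but never referenced inside it, save the register
 * in the single predecessor of the loop header and restore it in the single exit
 * successor, then hand the freed register to one of the spilled candidate vregs.
 */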
4685 void GraphColorRegAllocator::SplitVregAroundLoop(const CGFuncLoops &loop, const std::vector<LiveRange *> &lrs,
4686                                                  BB &headerPred, BB &exitSucc, const std::set<regno_t> &cands)
4687 {
4688     size_t maxSplitCount = lrs.size() - intCalleeRegSet.size();
4689     maxSplitCount = maxSplitCount > kMaxSplitCount ? kMaxSplitCount : maxSplitCount;
4690     uint32 splitCount = 0;
4691     auto it = cands.begin();
4692     size_t candsSize = cands.size();
4693     maxSplitCount = maxSplitCount > candsSize ? candsSize : maxSplitCount;
4694     for (auto &lr : lrs) {
4695         if (lr->IsSpilled()) {
4696             continue;
4697         }
4698         if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr->GetAssignedRegNO()))) {
4699             continue;
4700         }
4701         bool hasRef = false;
4702         for (auto *bb : loop.GetLoopMembers()) {
4703             LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb->GetId());
4704             if (lu != nullptr && (lu->GetDefNum() != 0 || lu->GetUseNum() != 0)) {
4705                 hasRef = true;
4706                 break;
4707             }
4708         }
4709         if (!hasRef) {
4710             splitCount++;
4711             RegOperand *ropnd = &cgFunc->GetOrCreateVirtualRegisterOperand(lr->GetRegNO());
4712             RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
4713                 static_cast<AArch64reg>(lr->GetAssignedRegNO()), ropnd->GetSize(), (lr->GetRegType()));
4714 
4715             Insn *headerCom = &(static_cast<AArch64CGFunc *>(cgFunc)->CreateCommentInsn("split around loop begin"));
4716             headerPred.AppendInsn(*headerCom);
4717             Insn *last = headerPred.GetLastInsn();
4718             (void)SpillOperand(*last, *ropnd, true, static_cast<RegOperand &>(phyOpnd));
4719 
4720             Insn *exitCom = &(static_cast<AArch64CGFunc *>(cgFunc)->CreateCommentInsn("split around loop end"));
4721             exitSucc.InsertInsnBegin(*exitCom);
4722             Insn *first = exitSucc.GetFirstInsn();
4723             (void)SpillOperand(*first, *ropnd, false, static_cast<RegOperand &>(phyOpnd));
4724 
4725             LiveRange *replacedLr = lrMap[*it];
4726             replacedLr->SetAssignedRegNO(lr->GetAssignedRegNO());
4727             replacedLr->SetSpilled(false);
4728             ++it;
4729         }
4730         if (splitCount >= maxSplitCount) {
4731             break;
4732         }
4733     }
4734 }
4735 
4736 bool GraphColorRegAllocator::LrGetBadReg(const LiveRange &lr) const
4737 {
4738     if (lr.IsSpilled()) {
4739         return true;
4740     }
4741     if (lr.GetNumCall() != 0 && !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr.GetAssignedRegNO()))) {
4742         return true;
4743     }
4744     return false;
4745 }
4746 
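/*
 * Summary of the function below: collect integer vregs that got a "bad" register
 * (spilled or caller-saved across a call), are referenced inside the loop, are not
 * live into the header, and whose live range stays entirely within the loop. These
 * are the candidates that would profit from a freed callee-saved register.
 * Returns true if any candidate is found.
 */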
4747 bool GraphColorRegAllocator::LoopNeedSplit(const CGFuncLoops &loop, std::set<regno_t> &cands)
4748 {
4749     std::set<regno_t> regPressure;
4750     const BB *header = loop.GetHeader();
4751     const MapleSet<regno_t> &liveIn = header->GetLiveInRegNO();
4752     std::set<BB *> loopBBs;
4753     for (auto *bb : loop.GetLoopMembers()) {
4754         loopBBs.insert(bb);
4755         FOR_BB_INSNS(insn, bb) {
4756             if (!insn->IsMachineInstruction()) {
4757                 continue;
4758             }
4759             if (insn->GetId() == 0) {
4760                 continue;
4761             }
4762             uint32 opndNum = insn->GetOperandSize();
4763             for (uint32 i = 0; i < opndNum; ++i) {
4764                 Operand &opnd = insn->GetOperand(i);
4765                 if (opnd.IsList()) {
4766                     continue;
4767                 } else if (opnd.IsMemoryAccessOperand()) {
4768                     auto &memOpnd = static_cast<MemOperand &>(opnd);
4769                     Operand *base = memOpnd.GetBaseRegister();
4770                     Operand *offset = memOpnd.GetIndexRegister();
4771                     if (base != nullptr && base->IsRegister()) {
4772                         RegOperand *regOpnd = static_cast<RegOperand *>(base);
4773                         regno_t regNO = regOpnd->GetRegisterNumber();
4774                         LiveRange *lr = GetLiveRange(regNO);
4775                         if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4776                             liveIn.find(regNO) == liveIn.end()) {
4777                             regPressure.insert(regOpnd->GetRegisterNumber());
4778                         }
4779                     }
4780                     if (offset != nullptr && offset->IsRegister()) {
4781                         RegOperand *regOpnd = static_cast<RegOperand *>(offset);
4782                         regno_t regNO = regOpnd->GetRegisterNumber();
4783                         LiveRange *lr = GetLiveRange(regNO);
4784                         if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4785                             liveIn.find(regNO) == liveIn.end()) {
4786                             regPressure.insert(regOpnd->GetRegisterNumber());
4787                         }
4788                     }
4789                 } else if (opnd.IsRegister()) {
4790                     auto &regOpnd = static_cast<RegOperand &>(opnd);
4791                     regno_t regNO = regOpnd.GetRegisterNumber();
4792                     LiveRange *lr = GetLiveRange(regNO);
4793                     if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4794                         liveIn.find(regNO) == liveIn.end()) {
4795                         regPressure.insert(regOpnd.GetRegisterNumber());
4796                     }
4797                 }
4798             }
4799         }
4800     }
4801     if (regPressure.size() != 0) {
4802         for (auto reg : regPressure) {
4803             LiveRange *lr = lrMap[reg];
4804             std::vector<BB *> smember;
4805             ForEachBBArrElem(lr->GetBBMember(),
4806                              [this, &smember](uint32 bbID) { (void)smember.emplace_back(bbVec[bbID]); });
4807             bool liveBeyondLoop = false;
4808             for (auto bb : smember) {
4809                 if (loopBBs.find(bb) == loopBBs.end()) {
4810                     liveBeyondLoop = true;
4811                     break;
4812                 }
4813             }
4814             if (liveBeyondLoop) {
4815                 continue;
4816             }
4817             cands.insert(reg);
4818         }
4819         if (cands.empty()) {
4820             return false;
4821         }
4822         return true;
4823     }
4824     return false;
4825 }
4826 
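/*
 * Summary of the function below: for a single-entry loop containing a call, with a
 * single fallthru predecessor of the header and a single exit successor, check
 * whether enough callee-saved-colored live ranges are live into the loop but
 * unreferenced inside it, and if the candidates found by LoopNeedSplit justify it,
 * split those live ranges around the loop.
 */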
4827 void GraphColorRegAllocator::AnalysisLoop(const CGFuncLoops &loop)
4828 {
4829     const BB *header = loop.GetHeader();
4830     const MapleSet<regno_t> &liveIn = header->GetLiveInRegNO();
4831     std::vector<LiveRange *> lrs;
4832     size_t intCalleeNum = intCalleeRegSet.size();
4833     if (loop.GetMultiEntries().size() != 0) {
4834         return;
4835     }
4836     for (auto regno : liveIn) {
4837         LiveRange *lr = GetLiveRange(regno);
4838         if (lr != nullptr && lr->GetRegType() == kRegTyInt && lr->GetNumCall() != 0) {
4839             lrs.emplace_back(lr);
4840         }
4841     }
4842     if (lrs.size() < intCalleeNum) {
4843         return;
4844     }
4845     bool hasCall = false;
4846     std::set<BB *> loopBBs;
4847     for (auto *bb : loop.GetLoopMembers()) {
4848         if (bb->HasCall()) {
4849             hasCall = true;
4850         }
4851         loopBBs.insert(bb);
4852     }
4853     if (!hasCall) {
4854         return;
4855     }
4856     auto comparator = [=](const LiveRange *lr1, const LiveRange *lr2) -> bool {
4857         return lr1->GetPriority() < lr2->GetPriority();
4858     };
4859     std::sort(lrs.begin(), lrs.end(), comparator);
4860     const MapleVector<BB *> &exits = loop.GetExits();
4861     std::set<BB *> loopExits;
4862     for (auto &bb : exits) {
4863         for (auto &succ : bb->GetSuccs()) {
4864             if (loopBBs.find(succ) != loopBBs.end()) {
4865                 continue;
4866             }
4867             if (succ->IsSoloGoto() || succ->IsEmpty()) {
4868                 BB *realSucc = CGCFG::GetTargetSuc(*succ);
4869                 if (realSucc != nullptr) {
4870                     loopExits.insert(realSucc);
4871                 }
4872             } else {
4873                 loopExits.insert(succ);
4874             }
4875         }
4876     }
4877     std::set<BB *> loopEntra;
4878     for (auto &pred : header->GetPreds()) {
4879         if (loopBBs.find(pred) != loopBBs.end()) {
4880             continue;
4881         }
4882         loopEntra.insert(pred);
4883     }
4884     if (loopEntra.size() != 1 || loopExits.size() != 1) {
4885         return;
4886     }
4887     BB *headerPred = *loopEntra.begin();
4888     BB *exitSucc = *loopExits.begin();
4889     if (headerPred->GetKind() != BB::kBBFallthru) {
4890         return;
4891     }
4892     if (exitSucc->GetPreds().size() != loop.GetExits().size()) {
4893         return;
4894     }
4895     std::set<regno_t> cands;
4896     if (!LoopNeedSplit(loop, cands)) {
4897         return;
4898     }
4899     SplitVregAroundLoop(loop, lrs, *headerPred, *exitSucc, cands);
4900 }
4901 void GraphColorRegAllocator::AnalysisLoopPressureAndSplit(const CGFuncLoops &loop)
4902 {
4903     if (loop.GetInnerLoops().empty()) {
4904         // only handle inner-most loop
4905         AnalysisLoop(loop);
4906         return;
4907     }
4908     for (const auto *lp : loop.GetInnerLoops()) {
4909         AnalysisLoopPressureAndSplit(*lp);
4910     }
4911 }
4912 
4913 /* Iterate through all instructions and change the vreg to preg. */
4914 void GraphColorRegAllocator::FinalizeRegisters()
4915 {
4916     if (doMultiPass && hasSpill) {
4917         if (needDump) {
4918             LogInfo::MapleLogger() << "In this round, spill vregs : \n";
4919             for (auto it : lrMap) {
4920                 LiveRange *lr = it.second;
4921                 if (lr->IsSpilled()) {
4922                     LogInfo::MapleLogger() << "R" << lr->GetRegNO() << " ";
4923                 }
4924             }
4925             LogInfo::MapleLogger() << "\n";
4926         }
4927         bool done = SpillLiveRangeForSpills();
4928         if (done) {
4929             return;
4930         }
4931     }
4932     if (CLANG) {
4933         if (!cgFunc->GetLoops().empty()) {
4934             cgFunc->GetTheCFG()->InitInsnVisitor(*cgFunc);
4935             for (const auto *lp : cgFunc->GetLoops()) {
4936                 AnalysisLoopPressureAndSplit(*lp);
4937             }
4938         }
4939         OptCallerSave();
4940     }
4941     for (auto *bb : bfs->sortedBBs) {
4942         FOR_BB_INSNS_SAFE(insn, bb, nextInsn) {
4943             if (insn->IsImmaterialInsn()) {
4944                 continue;
4945             }
4946             if (!insn->IsMachineInstruction()) {
4947                 continue;
4948             }
4949             if (insn->GetId() == 0) {
4950                 continue;
4951             }
4952 
4953             for (uint32 i = 0; i < kSpillMemOpndNum; ++i) {
4954                 operandSpilled[i] = false;
4955             }
4956 
4957             FinalizeRegisterInfo *fInfo = memPool->New<FinalizeRegisterInfo>(alloc);
4958             bool needProcess = true;
4959             uint64 usedRegMask = FinalizeRegisterPreprocess(*fInfo, *insn, needProcess);
4960             if (!needProcess) {
4961                 continue;
4962             }
4963             uint32 defSpillIdx = 0;
4964             uint32 useSpillIdx = 0;
4965             MemOperand *memOpnd = nullptr;
4966             if (fInfo->GetBaseOperand()) {
4967                 memOpnd = static_cast<const MemOperand *>(fInfo->GetBaseOperand())->Clone(*cgFunc->GetMemoryPool());
4968                 insn->SetOperand(fInfo->GetMemOperandIdx(), *memOpnd);
4969                 Operand *base = memOpnd->GetBaseRegister();
4970                 DEBUG_ASSERT(base != nullptr, "nullptr check");
4971                 /* if the base register is both a def and a use, defSpillIdx should also be
4972                  * incremented, but such a case does not occur yet */
4973                 RegOperand *phyOpnd = GetReplaceOpnd(*insn, *base, useSpillIdx, usedRegMask, false);
4974                 if (phyOpnd != nullptr) {
4975                     memOpnd->SetBaseRegister(*phyOpnd);
4976                 }
4977                 if (!memOpnd->IsIntactIndexed()) {
4978                     (void)GetReplaceOpnd(*insn, *base, useSpillIdx, usedRegMask, true);
4979                 }
4980             }
4981             if (fInfo->GetOffsetOperand()) {
4982                 DEBUG_ASSERT(memOpnd != nullptr, "memOpnd should not be nullptr");
4983                 Operand *offset = memOpnd->GetIndexRegister();
4984                 RegOperand *phyOpnd = GetReplaceOpnd(*insn, *offset, useSpillIdx, usedRegMask, false);
4985                 if (phyOpnd != nullptr) {
4986                     memOpnd->SetIndexRegister(*phyOpnd);
4987                 }
4988             }
4989             for (size_t i = 0; i < fInfo->GetDefOperandsSize(); ++i) {
4990                 if (insn->GetMachineOpcode() == MOP_asm) {
4991                     const Operand *defOpnd = fInfo->GetDefOperandsElem(i);
4992                     if (defOpnd->IsList()) {
4993                         ListOperand *outList = const_cast<ListOperand *>(static_cast<const ListOperand *>(defOpnd));
4994                         auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
4995                         auto *srcOpndsNew = a64CGFunc->CreateListOpnd(*a64CGFunc->GetFuncScopeAllocator());
4996                         RegOperand *phyOpnd;
4997                         for (auto opnd : outList->GetOperands()) {
4998                             if (opnd->IsPhysicalRegister()) {
4999                                 phyOpnd = opnd;
5000                             } else {
5001                                 phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, true);
5002                             }
5003                             srcOpndsNew->PushOpnd(*phyOpnd);
5004                         }
5005                         insn->SetOperand(kAsmOutputListOpnd, *srcOpndsNew);
5006                         continue;
5007                     }
5008                 }
5009                 const Operand *opnd = fInfo->GetDefOperandsElem(i);
5010                 RegOperand *phyOpnd = nullptr;
5011                 if (insn->IsSpecialIntrinsic()) {
5012                     phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, true);
5013                 } else {
5014                     phyOpnd = GetReplaceOpnd(*insn, *opnd, defSpillIdx, usedRegMask, true);
5015                 }
5016                 if (phyOpnd != nullptr) {
5017                     insn->SetOperand(fInfo->GetDefIdxElem(i), *phyOpnd);
5018                 }
5019             }
5020             for (size_t i = 0; i < fInfo->GetUseOperandsSize(); ++i) {
5021                 if (insn->GetMachineOpcode() == MOP_asm) {
5022                     const Operand *useOpnd = fInfo->GetUseOperandsElem(i);
5023                     if (useOpnd->IsList()) {
5024                         ListOperand *inList = const_cast<ListOperand *>(static_cast<const ListOperand *>(useOpnd));
5025                         auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
5026                         auto *srcOpndsNew = a64CGFunc->CreateListOpnd(*a64CGFunc->GetFuncScopeAllocator());
5027                         for (auto opnd : inList->GetOperands()) {
5028                             if ((static_cast<const RegOperand *>(opnd))->GetRegisterNumber() < kAllRegNum) {
5029                                 srcOpndsNew->PushOpnd(*opnd);
5030                             } else {
5031                                 RegOperand *phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, false);
5032                                 srcOpndsNew->PushOpnd(*phyOpnd);
5033                             }
5034                         }
5035                         insn->SetOperand(kAsmInputListOpnd, *srcOpndsNew);
5036                         continue;
5037                     }
5038                 }
5039                 const Operand *opnd = fInfo->GetUseOperandsElem(i);
5040                 RegOperand *phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, false);
5041                 if (phyOpnd != nullptr) {
5042                     insn->SetOperand(fInfo->GetUseIdxElem(i), *phyOpnd);
5043                 }
5044             }
5045             if (insn->GetMachineOpcode() == MOP_wmovrr || insn->GetMachineOpcode() == MOP_xmovrr) {
5046                 auto &reg1 = static_cast<RegOperand &>(insn->GetOperand(kInsnFirstOpnd));
5047                 auto &reg2 = static_cast<RegOperand &>(insn->GetOperand(kInsnSecondOpnd));
5048                 /* remove a redundant mov x0,x0 left over from an i32-to-i64 cast */
5049                 if ((reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) && (reg1.GetSize() >= reg2.GetSize())) {
5050                     bb->RemoveInsn(*insn);
5051                 }
5052             }
5053         } /* insn */
5054     }     /* BB */
5055 }
5056 
5057 void GraphColorRegAllocator::MarkCalleeSaveRegs()
5058 {
5059     for (auto regNO : intCalleeUsed) {
5060         static_cast<AArch64CGFunc *>(cgFunc)->AddtoCalleeSaved(static_cast<AArch64reg>(regNO));
5061     }
5062     for (auto regNO : fpCalleeUsed) {
5063         static_cast<AArch64CGFunc *>(cgFunc)->AddtoCalleeSaved(static_cast<AArch64reg>(regNO));
5064     }
5065 }
5066 
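/*
 * Summary of the function below: top-level driver. Compute the block order, live
 * ranges, and interference graph, separate and color (with splitting) the live
 * ranges, optionally run local RA, then rewrite operands to physical registers and
 * record used callee-saved registers. Returns false when multi-pass allocation
 * still produced spills and another round is required.
 */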
5067 bool GraphColorRegAllocator::AllocateRegisters()
5068 {
5069 #ifdef RANDOM_PRIORITY
5070     /* Change this seed for different random numbers */
5071     srand(0);
5072 #endif /* RANDOM_PRIORITY */
5073     auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
5074 
5075     if (needDump && doMultiPass) {
5076         LogInfo::MapleLogger() << "\n round start: \n";
5077         cgFunc->DumpCGIR();
5078     }
5079     /*
5080      * We save both FP and LR if FP is used, or if FP is not used but the function has a call.
5081      * When FP is used, record it for saving.
5082      */
5083     a64CGFunc->AddtoCalleeSaved(RFP);
5084     a64CGFunc->AddtoCalleeSaved(RLR);
5085     a64CGFunc->NoteFPLRAddedToCalleeSavedList();
5086 
5087 #if DEBUG
5088     uint32 cnt = 0;
5089     FOR_ALL_BB(bb, cgFunc) {
5090         FOR_BB_INSNS(insn, bb) {
5091             ++cnt;
5092         }
5093     }
5094     DEBUG_ASSERT(cnt <= cgFunc->GetTotalNumberOfInstructions(), "Incorrect insn count");
5095 #endif
5096     cgFunc->SetIsAfterRegAlloc();
5097     /* EBO propagation extends the live ranges and might need to be turned off. */
5098     Bfs localBfs(*cgFunc, *memPool);
5099     bfs = &localBfs;
5100     bfs->ComputeBlockOrder();
5101 
5102     InitCCReg();
5103 
5104     ComputeLiveRanges();
5105 
5106     InitFreeRegPool();
5107 
5108     BuildInterferenceGraph();
5109 
5110     Separate();
5111 
5112     SplitAndColor();
5113 
5114 #ifdef USE_LRA
5115     if (doLRA) {
5116         LocalRegisterAllocator(true);
5117     }
5118 #endif /* USE_LRA */
5119 
5120     FinalizeRegisters();
5121 
5122     MarkCalleeSaveRegs();
5123 
5124     if (!seenFP) {
5125         cgFunc->UnsetSeenFP();
5126     }
5127     if (needDump) {
5128         cgFunc->DumpCGIR();
5129     }
5130 
5131     bfs = nullptr; /* bfs is not utilized outside the function. */
5132 
5133     if (doMultiPass && hasSpill) {
5134         return false;
5135     } else {
5136         return true;
5137     }
5138 }
5139 } /* namespace maplebe */
5140