1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "aarch64_color_ra.h"
17 #include <iostream>
18 #include <fstream>
19 #include "aarch64_cg.h"
20 #include "mir_lower.h"
21 #include "securec.h"
22 /*
23 * Based on concepts from Chow and Hennessy.
24 * Phases are as follows:
25 * Prepass to collect local BB information.
26 * Compute local register allocation demands for global RA.
27 * Compute live ranges.
28 * A live range (LR) is represented by a bit vector of size #BBs.
29 * For each vreg that crosses BBs, a bit is set for every BB the LR spans.
30 * Build interference graph with basic block as granularity.
31 * When intersection of two LRs is not null, they interfere.
32 * Separate unconstrained and constrained LRs.
33 * unconstrained - LR with fewer interference edges than available colors.
34 * These LRs can always be colored.
35 * constrained - not unconstrained.
36 * Split LR based on priority cost
37 * Repeatedly add BBs from the original LR to a new LR until the new LR becomes constrained.
38 * Update all LR the new LR interferes with.
39 * Color the new LR
40 * Each LR has a forbidden list of registers that cannot be assigned to it.
41 * Coalesce move using preferred color first.
42 * Mark the remaining uncolorable LR after split as spill.
43 * Local register allocation.
44 * Emit and insert spills.
45 */
46 namespace maplebe {
47 #define JAVALANG (cgFunc->GetMirModule().IsJavaModule())
48 #define CLANG (cgFunc->GetMirModule().IsCModule())
49
50 /*
51 * for physical regOpnd phyOpnd,
52 * R0->GetRegisterNumber() == 1
53 * V0->GetRegisterNumber() == 33
54 */
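/*
 * Consequently, (regNO - R0) and (regNO - V0) as used below yield zero-based
 * indices into the int and fp register sets respectively.
 */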
55 constexpr uint32 kLoopWeight = 20;
56 constexpr uint32 kAdjustWeight = 2;
57 constexpr uint32 kInsnStep = 2;
58 constexpr uint32 kMaxSplitCount = 3;
59 constexpr uint32 kRematWeight = 3;
60 constexpr uint32 kPriorityDefThreashold = 1;
61 constexpr uint32 kPriorityUseThreashold = 5;
62 constexpr uint32 kPriorityBBThreashold = 1000;
63 constexpr float kPriorityRatioThreashold = 0.9;
64
65 void LiveUnit::PrintLiveUnit() const
66 {
67 LogInfo::MapleLogger() << "[" << begin << "," << end << "]"
68 << "<D" << defNum << "U" << useNum << ">";
69 if (!hasCall) {
70 /* Too many calls, so only print when there is no call. */
71 LogInfo::MapleLogger() << " nc";
72 }
73 if (needReload) {
74 LogInfo::MapleLogger() << " rlod";
75 }
76 if (needRestore) {
77 LogInfo::MapleLogger() << " rstr";
78 }
79 }
80
81 bool LiveRange::IsRematerializable(AArch64CGFunc &cgFunc, uint8 rematLev) const
82 {
83 if (rematLev == kRematOff)
84 return false;
85
86 switch (op) {
87 case OP_undef:
88 return false;
89 case OP_constval: {
90 const MIRConst *mirConst = rematInfo.mirConst;
91 if (mirConst->GetKind() != kConstInt) {
92 return false;
93 }
94 const MIRIntConst *intConst = static_cast<const MIRIntConst *>(rematInfo.mirConst);
95 int64 val = intConst->GetExtValue();
96 if (val >= -kMax16UnsignedImm && val <= kMax16UnsignedImm) {
97 return true;
98 }
99 auto uval = static_cast<uint64>(val);
100 if (IsMoveWidableImmediate(uval, GetSpillSize())) {
101 return true;
102 }
103 return IsBitmaskImmediate(uval, GetSpillSize());
104 }
105 case OP_addrof: {
106 if (rematLev < kRematAddr) {
107 return false;
108 }
109 const MIRSymbol *symbol = rematInfo.sym;
110 if (symbol->IsDeleted()) {
111 return false;
112 }
113 /* cost too much to remat */
114 if ((symbol->GetStorageClass() == kScFormal) && (symbol->GetSKind() == kStVar) &&
115 ((fieldID != 0) ||
116 (cgFunc.GetBecommon().GetTypeSize(symbol->GetType()->GetTypeIndex().GetIdx()) > k16ByteSize))) {
117 return false;
118 }
119 if (!addrUpper && CGOptions::IsPIC() &&
120 ((symbol->GetStorageClass() == kScGlobal) || (symbol->GetStorageClass() == kScExtern))) {
121 /* check if in loop */
122 bool useInLoop = false;
123 bool defOutLoop = false;
124 for (auto luIt : luMap) {
125 BB *bb = cgFunc.GetBBFromID(luIt.first);
126 LiveUnit *curLu = luIt.second;
127 if (bb->GetLoop() != nullptr && curLu->GetUseNum() != 0) {
128 useInLoop = true;
129 }
130 if (bb->GetLoop() == nullptr && curLu->GetDefNum() != 0) {
131 defOutLoop = true;
132 }
133 }
134 return !(useInLoop && defOutLoop);
135 }
136 return true;
137 }
138 case OP_dread: {
139 if (rematLev < kRematDreadLocal) {
140 return false;
141 }
142 const MIRSymbol *symbol = rematInfo.sym;
143 if (symbol->IsDeleted()) {
144 return false;
145 }
146 MIRStorageClass storageClass = symbol->GetStorageClass();
147 if ((storageClass == kScAuto) || (storageClass == kScFormal)) {
148 /* cost too much to remat. */
149 return false;
150 }
151 PrimType symType = symbol->GetType()->GetPrimType();
152 int32 offset = 0;
153 if (fieldID != 0) {
154 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
155 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
156 symType = structType->GetFieldType(fieldID)->GetPrimType();
157 offset = cgFunc.GetBecommon().GetFieldOffset(*structType, fieldID).first;
158 }
159 /* check that stImm.GetOffset() fits in an addri12 immediate */
160 StImmOperand &stOpnd = cgFunc.CreateStImmOperand(*symbol, offset, 0);
161 uint32 dataSize = GetPrimTypeBitSize(symType);
162 ImmOperand &immOpnd = cgFunc.CreateImmOperand(stOpnd.GetOffset(), dataSize, false);
163 if (!immOpnd.IsInBitSize(kMaxImmVal12Bits, 0)) {
164 return false;
165 }
166 if (rematLev < kRematDreadGlobal && !symbol->IsLocal()) {
167 return false;
168 }
169 return true;
170 }
171 default:
172 return false;
173 }
174 }
175
176 std::vector<Insn *> LiveRange::Rematerialize(AArch64CGFunc *cgFunc, RegOperand &regOp)
177 {
178 std::vector<Insn *> insns;
179 switch (op) {
180 case OP_constval:
181 switch (rematInfo.mirConst->GetKind()) {
182 case kConstInt: {
183 MIRIntConst *intConst =
184 const_cast<MIRIntConst *>(static_cast<const MIRIntConst *>(rematInfo.mirConst));
185
186 Operand *immOp = cgFunc->SelectIntConst(*intConst);
187 MOperator movOp = (GetSpillSize() == k32BitSize) ? MOP_wmovri32 : MOP_xmovri64;
188 insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(movOp, regOp, *immOp));
189 break;
190 }
191 default:
192 DEBUG_ASSERT(false, "Unsupported constant for rematerialization");
193 }
194 break;
195 case OP_dread: {
196 const MIRSymbol *symbol = rematInfo.sym;
197 PrimType symType = symbol->GetType()->GetPrimType();
198 RegOperand *regOp64 = &cgFunc->GetOrCreatePhysicalRegisterOperand(
199 static_cast<AArch64reg>(regOp.GetRegisterNumber()), k64BitSize, regOp.GetRegisterType());
200 int32 offset = 0;
201 if (fieldID != 0) {
202 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
203 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
204 symType = structType->GetFieldType(fieldID)->GetPrimType();
205 offset = cgFunc->GetBecommon().GetFieldOffset(*structType, fieldID).first;
206 }
207
208 uint32 dataSize = GetPrimTypeBitSize(symType);
209 MemOperand *spillMemOp =
210 &cgFunc->GetOrCreateMemOpndAfterRa(*symbol, offset, dataSize, false, regOp64, insns);
211 MOperator mOp = cgFunc->PickLdInsn(spillMemOp->GetSize(), symType);
212 insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(mOp, regOp, *spillMemOp));
213 break;
214 }
215 case OP_addrof: {
216 const MIRSymbol *symbol = rematInfo.sym;
217 int32 offset = 0;
218 if (fieldID != 0) {
219 MIRStructType *structType = static_cast<MIRStructType *>(symbol->GetType());
220 DEBUG_ASSERT(structType != nullptr, "Rematerialize: non-zero fieldID for non-structure");
221 offset = cgFunc->GetBecommon().GetFieldOffset(*structType, fieldID).first;
222 }
223 StImmOperand &stImm = cgFunc->CreateStImmOperand(*symbol, offset, 0);
224 if ((symbol->GetStorageClass() == kScAuto) || (symbol->GetStorageClass() == kScFormal)) {
225 AArch64SymbolAlloc *symLoc =
226 static_cast<AArch64SymbolAlloc *>(cgFunc->GetMemlayout()->GetSymAllocInfo(symbol->GetStIndex()));
227 ImmOperand *offsetOp = nullptr;
228 offsetOp = &cgFunc->CreateImmOperand(cgFunc->GetBaseOffset(*symLoc) + offset, k64BitSize, false);
229
230 Insn *insn =
231 &cgFunc->GetInsnBuilder()->BuildInsn(MOP_xaddrri12, regOp, *cgFunc->GetBaseReg(*symLoc), *offsetOp);
232 if (CGOptions::kVerboseCG) {
233 std::string comm = "local/formal var: ";
234 comm.append(symbol->GetName());
235 insn->SetComment(comm);
236 }
237 insns.push_back(insn);
238 } else {
239 Insn *insn = &cgFunc->GetInsnBuilder()->BuildInsn(MOP_xadrp, regOp, stImm);
240 insns.push_back(insn);
241 if (!addrUpper && CGOptions::IsPIC() &&
242 ((symbol->GetStorageClass() == kScGlobal) || (symbol->GetStorageClass() == kScExtern))) {
243 /* ldr x0, [x0, #:got_lo12:Ljava_2Flang_2FSystem_3B_7Cout] */
244 OfstOperand &offsetOp = cgFunc->CreateOfstOpnd(*symbol, offset, 0);
245 MemOperand &memOpnd =
246 cgFunc->GetOrCreateMemOpnd(MemOperand::kAddrModeBOi, GetPointerSize() * kBitsPerByte,
247 static_cast<RegOperand *>(®Op), nullptr, &offsetOp, nullptr);
248 MOperator ldOp = (memOpnd.GetSize() == k64BitSize) ? MOP_xldr : MOP_wldr;
249 insn = &cgFunc->GetInsnBuilder()->BuildInsn(ldOp, regOp, memOpnd);
250 insns.push_back(insn);
251 if (offset > 0) {
252 OfstOperand &ofstOpnd =
253 cgFunc->GetOrCreateOfstOpnd(static_cast<uint64>(static_cast<int64>(offset)), k32BitSize);
254 insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(MOP_xaddrri12, regOp, regOp, ofstOpnd));
255 }
256 } else if (!addrUpper) {
257 insns.push_back(&cgFunc->GetInsnBuilder()->BuildInsn(MOP_xadrpl12, regOp, regOp, stImm));
258 }
259 }
260 break;
261 }
262 default:
263 DEBUG_ASSERT(false, "Unexpected op in live range");
264 }
265
266 return insns;
267 }
268
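/*
 * BB and register memberships are stored as flat bit arrays of uint64 buckets.
 * An element id maps to bucket (id / kU64) and bit (id % kU64); assuming kU64
 * is the 64-bit bucket width, e.g. BB id 70 lives in bucket 1, bit 6. The
 * helpers below walk every set bit and invoke the functor with the
 * reconstructed id.
 */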
269 template <typename Func>
270 void GraphColorRegAllocator::ForEachBBArrElem(const uint64 *vec, Func functor) const
271 {
272 for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) {
273 for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
274 if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
275 functor(iBBArrElem * kU64 + bBBArrElem);
276 }
277 }
278 }
279 }
280
281 template <typename Func>
282 void GraphColorRegAllocator::ForEachBBArrElemWithInterrupt(const uint64 *vec, Func functor) const
283 {
284 for (uint32 iBBArrElem = 0; iBBArrElem < bbBuckets; ++iBBArrElem) {
285 for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
286 if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
287 if (functor(iBBArrElem * kU64 + bBBArrElem)) {
288 return;
289 }
290 }
291 }
292 }
293 }
294
295 template <typename Func>
296 void GraphColorRegAllocator::ForEachRegArrElem(const uint64 *vec, Func functor) const
297 {
298 for (uint32 iBBArrElem = 0; iBBArrElem < regBuckets; ++iBBArrElem) {
299 for (uint32 bBBArrElem = 0; bBBArrElem < kU64; ++bBBArrElem) {
300 if ((vec[iBBArrElem] & (1ULL << bBBArrElem)) != 0) {
301 functor(iBBArrElem * kU64 + bBBArrElem);
302 }
303 }
304 }
305 }
306
307 void GraphColorRegAllocator::PrintLiveUnitMap(const LiveRange &lr) const
308 {
309 LogInfo::MapleLogger() << "\n\tlu:";
310 for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) {
311 if (!IsBitArrElemSet(lr.GetBBMember(), i)) {
312 continue;
313 }
314 auto lu = lr.GetLuMap().find(i);
315 if (lu != lr.GetLuMap().end() && (lu->second->GetDefNum() || lu->second->GetUseNum())) {
316 LogInfo::MapleLogger() << "(" << i << " ";
317 lu->second->PrintLiveUnit();
318 LogInfo::MapleLogger() << ")";
319 }
320 }
321 LogInfo::MapleLogger() << "\n";
322 }
323
324 void GraphColorRegAllocator::PrintLiveRangeConflicts(const LiveRange &lr) const
325 {
326 LogInfo::MapleLogger() << "\n\tinterfere(" << lr.GetNumBBConflicts() << "): ";
327 for (uint32 i = 0; i < regBuckets; ++i) {
328 uint64 chunk = lr.GetBBConflictElem(i);
329 for (uint64 bit = 0; bit < kU64; ++bit) {
330 if (chunk & (1ULL << bit)) {
331 regno_t newNO = i * kU64 + bit;
332 LogInfo::MapleLogger() << newNO << ",";
333 }
334 }
335 }
336 LogInfo::MapleLogger() << "\n";
337 }
338
339 void GraphColorRegAllocator::PrintLiveBBBit(const LiveRange &lr) const
340 {
341 LogInfo::MapleLogger() << "live_bb(" << lr.GetNumBBMembers() << "): ";
342 for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) {
343 if (IsBitArrElemSet(lr.GetBBMember(), i)) {
344 LogInfo::MapleLogger() << i << " ";
345 }
346 }
347 LogInfo::MapleLogger() << "\n";
348 }
349
350 void GraphColorRegAllocator::PrintLiveRange(const LiveRange &lr, const std::string &str) const
351 {
352 LogInfo::MapleLogger() << str << "\n";
353
354 LogInfo::MapleLogger() << "R" << lr.GetRegNO();
355 if (lr.GetRegType() == kRegTyInt) {
356 LogInfo::MapleLogger() << "(I)";
357 } else if (lr.GetRegType() == kRegTyFloat) {
358 LogInfo::MapleLogger() << "(F)";
359 } else {
360 LogInfo::MapleLogger() << "(U)";
361 }
362 if (lr.GetSpillSize() == k32) {
363 LogInfo::MapleLogger() << "S32";
364 } else if (lr.GetSpillSize() == k64) {
365 LogInfo::MapleLogger() << "S64";
366 } else {
367 LogInfo::MapleLogger() << "S0(nodef)";
368 }
369 LogInfo::MapleLogger() << "\tnumCall " << lr.GetNumCall();
370 LogInfo::MapleLogger() << "\tpriority " << lr.GetPriority();
371 LogInfo::MapleLogger() << "\tforbidden: ";
372 for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) {
373 if (lr.GetForbidden(preg)) {
374 LogInfo::MapleLogger() << preg << ",";
375 }
376 }
377 LogInfo::MapleLogger() << "\tpregveto: ";
378 for (regno_t preg = kInvalidRegNO; preg < kMaxRegNum; preg++) {
379 if (lr.GetPregveto(preg)) {
380 LogInfo::MapleLogger() << preg << ",";
381 }
382 }
383 if (lr.IsSpilled()) {
384 LogInfo::MapleLogger() << " spilled";
385 }
386 if (lr.GetSplitLr()) {
387 LogInfo::MapleLogger() << " split";
388 }
389 LogInfo::MapleLogger() << "\top: " << kOpcodeInfo.GetName(lr.GetOp());
390 LogInfo::MapleLogger() << "\n";
391 PrintLiveBBBit(lr);
392 PrintLiveRangeConflicts(lr);
393 PrintLiveUnitMap(lr);
394 if (lr.GetSplitLr()) {
395 PrintLiveRange(*lr.GetSplitLr(), "===>Split LR");
396 }
397 }
398
399 void GraphColorRegAllocator::PrintLiveRanges() const
400 {
401 LogInfo::MapleLogger() << "PrintLiveRanges: size = " << lrMap.size() << "\n";
402 for (auto it : lrMap) {
403 PrintLiveRange(*it.second, "");
404 }
405 LogInfo::MapleLogger() << "\n";
406 }
407
408 void GraphColorRegAllocator::PrintLocalRAInfo(const std::string &str) const
409 {
410 LogInfo::MapleLogger() << str << "\n";
411 for (uint32 id = 0; id < cgFunc->NumBBs(); ++id) {
412 LocalRaInfo *lraInfo = localRegVec[id];
413 if (lraInfo == nullptr) {
414 continue;
415 }
416 LogInfo::MapleLogger() << "bb " << id << " def ";
417 for (const auto &defCntPair : lraInfo->GetDefCnt()) {
418 LogInfo::MapleLogger() << "[" << defCntPair.first << ":" << defCntPair.second << "],";
419 }
420 LogInfo::MapleLogger() << "\n";
421 LogInfo::MapleLogger() << "use ";
422 for (const auto &useCntPair : lraInfo->GetUseCnt()) {
423 LogInfo::MapleLogger() << "[" << useCntPair.first << ":" << useCntPair.second << "],";
424 }
425 LogInfo::MapleLogger() << "\n";
426 }
427 }
428
429 void GraphColorRegAllocator::PrintBBAssignInfo() const
430 {
431 for (size_t id = 0; id < bfs->sortedBBs.size(); ++id) {
432 uint32 bbID = bfs->sortedBBs[id]->GetId();
433 BBAssignInfo *bbInfo = bbRegInfo[bbID];
434 if (bbInfo == nullptr) {
435 continue;
436 }
437 LogInfo::MapleLogger() << "BBinfo(" << id << ")";
438 LogInfo::MapleLogger() << " lra-needed int " << bbInfo->GetIntLocalRegsNeeded();
439 LogInfo::MapleLogger() << " fp " << bbInfo->GetFpLocalRegsNeeded();
440 LogInfo::MapleLogger() << " greg-used ";
441 for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
442 if (bbInfo->GetGlobalsAssigned(regNO)) {
443 LogInfo::MapleLogger() << regNO << ",";
444 }
445 }
446 LogInfo::MapleLogger() << "\n";
447 }
448 }
449
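/*
 * Priority is roughly: sum over member BBs of (defNum + useNum) * mult, where
 * mult is either the BB frequency or kLoopWeight raised to a loop-depth based
 * factor. Rematerializable LRs are then scaled down (kRematWeight, /4, /2)
 * because spilling them is cheap. The constants are heuristic tuning values.
 */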
450 void GraphColorRegAllocator::CalculatePriority(LiveRange &lr) const
451 {
452 #ifdef RANDOM_PRIORITY
453 unsigned long seed = 0;
454 size_t size = sizeof(seed);
455 std::ifstream randomNum("/dev/random", std::ios::in | std::ios::binary);
456 if (randomNum) {
457 randomNum.read(reinterpret_cast<char *>(&seed), size);
458 if (randomNum) {
459 lr.SetPriority(1.0f / static_cast<float>(seed + 1));
460 }
461 randomNum.close();
462 } else {
463 std::cerr << "Failed to open /dev/random" << '\n';
464 }
465 return;
466 #endif /* RANDOM_PRIORITY */
467 float pri = 0.0;
468 uint32 bbNum = 0;
469 uint32 numDefs = 0;
470 uint32 numUses = 0;
471 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
472 CG *cg = a64CGFunc->GetCG();
473
474 if (cg->GetRematLevel() >= kRematConst && lr.IsRematerializable(*a64CGFunc, kRematConst)) {
475 lr.SetRematLevel(kRematConst);
476 } else if (cg->GetRematLevel() >= kRematAddr && lr.IsRematerializable(*a64CGFunc, kRematAddr)) {
477 lr.SetRematLevel(kRematAddr);
478 } else if (cg->GetRematLevel() >= kRematDreadLocal && lr.IsRematerializable(*a64CGFunc, kRematDreadLocal)) {
479 lr.SetRematLevel(kRematDreadLocal);
480 } else if (cg->GetRematLevel() >= kRematDreadGlobal && lr.IsRematerializable(*a64CGFunc, kRematDreadGlobal)) {
481 lr.SetRematLevel(kRematDreadGlobal);
482 }
483
484 auto calculatePriorityFunc = [&lr, &bbNum, &numDefs, &numUses, &pri, this](uint32 bbID) {
485 auto lu = lr.FindInLuMap(bbID);
486 DEBUG_ASSERT(lu != lr.EndOfLuMap(), "can not find live unit");
487 BB *bb = bbVec[bbID];
488 if (bb->GetFirstInsn() != nullptr && !bb->IsSoloGoto()) {
489 ++bbNum;
490 numDefs += lu->second->GetDefNum();
491 numUses += lu->second->GetUseNum();
492 uint32 useCnt = lu->second->GetDefNum() + lu->second->GetUseNum();
493 uint32 mult;
494 #ifdef USE_BB_FREQUENCY
495 mult = bb->GetFrequency();
496 #else /* USE_BB_FREQUENCY */
497 if (bb->GetLoop() != nullptr) {
498 uint32 loopFactor;
499 if (lr.GetNumCall() > 0 && lr.GetRematLevel() == kRematOff) {
500 loopFactor = bb->GetLoop()->GetLoopLevel() * kAdjustWeight;
501 } else {
502 loopFactor = bb->GetLoop()->GetLoopLevel() / kAdjustWeight;
503 }
504 mult = static_cast<uint32>(pow(kLoopWeight, loopFactor));
505 } else {
506 mult = 1;
507 }
508 #endif /* USE_BB_FREQUENCY */
509 pri += useCnt * mult;
510 }
511 };
512 ForEachBBArrElem(lr.GetBBMember(), calculatePriorityFunc);
513
514 if (lr.GetRematLevel() == kRematAddr || lr.GetRematLevel() == kRematConst) {
515 if (numDefs <= 1 && numUses <= 1) {
516 pri = -0xFFFF;
517 } else {
518 pri /= kRematWeight;
519 }
520 } else if (lr.GetRematLevel() == kRematDreadLocal) {
521 pri /= 4; // divide 4 to lower priority
522 } else if (lr.GetRematLevel() == kRematDreadGlobal) {
523 pri /= 2; // divide 2 to lower priority
524 }
525
526 lr.SetPriority(pri);
527 lr.SetNumDefs(numDefs);
528 lr.SetNumUses(numUses);
529 if (lr.GetPriority() > 0 && numDefs <= kPriorityDefThreashold && numUses <= kPriorityUseThreashold &&
530 cgFunc->NumBBs() > kPriorityBBThreashold &&
531 (static_cast<float>(lr.GetNumBBMembers()) / cgFunc->NumBBs()) > kPriorityRatioThreashold) {
532 /* for large functions, delay allocating long LR with few defs and uses */
533 lr.SetPriority(0.0);
534 }
535 }
536
537 void GraphColorRegAllocator::PrintBBs() const
538 {
539 for (auto *bb : bfs->sortedBBs) {
540 LogInfo::MapleLogger() << "\n< === > ";
541 LogInfo::MapleLogger() << bb->GetId();
542 LogInfo::MapleLogger() << " succs:";
543 for (auto *succBB : bb->GetSuccs()) {
544 LogInfo::MapleLogger() << " " << succBB->GetId();
545 }
546 LogInfo::MapleLogger() << " eh_succs:";
547 for (auto *succBB : bb->GetEhSuccs()) {
548 LogInfo::MapleLogger() << " " << succBB->GetId();
549 }
550 }
551 LogInfo::MapleLogger() << "\n";
552 }
553
554 uint32 GraphColorRegAllocator::MaxIntPhysRegNum() const
555 {
556 return (R28 - R0);
557 }
558
559 uint32 GraphColorRegAllocator::MaxFloatPhysRegNum() const
560 {
561 return (V31 - V0);
562 }
563
564 bool GraphColorRegAllocator::IsReservedReg(AArch64reg regNO) const
565 {
566 if (!doMultiPass || cgFunc->GetMirModule().GetSrcLang() != kSrcLangC) {
567 return (regNO == R16) || (regNO == R17);
568 } else {
569 return (regNO == R16);
570 }
571 }
572
573 void GraphColorRegAllocator::InitFreeRegPool()
574 {
575 /*
576 * ==== int regs ====
577 * FP 29, LR 30, SP 31, 0 to 7 parameters
578
579 * MapleCG defines 32 as ZR (zero register)
580 * use r8 if the callee does not return a large struct? No.
581 * 16 and 17 are intra-procedure call temp, can be caller saved
582 * 18 is platform reg, still use it
583 */
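/*
 * The loop below partitions the remaining usable registers into caller-saved
 * and callee-saved pools per register type; x29 joins the int callee pool
 * only when the function does not need it as a frame pointer.
 */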
584 uint32 intNum = 0;
585 uint32 fpNum = 0;
586 for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) {
587 if (!AArch64Abi::IsAvailableReg(static_cast<AArch64reg>(regNO))) {
588 continue;
589 }
590
591 /*
592 * Because of the try-catch scenario in JAVALANG,
593 * we should use specialized spill register to prevent register changes when exceptions occur.
594 */
595 if (JAVALANG && AArch64Abi::IsSpillRegInRA(static_cast<AArch64reg>(regNO), needExtraSpillReg)) {
596 if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
597 /* Preset int spill registers */
598 (void)intSpillRegSet.insert(regNO - R0);
599 } else {
600 /* Preset float spill registers */
601 (void)fpSpillRegSet.insert(regNO - V0);
602 }
603 continue;
604 }
605
606 #ifdef RESERVED_REGS
607 /* r16,r17 are used besides ra. */
608 if (IsReservedReg(static_cast<AArch64reg>(regNO))) {
609 continue;
610 }
611 #endif /* RESERVED_REGS */
612
613 if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
614 /* when yieldpoint is enabled, x19 is reserved. */
615 if (IsYieldPointReg(static_cast<AArch64reg>(regNO))) {
616 continue;
617 }
618 if (regNO == R29) {
619 if (!cgFunc->UseFP()) {
620 (void)intCalleeRegSet.insert(regNO - R0);
621 ++intNum;
622 }
623 continue;
624 }
625 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO))) {
626 (void)intCalleeRegSet.insert(regNO - R0);
627 } else {
628 (void)intCallerRegSet.insert(regNO - R0);
629 }
630 ++intNum;
631 } else {
632 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO))) {
633 (void)fpCalleeRegSet.insert(regNO - V0);
634 } else {
635 (void)fpCallerRegSet.insert(regNO - V0);
636 }
637 ++fpNum;
638 }
639 }
640 intRegNum = intNum;
641 fpRegNum = fpNum;
642 }
643
644 void GraphColorRegAllocator::InitCCReg()
645 {
646 Operand &opnd = cgFunc->GetOrCreateRflag();
647 auto &tmpRegOp = static_cast<RegOperand &>(opnd);
648 ccReg = tmpRegOp.GetRegisterNumber();
649 }
650
651 bool GraphColorRegAllocator::IsYieldPointReg(regno_t regNO) const
652 {
653 if (cgFunc->GetCG()->GenYieldPoint()) {
654 return (regNO == RYP);
655 }
656 return false;
657 }
658
659 bool GraphColorRegAllocator::IsUnconcernedReg(regno_t regNO) const
660 {
661 /* RFP = 32, RLR = 31, RSP = 33, RZR = 34 */
662 if ((regNO >= RLR && regNO <= RZR) || regNO == RFP || regNO == ccReg) {
663 return true;
664 }
665
666 /* when yieldpoint is enabled, the RYP(x19) can not be used. */
667 if (IsYieldPointReg(static_cast<AArch64reg>(regNO))) {
668 return true;
669 }
670
671 return false;
672 }
673
674 bool GraphColorRegAllocator::IsUnconcernedReg(const RegOperand &regOpnd) const
675 {
676 RegType regType = regOpnd.GetRegisterType();
677 if (regType == kRegTyCc || regType == kRegTyVary) {
678 return true;
679 }
680 uint32 regNO = regOpnd.GetRegisterNumber();
681 if (regNO == RZR) {
682 return true;
683 }
684 return IsUnconcernedReg(regNO);
685 }
686
687 /*
688 * Based on live analysis, the live-in and live-out set determines
689 * the bit to be set in the LR vector, which is of size #BBs.
690 * If a vreg is in the live-in and live-out set, it is live in the BB.
691 *
692 * Also keep track if a LR crosses a call. If a LR crosses a call, it
693 * interferes with all caller saved registers. Add all caller registers
694 * to the LR's forbidden list.
695 *
696 * Return created LiveRange object
697 *
698 * maybe need extra info:
699 * Add info for setjmp.
700 * Add info for defBB, useBB, index in BB for def and use
701 * Add info for startingBB and endingBB
702 */
703 LiveRange *GraphColorRegAllocator::NewLiveRange()
704 {
705 LiveRange *lr = memPool->New<LiveRange>(alloc);
706
707 if (bbBuckets == 0) {
708 bbBuckets = (cgFunc->NumBBs() / kU64) + 1;
709 }
710 lr->SetBBBuckets(bbBuckets);
711 lr->InitBBMember(*memPool, bbBuckets);
712 if (regBuckets == 0) {
713 regBuckets = (cgFunc->GetMaxRegNum() / kU64) + 1;
714 }
715 lr->SetRegBuckets(regBuckets);
716 lr->InitBBConflict(*memPool, regBuckets);
717 lr->InitPregveto();
718 lr->InitForbidden();
719 return lr;
720 }
721
722 /* Create local info for LR. return true if reg is not local. */
723 bool GraphColorRegAllocator::CreateLiveRangeHandleLocal(regno_t regNO, const BB &bb, bool isDef)
724 {
725 if (FindIn(bb.GetLiveInRegNO(), regNO) || FindIn(bb.GetLiveOutRegNO(), regNO)) {
726 return true;
727 }
728 /*
729 * register not in globals for the bb, so it is local.
730 * Compute local RA info.
731 */
732 LocalRaInfo *lraInfo = localRegVec[bb.GetId()];
733 if (lraInfo == nullptr) {
734 lraInfo = memPool->New<LocalRaInfo>(alloc);
735 localRegVec[bb.GetId()] = lraInfo;
736 }
737 if (isDef) {
738 /* movk is handled by different id for use/def in the same insn. */
739 lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1);
740 } else {
741 lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1);
742 }
743 /* LR info is still useful for LRA, so keep building it */
744 return false;
745 }
746
747 LiveRange *GraphColorRegAllocator::CreateLiveRangeAllocateAndUpdate(regno_t regNO, const BB &bb, bool isDef,
748 uint32 currId)
749 {
750 LiveRange *lr = GetLiveRange(regNO);
751 if (lr == nullptr) {
752 lr = NewLiveRange();
753 lr->SetID(currId);
754
755 LiveUnit *lu = memPool->New<LiveUnit>();
756 lr->SetElemToLuMap(bb.GetId(), *lu);
757 lu->SetBegin(currId);
758 lu->SetEnd(currId);
759 if (isDef) {
760 /* means no use after def for reg, chances for ebo opt */
761 for (const auto &pregNO : pregLive) {
762 lr->InsertElemToPregveto(pregNO);
763 }
764 }
765 } else {
766 LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb.GetId());
767 if (lu == nullptr) {
768 lu = memPool->New<LiveUnit>();
769 lr->SetElemToLuMap(bb.GetId(), *lu);
770 lu->SetBegin(currId);
771 lu->SetEnd(currId);
772 }
773 if (lu->GetBegin() > currId) {
774 lu->SetBegin(currId);
775 }
776 }
777
778 if (CLANG) {
779 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
780 MIRPreg *preg = a64CGFunc->GetPseudoRegFromVirtualRegNO(regNO, CGOptions::DoCGSSA());
781 if (preg) {
782 switch (preg->GetOp()) {
783 case OP_constval:
784 lr->SetRematerializable(preg->rematInfo.mirConst);
785 break;
786 case OP_addrof:
787 case OP_dread:
788 lr->SetRematerializable(preg->GetOp(), preg->rematInfo.sym, preg->fieldID, preg->addrUpper);
789 break;
790 case OP_undef:
791 break;
792 default:
793 DEBUG_ASSERT(false, "Unexpected op in Preg");
794 }
795 }
796 }
797
798 return lr;
799 }
800
801 void GraphColorRegAllocator::CreateLiveRange(regno_t regNO, const BB &bb, bool isDef, uint32 currId, bool updateCount)
802 {
803 bool isNonLocal = CreateLiveRangeHandleLocal(regNO, bb, isDef);
804
805 if (!isDef) {
806 --currId;
807 }
808
809 LiveRange *lr = CreateLiveRangeAllocateAndUpdate(regNO, bb, isDef, currId);
810 lr->SetRegNO(regNO);
811 lr->SetIsNonLocal(isNonLocal);
812 if (isDef) {
813 (void)vregLive.erase(regNO);
814 #ifdef OPTIMIZE_FOR_PROLOG
815 if (doOptProlog && updateCount) {
816 if (lr->GetNumDefs() == 0) {
817 lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency());
818 }
819 lr->IncNumDefs();
820 }
821 #endif /* OPTIMIZE_FOR_PROLOG */
822 } else {
823 (void)vregLive.insert(regNO);
824 #ifdef OPTIMIZE_FOR_PROLOG
825 if (doOptProlog && updateCount) {
826 if (lr->GetNumUses() == 0) {
827 lr->SetFrequency(lr->GetFrequency() + bb.GetFrequency());
828 }
829 lr->IncNumUses();
830 }
831 #endif /* OPTIMIZE_FOR_PROLOG */
832 }
833 for (const auto &pregNO : pregLive) {
834 lr->InsertElemToPregveto(pregNO);
835 }
836
837 /* only handle it in live_in and def point? */
838 uint32 bbID = bb.GetId();
839 lr->SetMemberBitArrElem(bbID);
840
841 lrMap[regNO] = lr;
842 }
843
844 bool GraphColorRegAllocator::SetupLiveRangeByOpHandlePhysicalReg(const RegOperand &regOpnd, Insn &insn, regno_t regNO,
845 bool isDef)
846 {
847 if (!regOpnd.IsPhysicalRegister()) {
848 return false;
849 }
850 LocalRaInfo *lraInfo = localRegVec[insn.GetBB()->GetId()];
851 if (lraInfo == nullptr) {
852 lraInfo = memPool->New<LocalRaInfo>(alloc);
853 localRegVec[insn.GetBB()->GetId()] = lraInfo;
854 }
855
856 if (isDef) {
857 if (FindNotIn(pregLive, regNO)) {
858 for (const auto &vRegNO : vregLive) {
859 if (IsUnconcernedReg(vRegNO)) {
860 continue;
861 }
862 lrMap[vRegNO]->InsertElemToPregveto(regNO);
863 }
864 }
865 pregLive.erase(regNO);
866 if (lraInfo != nullptr) {
867 lraInfo->SetDefCntElem(regNO, lraInfo->GetDefCntElem(regNO) + 1);
868 }
869 } else {
870 (void)pregLive.insert(regNO);
871 for (const auto &vregNO : vregLive) {
872 if (IsUnconcernedReg(vregNO)) {
873 continue;
874 }
875 LiveRange *lr = lrMap[vregNO];
876 lr->InsertElemToPregveto(regNO);
877 }
878
879 if (lraInfo != nullptr) {
880 lraInfo->SetUseCntElem(regNO, lraInfo->GetUseCntElem(regNO) + 1);
881 }
882 }
883 return true;
884 }
885
886 /*
887 * Add pregs to the forbidden list of an lr. If a preg is in
888 * the live list, then it is forbidden for the other vregs on the list.
889 */
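/*
 * Throughout this file, pregveto is the hard veto set collected while an LR is
 * built (physical regs live across it), while forbidden collects colors taken
 * by already-colored neighbors (see UpdateForbiddenForNeighbors); a register
 * already in pregveto is not added to forbidden again.
 */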
890 void GraphColorRegAllocator::SetupLiveRangeByOp(Operand &op, Insn &insn, bool isDef, uint32 &numUses)
891 {
892 if (!op.IsRegister()) {
893 return;
894 }
895 auto ®Opnd = static_cast<RegOperand &>(op);
896 uint32 regNO = regOpnd.GetRegisterNumber();
897 if (IsUnconcernedReg(regOpnd)) {
898 if (GetLiveRange(regNO) != nullptr) {
899 DEBUG_ASSERT(false, "Unconcerned reg");
900 lrMap.erase(regNO);
901 }
902 return;
903 }
904 if (SetupLiveRangeByOpHandlePhysicalReg(regOpnd, insn, regNO, isDef)) {
905 return;
906 }
907
908 CreateLiveRange(regNO, *insn.GetBB(), isDef, insn.GetId(), true);
909
910 LiveRange *lr = GetLiveRange(regNO);
911 DEBUG_ASSERT(lr != nullptr, "lr should not be nullptr");
912 if (isDef) {
913 lr->SetSpillSize((regOpnd.GetSize() <= k32) ? k32 : k64);
914 }
915 if (lr->GetRegType() == kRegTyUndef) {
916 lr->SetRegType(regOpnd.GetRegisterType());
917 }
918 if (isDef) {
919 lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncDefNum();
920 lr->AddRef(insn.GetBB()->GetId(), insn.GetId(), kIsDef);
921 } else {
922 lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->IncUseNum();
923 lr->AddRef(insn.GetBB()->GetId(), insn.GetId(), kIsUse);
924 ++numUses;
925 }
926 #ifdef MOVE_COALESCE
927 if (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr) {
928 RegOperand &opnd1 = static_cast<RegOperand &>(insn.GetOperand(1));
929 if (opnd1.GetRegisterNumber() < kAllRegNum && !IsUnconcernedReg(opnd1)) {
930 lr->InsertElemToPrefs(opnd1.GetRegisterNumber() - R0);
931 }
932 RegOperand &opnd0 = static_cast<RegOperand &>(insn.GetOperand(0));
933 if (opnd0.GetRegisterNumber() < kAllRegNum) {
934 lr->InsertElemToPrefs(opnd0.GetRegisterNumber() - R0);
935 }
936 }
937 #endif /* MOVE_COALESCE */
938 if (!insn.IsSpecialIntrinsic() && insn.GetBothDefUseOpnd() != kInsnMaxOpnd) {
939 lr->SetDefUse();
940 }
941 }
942
943 /* handle live range for bb->live_out */
944 void GraphColorRegAllocator::SetupLiveRangeByRegNO(regno_t liveOut, BB &bb, uint32 currPoint)
945 {
946 if (IsUnconcernedReg(liveOut)) {
947 return;
948 }
949 if (liveOut >= kAllRegNum) {
950 (void)vregLive.insert(liveOut);
951 CreateLiveRange(liveOut, bb, false, currPoint, false);
952 return;
953 }
954
955 (void)pregLive.insert(liveOut);
956 for (const auto &vregNO : vregLive) {
957 LiveRange *lr = lrMap[vregNO];
958 lr->InsertElemToPregveto(liveOut);
959 }
960
961 /* See if the phys reg is also live-in. If so, assume it spans the entire bb. */
962 if (!FindIn(bb.GetLiveInRegNO(), liveOut)) {
963 return;
964 }
965 LocalRaInfo *lraInfo = localRegVec[bb.GetId()];
966 if (lraInfo == nullptr) {
967 lraInfo = memPool->New<LocalRaInfo>(alloc);
968 localRegVec[bb.GetId()] = lraInfo;
969 }
970 /* Make it large enough so that no locals can be allocated. */
971 lraInfo->SetUseCntElem(liveOut, kMaxUint16);
972 }
973
974 void GraphColorRegAllocator::ClassifyOperand(std::unordered_set<regno_t> &pregs, std::unordered_set<regno_t> &vregs,
975 const Operand &opnd) const
976 {
977 if (!opnd.IsRegister()) {
978 return;
979 }
980 auto ®Opnd = static_cast<const RegOperand &>(opnd);
981 regno_t regNO = regOpnd.GetRegisterNumber();
982 if (IsUnconcernedReg(regNO)) {
983 return;
984 }
985 if (regOpnd.IsPhysicalRegister()) {
986 (void)pregs.insert(regNO);
987 } else {
988 (void)vregs.insert(regNO);
989 }
990 }
991
992 void GraphColorRegAllocator::SetOpndConflict(const Insn &insn, bool onlyDef)
993 {
994 uint32 opndNum = insn.GetOperandSize();
995 if (opndNum <= 1) {
996 return;
997 }
998 const InsnDesc *md = insn.GetDesc();
999 std::unordered_set<regno_t> pregs;
1000 std::unordered_set<regno_t> vregs;
1001
1002 for (uint32 i = 0; i < opndNum; ++i) {
1003 Operand &opnd = insn.GetOperand(i);
1004 if (!onlyDef) {
1005 if (opnd.IsList()) {
1006 auto &listOpnd = static_cast<ListOperand &>(opnd);
1007 for (auto op : listOpnd.GetOperands()) {
1008 ClassifyOperand(pregs, vregs, *op);
1009 }
1010 } else if (opnd.IsMemoryAccessOperand()) {
1011 auto &memOpnd = static_cast<MemOperand &>(opnd);
1012 Operand *base = memOpnd.GetBaseRegister();
1013 Operand *offset = memOpnd.GetIndexRegister();
1014 if (base != nullptr) {
1015 ClassifyOperand(pregs, vregs, *base);
1016 }
1017 if (offset != nullptr) {
1018 ClassifyOperand(pregs, vregs, *offset);
1019 }
1020 } else if (opnd.IsRegister()) {
1021 ClassifyOperand(pregs, vregs, opnd);
1022 }
1023 } else {
1024 if (md->GetOpndDes(i)->IsRegDef()) {
1025 ClassifyOperand(pregs, vregs, opnd);
1026 }
1027 if (opnd.IsMemoryAccessOperand()) {
1028 auto &memOpnd = static_cast<MemOperand &>(opnd);
1029 Operand *base = memOpnd.GetBaseRegister();
1030 if (base != nullptr && !memOpnd.IsIntactIndexed()) {
1031 ClassifyOperand(pregs, vregs, *base);
1032 }
1033 }
1034 }
1035 }
1036
1037 if (vregs.empty()) {
1038 return;
1039 }
1040 /* Set BBConflict and Pregveto */
1041 for (regno_t vregNO : vregs) {
1042 for (regno_t conflictVregNO : vregs) {
1043 if (conflictVregNO != vregNO) {
1044 lrMap[vregNO]->SetConflictBitArrElem(conflictVregNO);
1045 }
1046 }
1047 for (regno_t conflictPregNO : pregs) {
1048 lrMap[vregNO]->InsertElemToPregveto(conflictPregNO);
1049 }
1050 }
1051 }
1052
1053 void GraphColorRegAllocator::UpdateOpndConflict(const Insn &insn, bool multiDef)
1054 {
1055 /* if IsSpecialIntrinsic or IsAtomicStore, set conflicts for all opnds */
1056 if (insn.IsAtomicStore() || insn.IsSpecialIntrinsic()) {
1057 SetOpndConflict(insn, false);
1058 return;
1059 }
1060 if (multiDef) {
1061 SetOpndConflict(insn, true);
1062 }
1063 }
1064
1065 void GraphColorRegAllocator::ComputeLiveRangesForEachDefOperand(Insn &insn, bool &multiDef)
1066 {
1067 uint32 numDefs = 0;
1068 uint32 numUses = 0;
1069 const InsnDesc *md = insn.GetDesc();
1070 uint32 opndNum = insn.GetOperandSize();
1071 for (uint32 i = 0; i < opndNum; ++i) {
1072 if (insn.GetMachineOpcode() == MOP_asm && (i == kAsmOutputListOpnd || i == kAsmClobberListOpnd)) {
1073 for (auto opnd : static_cast<ListOperand &>(insn.GetOperand(i)).GetOperands()) {
1074 SetupLiveRangeByOp(*static_cast<RegOperand *>(opnd), insn, true, numUses);
1075 ++numDefs;
1076 }
1077 continue;
1078 }
1079 Operand &opnd = insn.GetOperand(i);
1080 if (opnd.IsMemoryAccessOperand()) {
1081 auto &memOpnd = static_cast<MemOperand &>(opnd);
1082 if (!memOpnd.IsIntactIndexed()) {
1083 SetupLiveRangeByOp(opnd, insn, true, numUses);
1084 ++numDefs;
1085 }
1086 }
1087 if (!md->GetOpndDes(i)->IsRegDef()) {
1088 continue;
1089 }
1090 SetupLiveRangeByOp(opnd, insn, true, numUses);
1091 ++numDefs;
1092 }
1093 DEBUG_ASSERT(numUses == 0, "should only be def opnd");
1094 if (numDefs > 1) {
1095 multiDef = true;
1096 needExtraSpillReg = true;
1097 }
1098 }
1099
1100 void GraphColorRegAllocator::ComputeLiveRangesForEachUseOperand(Insn &insn)
1101 {
1102 uint32 numUses = 0;
1103 const InsnDesc *md = insn.GetDesc();
1104 uint32 opndNum = insn.GetOperandSize();
1105 for (uint32 i = 0; i < opndNum; ++i) {
1106 if (insn.GetMachineOpcode() == MOP_asm && i == kAsmInputListOpnd) {
1107 for (auto opnd : static_cast<ListOperand &>(insn.GetOperand(i)).GetOperands()) {
1108 SetupLiveRangeByOp(*static_cast<RegOperand *>(opnd), insn, false, numUses);
1109 }
1110 continue;
1111 }
1112 if (md->GetOpndDes(i)->IsRegDef() && !md->GetOpndDes(i)->IsRegUse()) {
1113 continue;
1114 }
1115 Operand &opnd = insn.GetOperand(i);
1116 if (opnd.IsList()) {
1117 auto &listOpnd = static_cast<ListOperand &>(opnd);
1118 for (auto op : listOpnd.GetOperands()) {
1119 SetupLiveRangeByOp(*op, insn, false, numUses);
1120 }
1121 } else if (opnd.IsMemoryAccessOperand()) {
1122 auto &memOpnd = static_cast<MemOperand &>(opnd);
1123 Operand *base = memOpnd.GetBaseRegister();
1124 Operand *offset = memOpnd.GetIndexRegister();
1125 if (base != nullptr) {
1126 SetupLiveRangeByOp(*base, insn, false, numUses);
1127 }
1128 if (offset != nullptr) {
1129 SetupLiveRangeByOp(*offset, insn, false, numUses);
1130 }
1131 } else {
1132 SetupLiveRangeByOp(opnd, insn, false, numUses);
1133 }
1134 }
1135 if (numUses >= AArch64Abi::kNormalUseOperandNum || insn.GetMachineOpcode() == MOP_lazy_ldr) {
1136 needExtraSpillReg = true;
1137 }
1138 }
1139
1140 void GraphColorRegAllocator::ComputeLiveRangesUpdateIfInsnIsCall(const Insn &insn)
1141 {
1142 if (!insn.IsCall()) {
1143 return;
1144 }
1145 /* def the return value */
1146 pregLive.erase(R0);
1147 pregLive.erase(V0);
1148
1149 /* activate the parameters */
1150 Operand &opnd1 = insn.GetOperand(1);
1151 if (opnd1.IsList()) {
1152 auto &srcOpnds = static_cast<ListOperand &>(opnd1);
1153 for (auto regOpnd : srcOpnds.GetOperands()) {
1154 DEBUG_ASSERT(!regOpnd->IsVirtualRegister(), "not be a virtual register");
1155 auto physicalReg = static_cast<AArch64reg>(regOpnd->GetRegisterNumber());
1156 (void)pregLive.insert(physicalReg);
1157 }
1158 }
1159 }
1160
1161 void GraphColorRegAllocator::ComputeLiveRangesUpdateLiveUnitInsnRange(BB &bb, uint32 currPoint)
1162 {
1163 for (auto lin : bb.GetLiveInRegNO()) {
1164 if (lin < kAllRegNum) {
1165 continue;
1166 }
1167 LiveRange *lr = GetLiveRange(lin);
1168 if (lr == nullptr) {
1169 continue;
1170 }
1171 auto lu = lr->FindInLuMap(bb.GetId());
1172 DEBUG_ASSERT(lu != lr->EndOfLuMap(), "container empty check");
1173 if (bb.GetFirstInsn()) {
1174 lu->second->SetBegin(bb.GetFirstInsn()->GetId());
1175 } else {
1176 /* since bb is empty, use the current point as is */
1177 lu->second->SetBegin(currPoint);
1178 }
1179 lu->second->SetBegin(lu->second->GetBegin() - 1);
1180 }
1181 }
1182
1183 bool GraphColorRegAllocator::UpdateInsnCntAndSkipUseless(Insn &insn, uint32 &currPoint) const
1184 {
1185 insn.SetId(currPoint);
1186 if (insn.IsImmaterialInsn() || !insn.IsMachineInstruction()) {
1187 --currPoint;
1188 return true;
1189 }
1190 return false;
1191 }
1192
1193 void GraphColorRegAllocator::UpdateCallInfo(uint32 bbId, uint32 currPoint, const Insn &insn)
1194 {
1195 auto *targetOpnd = insn.GetCallTargetOperand();
1196 CHECK_FATAL(targetOpnd != nullptr, "target is null in Insn::IsCallToFunctionThatNeverReturns");
1197 if (CGOptions::DoIPARA() && targetOpnd->IsFuncNameOpnd()) {
1198 FuncNameOperand *target = static_cast<FuncNameOperand *>(targetOpnd);
1199 const MIRSymbol *funcSt = target->GetFunctionSymbol();
1200 DEBUG_ASSERT(funcSt->GetSKind() == kStFunc, "funcst must be a function name symbol");
1201 MIRFunction *func = funcSt->GetFunction();
1202 if (func != nullptr && func->IsReferedRegsValid()) {
1203 for (auto preg : func->GetReferedRegs()) {
1204 if (AArch64Abi::IsCallerSaveReg(static_cast<AArch64reg>(preg))) {
1205 for (auto vregNO : vregLive) {
1206 LiveRange *lr = lrMap[vregNO];
1207 lr->InsertElemToCallDef(preg);
1208 }
1209 }
1210 }
1211 } else {
1212 for (auto vregNO : vregLive) {
1213 LiveRange *lr = lrMap[vregNO];
1214 lr->SetCrossCall();
1215 }
1216 }
1217 } else {
1218 for (auto vregNO : vregLive) {
1219 LiveRange *lr = lrMap[vregNO];
1220 lr->SetCrossCall();
1221 }
1222 }
1223 for (auto vregNO : vregLive) {
1224 LiveRange *lr = lrMap[vregNO];
1225 lr->IncNumCall();
1226 lr->AddRef(bbId, currPoint, kIsCall);
1227
1228 auto lu = lr->FindInLuMap(bbId);
1229 if (lu != lr->EndOfLuMap()) {
1230 lu->second->SetHasCall(true);
1231 }
1232 }
1233 }
1234
1235 void GraphColorRegAllocator::SetLrMustAssign(const RegOperand *regOpnd)
1236 {
1237 regno_t regNO = regOpnd->GetRegisterNumber();
1238 LiveRange *lr = GetLiveRange(regNO);
1239 if (lr != nullptr) {
1240 lr->SetMustAssigned();
1241 lr->SetIsNonLocal(true);
1242 }
1243 }
1244
1245 void GraphColorRegAllocator::SetupMustAssignedLiveRanges(const Insn &insn)
1246 {
1247 if (!insn.IsSpecialIntrinsic()) {
1248 return;
1249 }
1250 if (insn.GetMachineOpcode() == MOP_asm) {
1251 for (auto regOpnd : static_cast<ListOperand &>(insn.GetOperand(kAsmOutputListOpnd)).GetOperands()) {
1252 SetLrMustAssign(regOpnd);
1253 }
1254 for (auto regOpnd : static_cast<ListOperand &>(insn.GetOperand(kAsmInputListOpnd)).GetOperands()) {
1255 SetLrMustAssign(regOpnd);
1256 }
1257 return;
1258 }
1259 uint32 opndNum = insn.GetOperandSize();
1260 for (uint32 i = 0; i < opndNum; ++i) {
1261 Operand *opnd = &insn.GetOperand(i);
1262 if (!opnd->IsRegister()) {
1263 continue;
1264 }
1265 auto regOpnd = static_cast<RegOperand *>(opnd);
1266 SetLrMustAssign(regOpnd);
1267 }
1268 }
1269
1270 /*
1271 * For each succ in bb->GetSuccs(), if bb->liveout - succ->livein is not empty, the vreg(s) are
1272 * dead on this path (but alive on the other path as there is some use of it on the
1273 * other path). This might be useful for optimization of reload placement later for
1274 * splits (lr split into lr1 & lr2 and lr2 will need to reload.)
1275 * Not for now though.
1276 */
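/*
 * Instruction ids are assigned in reverse program order. currPoint starts at
 * roughly 4 * (#insns + #BBs) so that each insn owns a small id window: a def
 * keeps the insn id while a use takes id - 1 (see CreateLiveRange), and extra
 * decrements separate live-out points, call points and BB boundaries.
 */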
1277 void GraphColorRegAllocator::ComputeLiveRanges()
1278 {
1279 bbVec.clear();
1280 bbVec.resize(cgFunc->NumBBs());
1281
1282 auto currPoint = static_cast<uint32>(cgFunc->GetTotalNumberOfInstructions() + bfs->sortedBBs.size());
1283 /* distinguish use/def */
1284 CHECK_FATAL(currPoint < (INT_MAX >> 2), "integer overflow check"); // multiply by 4 to distinguish def or use reg
1285 currPoint = currPoint << 2; // multiply by 4 to distinguish def or use reg
1286 for (size_t bbIdx = bfs->sortedBBs.size(); bbIdx > 0; --bbIdx) {
1287 BB *bb = bfs->sortedBBs[bbIdx - 1];
1288 bbVec[bb->GetId()] = bb;
1289 bb->SetLevel(bbIdx - 1);
1290
1291 pregLive.clear();
1292 vregLive.clear();
1293 for (auto liveOut : bb->GetLiveOutRegNO()) {
1294 SetupLiveRangeByRegNO(liveOut, *bb, currPoint);
1295 }
1296 --currPoint;
1297
1298 if (bb->GetLastInsn() != nullptr && bb->GetLastInsn()->IsMachineInstruction() && bb->GetLastInsn()->IsCall()) {
1299 UpdateCallInfo(bb->GetId(), currPoint, *bb->GetLastInsn());
1300 }
1301
1302 FOR_BB_INSNS_REV_SAFE(insn, bb, ninsn) {
1303 #ifdef MOVE_COALESCE
1304 if ((insn->GetMachineOpcode() == MOP_xmovrr || insn->GetMachineOpcode() == MOP_wmovrr) &&
1305 (!AArch64isa::IsPhysicalRegister(static_cast<RegOperand &>(insn->GetOperand(0)).GetRegisterNumber())) &&
1306 (static_cast<RegOperand &>(insn->GetOperand(0)).GetRegisterNumber() ==
1307 static_cast<RegOperand &>(insn->GetOperand(1)).GetRegisterNumber())) {
1308 bb->RemoveInsn(*insn);
1309 continue;
1310 }
1311 #endif
1312 if (UpdateInsnCntAndSkipUseless(*insn, currPoint)) {
1313 if (ninsn && ninsn->IsMachineInstruction() && ninsn->IsCall()) {
1314 UpdateCallInfo(bb->GetId(), currPoint, *ninsn);
1315 }
1316 continue;
1317 }
1318
1319 bool multiDef = false;
1320 ComputeLiveRangesForEachDefOperand(*insn, multiDef);
1321 ComputeLiveRangesForEachUseOperand(*insn);
1322
1323 UpdateOpndConflict(*insn, multiDef);
1324 SetupMustAssignedLiveRanges(*insn);
1325
1326 if (ninsn && ninsn->IsMachineInstruction() && ninsn->IsCall()) {
1327 UpdateCallInfo(bb->GetId(), currPoint - kInsnStep, *ninsn);
1328 }
1329
1330 ComputeLiveRangesUpdateIfInsnIsCall(*insn);
1331 /* distinguish use/def */
1332 currPoint -= k2BitSize;
1333 }
1334 ComputeLiveRangesUpdateLiveUnitInsnRange(*bb, currPoint);
1335 /* move one more step for each BB */
1336 --currPoint;
1337 }
1338
1339 if (needDump) {
1340 LogInfo::MapleLogger() << "After ComputeLiveRanges\n";
1341 PrintLiveRanges();
1342 #ifdef USE_LRA
1343 if (doLRA) {
1344 PrintLocalRAInfo("After ComputeLiveRanges");
1345 }
1346 #endif /* USE_LRA */
1347 }
1348 }
1349
1350 /* Create a common stack space for spilling with need_spill */
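/*
 * Up to kSpillMemOpndNum shared slots are reused across spills. Within one
 * insn a slot already marked in operandSpilled is skipped so simultaneous
 * spills get distinct slots; kSpillMemPost marks the chosen slot as consumed.
 */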
1351 MemOperand *GraphColorRegAllocator::CreateSpillMem(uint32 spillIdx, SpillMemCheck check)
1352 {
1353 if (spillIdx >= spillMemOpnds.size()) {
1354 return nullptr;
1355 }
1356
1357 if (operandSpilled[spillIdx]) {
1358 /* For this insn, spill slot already used, need to find next available slot. */
1359 uint32 i;
1360 for (i = spillIdx + 1; i < kSpillMemOpndNum; ++i) {
1361 if (!operandSpilled[i]) {
1362 break;
1363 }
1364 }
1365 CHECK_FATAL(i < kSpillMemOpndNum, "no more available spill mem slot");
1366 spillIdx = i;
1367 }
1368 if (check == kSpillMemPost) {
1369 operandSpilled[spillIdx] = true;
1370 }
1371
1372 if (spillMemOpnds[spillIdx] == nullptr) {
1373 regno_t reg = cgFunc->NewVReg(kRegTyInt, sizeof(int64));
1374 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
1375 spillMemOpnds[spillIdx] = a64CGFunc->GetOrCreatSpillMem(reg);
1376 }
1377 return spillMemOpnds[spillIdx];
1378 }
1379
1380 bool GraphColorRegAllocator::IsLocalReg(regno_t regNO) const
1381 {
1382 LiveRange *lr = GetLiveRange(regNO);
1383 if (lr == nullptr) {
1384 LogInfo::MapleLogger() << "unexpected regNO" << regNO;
1385 return true;
1386 }
1387 return IsLocalReg(*lr);
1388 }
1389
1390 bool GraphColorRegAllocator::IsLocalReg(const LiveRange &lr) const
1391 {
1392 return !lr.GetSplitLr() && (lr.GetNumBBMembers() == 1) && !lr.IsNonLocal();
1393 }
1394
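/*
 * For a BB shared by both LRs (a set bit in the intersected member array),
 * the two live units are treated as disjoint only when one ends before the
 * other begins; otherwise a conflict bit is set in both LRs and the scan stops.
 */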
1395 bool GraphColorRegAllocator::CheckOverlap(uint64 val, uint32 i, LiveRange &lr1, LiveRange &lr2) const
1396 {
1397 regno_t lr1RegNO = lr1.GetRegNO();
1398 regno_t lr2RegNO = lr2.GetRegNO();
1399 for (uint32 x = 0; x < kU64; ++x) {
1400 if ((val & (1ULL << x)) != 0) {
1401 uint32 lastBitSet = i * kU64 + x;
1402 /*
1403 * begin and end should be in the bb info (LU)
1404 * Need to rethink this if.
1405 * Under some circumstances, lr->begin can occur after lr->end.
1406 */
1407 auto lu1 = lr1.FindInLuMap(lastBitSet);
1408 auto lu2 = lr2.FindInLuMap(lastBitSet);
1409 if (lu1 != lr1.EndOfLuMap() && lu2 != lr2.EndOfLuMap() &&
1410 !((lu1->second->GetBegin() < lu2->second->GetBegin() &&
1411 lu1->second->GetEnd() < lu2->second->GetBegin()) ||
1412 (lu2->second->GetBegin() < lu1->second->GetEnd() &&
1413 lu2->second->GetEnd() < lu1->second->GetBegin()))) {
1414 lr1.SetConflictBitArrElem(lr2RegNO);
1415 lr2.SetConflictBitArrElem(lr1RegNO);
1416 return true;
1417 }
1418 }
1419 }
1420 return false;
1421 }
1422
1423 void GraphColorRegAllocator::CheckInterference(LiveRange &lr1, LiveRange &lr2) const
1424 {
1425 uint64 bitArr[bbBuckets];
1426 for (uint32 i = 0; i < bbBuckets; ++i) {
1427 bitArr[i] = lr1.GetBBMember()[i] & lr2.GetBBMember()[i];
1428 }
1429
1430 for (uint32 i = 0; i < bbBuckets; ++i) {
1431 uint64 val = bitArr[i];
1432 if (val == 0) {
1433 continue;
1434 }
1435 if (CheckOverlap(val, i, lr1, lr2)) {
1436 break;
1437 }
1438 }
1439 }
1440
1441 void GraphColorRegAllocator::BuildInterferenceGraphSeparateIntFp(std::vector<LiveRange *> &intLrVec,
1442 std::vector<LiveRange *> &fpLrVec)
1443 {
1444 for (auto it : lrMap) {
1445 LiveRange *lr = it.second;
1446 if (lr->GetRegNO() == 0) {
1447 continue;
1448 }
1449 #ifdef USE_LRA
1450 if (doLRA && IsLocalReg(*lr)) {
1451 continue;
1452 }
1453 #endif /* USE_LRA */
1454 if (lr->GetRegType() == kRegTyInt) {
1455 intLrVec.emplace_back(lr);
1456 } else if (lr->GetRegType() == kRegTyFloat) {
1457 fpLrVec.emplace_back(lr);
1458 } else {
1459 DEBUG_ASSERT(false, "Illegal regType in BuildInterferenceGraph");
1460 LogInfo::MapleLogger() << "error: Illegal regType in BuildInterferenceGraph\n";
1461 }
1462 }
1463 }
1464
1465 /*
1466 * Based on intersection of LRs. When two LRs interfere, add to each other's
1467 * interference list.
1468 */
1469 void GraphColorRegAllocator::BuildInterferenceGraph()
1470 {
1471 std::vector<LiveRange *> intLrVec;
1472 std::vector<LiveRange *> fpLrVec;
1473 BuildInterferenceGraphSeparateIntFp(intLrVec, fpLrVec);
1474
1475 /*
1476 * Once the number of BBs becomes large (big functions), checking for interferences
1477 * takes a significantly long time. Taking advantage of the unique bucket is one strategy
1478 * to avoid unnecessary computation.
1479 */
1480 auto lrSize = intLrVec.size();
1481 std::vector<int32> uniqueBucketIdx(lrSize);
1482 for (uint32 i = 0; i < lrSize; i++) {
1483 uint32 count = 0;
1484 uint32 uniqueIdx;
1485 LiveRange *lr = intLrVec[i];
1486 for (uint32 j = 0; j < bbBuckets; ++j) {
1487 if (lr->GetBBMember()[j]) {
1488 count++;
1489 uniqueIdx = j;
1490 }
1491 }
1492 if (count == 1) {
1493 uniqueBucketIdx[i] = static_cast<int32>(uniqueIdx);
1494 } else {
1495 /* LR spans multiple buckets */
1496 DEBUG_ASSERT(count >= 1, "A live range can not be empty");
1497 uniqueBucketIdx[i] = -1;
1498 }
1499 }
1500
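/*
 * If either LR occupies a single bucket, a cheap AND of that one bucket decides
 * whether the full CheckInterference walk is needed; only when both LRs span
 * multiple buckets is the general check always performed.
 */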
1501 for (auto it1 = intLrVec.begin(); it1 != intLrVec.end(); ++it1) {
1502 LiveRange *lr1 = *it1;
1503 CalculatePriority(*lr1);
1504 int32 lr1UniqueBucketIdx = uniqueBucketIdx[static_cast<uint64>(std::distance(intLrVec.begin(), it1))];
1505 for (auto it2 = it1 + 1; it2 != intLrVec.end(); ++it2) {
1506 LiveRange *lr2 = *it2;
1507 if (lr1->GetRegNO() < lr2->GetRegNO()) {
1508 int32 lr2UniqueBucketIdx = uniqueBucketIdx[static_cast<uint64>(std::distance(intLrVec.begin(), it2))];
1509 if (lr1UniqueBucketIdx == -1 && lr2UniqueBucketIdx == -1) {
1510 CheckInterference(*lr1, *lr2);
1511 } else if (((lr1UniqueBucketIdx >= 0) &&
1512 (lr1->GetBBMember()[lr1UniqueBucketIdx] & lr2->GetBBMember()[lr1UniqueBucketIdx])) ||
1513 ((lr2UniqueBucketIdx >= 0) &&
1514 (lr1->GetBBMember()[lr2UniqueBucketIdx] & lr2->GetBBMember()[lr2UniqueBucketIdx]))) {
1515 CheckInterference(*lr1, *lr2);
1516 }
1517 }
1518 }
1519 }
1520
1521 // Might need to do the same as for intLrVec
1522 for (auto it1 = fpLrVec.begin(); it1 != fpLrVec.end(); ++it1) {
1523 LiveRange *lr1 = *it1;
1524 CalculatePriority(*lr1);
1525 for (auto it2 = it1 + 1; it2 != fpLrVec.end(); ++it2) {
1526 LiveRange *lr2 = *it2;
1527 if (lr1->GetRegNO() < lr2->GetRegNO()) {
1528 CheckInterference(*lr1, *lr2);
1529 }
1530 }
1531 }
1532
1533 if (needDump) {
1534 LogInfo::MapleLogger() << "After BuildInterferenceGraph\n";
1535 PrintLiveRanges();
1536 }
1537 }
1538
1539 void GraphColorRegAllocator::SetBBInfoGlobalAssigned(uint32 bbID, regno_t regNO)
1540 {
1541 DEBUG_ASSERT(bbID < bbRegInfo.size(), "index out of range in GraphColorRegAllocator::SetBBInfoGlobalAssigned");
1542 BBAssignInfo *bbInfo = bbRegInfo[bbID];
1543 if (bbInfo == nullptr) {
1544 bbInfo = memPool->New<BBAssignInfo>(alloc);
1545 bbRegInfo[bbID] = bbInfo;
1546 bbInfo->InitGlobalAssigned();
1547 }
1548 bbInfo->InsertElemToGlobalsAssigned(regNO);
1549 }
1550
1551 bool GraphColorRegAllocator::HaveAvailableColor(const LiveRange &lr, uint32 num) const
1552 {
1553 return ((lr.GetRegType() == kRegTyInt && num < intRegNum) || (lr.GetRegType() == kRegTyFloat && num < fpRegNum));
1554 }
1555
1556 /*
1557 * If the number of members on the interference list is less than #colors, then
1558 * it can be trivially assigned a register. Otherwise it is constrained.
1559 * Separate the LRs based on whether they are constrained or not.
1560 *
1561 * The unconstrained LRs are colored last.
1562 *
1563 * Compute a sorted list of constrained LRs based on priority cost.
1564 */
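/*
 * In practice the loop below fills four buckets: rematerializable or trivially
 * colorable LRs go to unconstrained (or unconstrainedPref when they carry
 * coalescing preferences), intrinsic-bound LRs go to mustAssigned, and the
 * rest are constrained (delayed LRs are set aside separately under
 * OPTIMIZE_FOR_PROLOG).
 */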
1565 void GraphColorRegAllocator::Separate()
1566 {
1567 for (auto it : lrMap) {
1568 LiveRange *lr = it.second;
1569 #ifdef USE_LRA
1570 if (doLRA && IsLocalReg(*lr)) {
1571 continue;
1572 }
1573 #endif /* USE_LRA */
1574 #ifdef OPTIMIZE_FOR_PROLOG
1575 if (doOptProlog && ((lr->GetNumDefs() <= 1) && (lr->GetNumUses() <= 1) && (lr->GetNumCall() > 0)) &&
1576 (lr->GetFrequency() <= (cgFunc->GetFirstBB()->GetFrequency() << 1))) {
1577 if (lr->GetRegType() == kRegTyInt) {
1578 intDelayed.emplace_back(lr);
1579 } else {
1580 fpDelayed.emplace_back(lr);
1581 }
1582 continue;
1583 }
1584 #endif /* OPTIMIZE_FOR_PROLOG */
1585 if (lr->GetRematLevel() != kRematOff) {
1586 unconstrained.emplace_back(lr);
1587 } else if (HaveAvailableColor(*lr, lr->GetNumBBConflicts() + static_cast<uint32>(lr->GetPregvetoSize()) +
1588 static_cast<uint32>(lr->GetForbiddenSize()))) {
1589 if (lr->GetPrefs().size()) {
1590 unconstrainedPref.emplace_back(lr);
1591 } else {
1592 unconstrained.emplace_back(lr);
1593 }
1594 } else if (lr->IsMustAssigned()) {
1595 mustAssigned.emplace_back(lr);
1596 } else {
1597 if (lr->GetPrefs().size() && lr->GetNumCall() == 0) {
1598 unconstrainedPref.emplace_back(lr);
1599 } else {
1600 constrained.emplace_back(lr);
1601 }
1602 }
1603 }
1604 if (needDump) {
1605 LogInfo::MapleLogger() << "Unconstrained : ";
1606 for (auto lr : unconstrainedPref) {
1607 LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1608 }
1609 for (auto lr : unconstrained) {
1610 LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1611 }
1612 LogInfo::MapleLogger() << "\n";
1613 LogInfo::MapleLogger() << "Constrained : ";
1614 for (auto lr : constrained) {
1615 LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1616 }
1617 LogInfo::MapleLogger() << "\n";
1618 LogInfo::MapleLogger() << "mustAssigned : ";
1619 for (auto lr : mustAssigned) {
1620 LogInfo::MapleLogger() << lr->GetRegNO() << " ";
1621 }
1622 LogInfo::MapleLogger() << "\n";
1623 }
1624 }
1625
1626 MapleVector<LiveRange *>::iterator GraphColorRegAllocator::GetHighPriorityLr(MapleVector<LiveRange *> &lrSet) const
1627 {
1628 auto it = lrSet.begin();
1629 auto highestIt = it;
1630 LiveRange *startLr = *it;
1631 float maxPrio = startLr->GetPriority();
1632 ++it;
1633 for (; it != lrSet.end(); ++it) {
1634 LiveRange *lr = *it;
1635 if (lr->GetPriority() > maxPrio) {
1636 maxPrio = lr->GetPriority();
1637 highestIt = it;
1638 }
1639 }
1640 return highestIt;
1641 }
1642
1643 void GraphColorRegAllocator::UpdateForbiddenForNeighbors(const LiveRange &lr) const
1644 {
1645 auto updateForbidden = [&lr, this](regno_t regNO) {
1646 LiveRange *newLr = GetLiveRange(regNO);
1647 DEBUG_ASSERT(newLr != nullptr, "newLr should not be nullptr");
1648 if (!newLr->GetPregveto(lr.GetAssignedRegNO())) {
1649 newLr->InsertElemToForbidden(lr.GetAssignedRegNO());
1650 }
1651 };
1652 ForEachRegArrElem(lr.GetBBConflict(), updateForbidden);
1653 }
1654
1655 void GraphColorRegAllocator::UpdatePregvetoForNeighbors(const LiveRange &lr) const
1656 {
1657 auto updatePregveto = [&lr, this](regno_t regNO) {
1658 LiveRange *newLr = GetLiveRange(regNO);
1659 DEBUG_ASSERT(newLr != nullptr, "newLr should not be nullptr");
1660 newLr->InsertElemToPregveto(lr.GetAssignedRegNO());
1661 newLr->EraseElemFromForbidden(lr.GetAssignedRegNO());
1662 };
1663 ForEachRegArrElem(lr.GetBBConflict(), updatePregveto);
1664 }
1665
1666 /*
1667 * For cases with only one def/use that cross a call.
1668 * It might be more beneficial to spill vs save/restore in prolog/epilog.
1669 * But if the callee register is already used, then it is ok to reuse it again.
1670 * Or in certain cases, just use the callee.
1671 */
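/*
 * Hedged sketch of the heuristic below (an interpretation of the checks, not
 * a separate algorithm):
 *   callee-saved reg already in calleeUsed       -> keep it (save/restore already paid for)
 *   callee-saved reg and calleeUsed size is odd  -> keep it (callee saves go in pairs, so
 *                                                   the second slot is effectively free)
 *   more than one delayed vreg, none used yet    -> keep it (prolog/epilog cost is amortized)
 *   otherwise                                    -> clear the assignment and report false
 */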
1672 bool GraphColorRegAllocator::ShouldUseCallee(LiveRange &lr, const MapleSet<regno_t> &calleeUsed,
1673 const MapleVector<LiveRange *> &delayed) const
1674 {
1675 if (FindIn(calleeUsed, lr.GetAssignedRegNO())) {
1676 return true;
1677 }
1678 if (AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr.GetAssignedRegNO())) &&
1679 (calleeUsed.size() % kDivide2) != 0) {
1680 return true;
1681 }
1682 if (delayed.size() > 1 && calleeUsed.empty()) {
1683 /* If there is more than one vreg that can benefit from callee, use callee */
1684 return true;
1685 }
1686 lr.SetAssignedRegNO(0);
1687 return false;
1688 }
1689
1690 void GraphColorRegAllocator::AddCalleeUsed(regno_t regNO, RegType regType)
1691 {
1692 DEBUG_ASSERT(AArch64isa::IsPhysicalRegister(regNO), "regNO should be physical register");
1693 bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO));
1694 if (isCalleeReg) {
1695 if (regType == kRegTyInt) {
1696 (void)intCalleeUsed.insert(regNO);
1697 } else {
1698 (void)fpCalleeUsed.insert(regNO);
1699 }
1700 }
1701 }
1702
1703 regno_t GraphColorRegAllocator::FindColorForLr(const LiveRange &lr) const
1704 {
1705 regno_t reg = 0;
1706 regno_t base;
1707 RegType regType = lr.GetRegType();
1708 const MapleSet<uint32> *currRegSet = nullptr;
1709 const MapleSet<uint32> *nextRegSet = nullptr;
1710 if (regType == kRegTyInt) {
1711 if (lr.GetNumCall() != 0) {
1712 currRegSet = &intCalleeRegSet;
1713 nextRegSet = &intCallerRegSet;
1714 } else {
1715 currRegSet = &intCallerRegSet;
1716 nextRegSet = &intCalleeRegSet;
1717 }
1718 base = R0;
1719 } else {
1720 if (lr.GetNumCall() != 0) {
1721 currRegSet = &fpCalleeRegSet;
1722 nextRegSet = &fpCallerRegSet;
1723 } else {
1724 currRegSet = &fpCallerRegSet;
1725 nextRegSet = &fpCalleeRegSet;
1726 }
1727 base = V0;
1728 }
1729
1730 #ifdef MOVE_COALESCE
1731 if (lr.GetNumCall() == 0 || (lr.GetNumDefs() + lr.GetNumUses() <= kRegNum2)) {
1732 for (const auto &it : lr.GetPrefs()) {
1733 reg = it + base;
1734 if ((FindIn(*currRegSet, reg) || FindIn(*nextRegSet, reg)) && !lr.GetForbidden(reg) &&
1735 !lr.GetPregveto(reg)) {
1736 return reg;
1737 }
1738 }
1739 }
1740 #endif /* MOVE_COALESCE */
1741 for (const auto &it : *currRegSet) {
1742 reg = it + base;
1743 if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) {
1744 return reg;
1745 }
1746 }
1747 /* Failed to allocate in first choice. Try 2nd choice. */
1748 for (const auto &it : *nextRegSet) {
1749 reg = it + base;
1750 if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg)) {
1751 return reg;
1752 }
1753 }
1754 DEBUG_ASSERT(false, "Failed to find a register");
1755 return 0;
1756 }
1757
1758 regno_t GraphColorRegAllocator::TryToAssignCallerSave(const LiveRange &lr) const
1759 {
1760 regno_t base;
1761 RegType regType = lr.GetRegType();
1762 const MapleSet<uint32> *currRegSet = nullptr;
1763 if (regType == kRegTyInt) {
1764 currRegSet = &intCallerRegSet;
1765 base = R0;
1766 } else {
1767 currRegSet = &fpCallerRegSet;
1768 base = V0;
1769 }
1770
1771 regno_t reg = 0;
1772 #ifdef MOVE_COALESCE
1773 if (lr.GetNumCall() == 0 || (lr.GetNumDefs() + lr.GetNumUses() <= kRegNum2)) {
1774 for (const auto &it : lr.GetPrefs()) {
1775 reg = it + base;
1776 if ((FindIn(*currRegSet, reg)) && !lr.GetForbidden(reg) && !lr.GetPregveto(reg) && !lr.GetCallDef(reg)) {
1777 return reg;
1778 }
1779 }
1780 }
1781 #endif /* MOVE_COALESCE */
1782 for (const auto &it : *currRegSet) {
1783 reg = it + base;
1784 if (!lr.GetForbidden(reg) && !lr.GetPregveto(reg) && !lr.GetCallDef(reg)) {
1785 return reg;
1786 }
1787 }
1788 return 0;
1789 }
1790
1791 /*
1792 * If forbidden list has more registers than max of all BB's local reg
1793 * requirement, then LR can be colored.
1794 * Update LR's color if success, return true, else return false.
1795 */
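/*
 * Hedged outline of the flow below (numbering only for readability):
 *   1. return true immediately if the LR is already colored;
 *   2. fail fast if forbidden + pregveto already exhaust the color set;
 *   3. pick a color via FindColorForLr;
 *   4. if the LR has calls but does not cross one, prefer a caller-saved
 *      register so that no new callee save/restore is introduced;
 *   5. record callee usage, add the color to every neighbor's forbidden set,
 *      and mark it as globally assigned in each member BB for local RA.
 */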
1796 bool GraphColorRegAllocator::AssignColorToLr(LiveRange &lr, bool isDelayed)
1797 {
1798 if (lr.GetAssignedRegNO() > 0) {
1799 /* Already assigned. */
1800 return true;
1801 }
1802 if (!HaveAvailableColor(lr, lr.GetForbiddenSize() + lr.GetPregvetoSize())) {
1803 if (needDump) {
1804 LogInfo::MapleLogger() << "assigned fail to R" << lr.GetRegNO() << "\n";
1805 }
1806 return false;
1807 }
1808 regno_t callerSaveReg = 0;
1809 regno_t reg = FindColorForLr(lr);
1810 if (lr.GetNumCall() != 0 && !lr.GetCrossCall()) {
1811 callerSaveReg = TryToAssignCallerSave(lr);
1812 bool prefCaller = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg)) &&
1813 intCalleeUsed.find(reg) == intCalleeUsed.end() &&
1814 fpCalleeUsed.find(reg) == fpCalleeUsed.end();
1815 if (callerSaveReg != 0 && (prefCaller || !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(reg)))) {
1816 reg = callerSaveReg;
1817 lr.SetNumCall(0);
1818 }
1819 }
1820 lr.SetAssignedRegNO(reg);
1821 if (needDump) {
1822 LogInfo::MapleLogger() << "assigned " << lr.GetAssignedRegNO() << " to R" << lr.GetRegNO() << "\n";
1823 }
1824 if (lr.GetAssignedRegNO() == 0) {
1825 return false;
1826 }
1827 #ifdef OPTIMIZE_FOR_PROLOG
1828 if (doOptProlog && isDelayed) {
1829 if ((lr.GetRegType() == kRegTyInt && !ShouldUseCallee(lr, intCalleeUsed, intDelayed)) ||
1830 (lr.GetRegType() == kRegTyFloat && !ShouldUseCallee(lr, fpCalleeUsed, fpDelayed))) {
1831 return false;
1832 }
1833 }
1834 #endif /* OPTIMIZE_FOR_PROLOG */
1835
1836 AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType());
1837
1838 UpdateForbiddenForNeighbors(lr);
1839 ForEachBBArrElem(lr.GetBBMember(),
1840 [&lr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, lr.GetAssignedRegNO()); });
1841 return true;
1842 }
1843
1844 void GraphColorRegAllocator::PruneLrForSplit(LiveRange &lr, BB &bb, bool remove,
1845 std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1846 std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1847 {
1848 if (bb.GetInternalFlag1()) {
1849 /* already visited */
1850 return;
1851 }
1852
1853 bb.SetInternalFlag1(true);
1854 auto lu = lr.FindInLuMap(bb.GetId());
1855 uint32 defNum = 0;
1856 uint32 useNum = 0;
1857 if (lu != lr.EndOfLuMap()) {
1858 defNum = lu->second->GetDefNum();
1859 useNum = lu->second->GetUseNum();
1860 }
1861
1862 if (remove) {
1863 /* In removal mode, has not encountered a ref yet. */
1864 if (defNum == 0 && useNum == 0) {
1865 if (bb.GetLoop() != nullptr && FindIn(candidateInLoop, bb.GetLoop())) {
1866 /*
1867 * Upward search has found a loop. Regardless of def/use,
1868 * the loop members must be included in the new LR.
1869 */
1870 remove = false;
1871 } else {
1872 /* No ref in this bb. mark as potential remove. */
1873 bb.SetInternalFlag2(true);
1874 return;
1875 }
1876 } else {
1877 /* found a ref, no more removal of bb and preds. */
1878 remove = false;
1879 }
1880 }
1881
1882 if (bb.GetLoop() != nullptr) {
1883 /* With a def in loop, cannot prune that loop */
1884 if (defNum > 0) {
1885 (void)defInLoop.insert(bb.GetLoop());
1886 }
1887 /* bb is in a loop, need to account for the loop-carried dependency */
1888 (void)candidateInLoop.insert(bb.GetLoop());
1889 }
1890 for (auto pred : bb.GetPreds()) {
1891 if (FindNotIn(bb.GetLoopPreds(), pred)) {
1892 PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop);
1893 }
1894 }
1895 for (auto pred : bb.GetEhPreds()) {
1896 if (FindNotIn(bb.GetLoopPreds(), pred)) {
1897 PruneLrForSplit(lr, *pred, remove, candidateInLoop, defInLoop);
1898 }
1899 }
1900 }
1901
1902 void GraphColorRegAllocator::FindBBSharedInSplit(LiveRange &lr,
1903 const std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1904 std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1905 {
1906 /* A loop might be split into two. Need to see over the entire LR if there is a def in the loop. */
1907 auto FindBBSharedFunc = [&lr, &candidateInLoop, &defInLoop, this](uint32 bbID) {
1908 BB *bb = bbVec[bbID];
1909 if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) {
1910 auto lu = lr.FindInLuMap(bb->GetId());
1911 if (lu != lr.EndOfLuMap() && lu->second->GetDefNum() > 0) {
1912 (void)defInLoop.insert(bb->GetLoop());
1913 }
1914 }
1915 };
1916 ForEachBBArrElem(lr.GetBBMember(), FindBBSharedFunc);
1917 }
1918
1919 /*
1920 * Backward traversal of the top part of the split LR.
1921 * Prune the part of the LR that has no downward exposing references.
1922 * Take loops and loop-carried dependencies into account.
1923 * If a candidate bb to be removed is in a loop, store that info.
1924 * If an LR crosses a loop, even if the loop has no def/use, it must
1925 * be included in the new LR.
1926 */
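/*
 * Illustrative example (assumed CFG, not taken from real code): if the new LR
 * covers bb1 -> bb2 -> bb3 and its last reference is in bb2, the backward walk
 * marks bb3 as removable; but if bb3 sits in a loop that also contains a def
 * somewhere in the LR, the loop members are kept so the loop-carried value
 * stays live across the back edge.
 */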
1927 void GraphColorRegAllocator::ComputeBBForNewSplit(LiveRange &newLr, LiveRange &origLr)
1928 {
1929 /*
1930 * If a candidate bb to be removed is in a loop, store that info.
1931 * If an LR crosses a loop, even if the loop has no def/use, it must
1932 * be included in the new LR.
1933 */
1934 std::set<CGFuncLoops *, CGFuncLoopCmp> candidateInLoop;
1935 /* If a bb has a def and is in a loop, store that info. */
1936 std::set<CGFuncLoops *, CGFuncLoopCmp> defInLoop;
1937 std::set<BB *, SortedBBCmpFunc> smember;
1938 ForEachBBArrElem(newLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
1939 for (auto bbIt = smember.rbegin(); bbIt != smember.rend(); ++bbIt) {
1940 BB *bb = *bbIt;
1941 if (bb->GetInternalFlag1() != 0) {
1942 continue;
1943 }
1944 PruneLrForSplit(newLr, *bb, true, candidateInLoop, defInLoop);
1945 }
1946 FindBBSharedInSplit(origLr, candidateInLoop, defInLoop);
1947 auto pruneTopLr = [this, &newLr, &candidateInLoop, &defInLoop](uint32 bbID) {
1948 BB *bb = bbVec[bbID];
1949 if (bb->GetInternalFlag2() != 0) {
1950 if (bb->GetLoop() != nullptr && FindIn(candidateInLoop, bb->GetLoop())) {
1951 return;
1952 }
1953 if (bb->GetLoop() != nullptr || FindNotIn(defInLoop, bb->GetLoop())) {
1954 /* defInLoop should be a subset of candidateInLoop. remove. */
1955 newLr.UnsetMemberBitArrElem(bbID);
1956 }
1957 }
1958 };
1959 ForEachBBArrElem(newLr.GetBBMember(), pruneTopLr); /* prune the top LR. */
1960 }
1961
1962 bool GraphColorRegAllocator::UseIsUncovered(const BB &bb, const BB &startBB, std::vector<bool> &visitedBB)
1963 {
1964 CHECK_FATAL(bb.GetId() < visitedBB.size(), "index out of range");
1965 visitedBB[bb.GetId()] = true;
1966 for (auto pred : bb.GetPreds()) {
1967 if (visitedBB[pred->GetId()]) {
1968 continue;
1969 }
1970 if (pred->GetLevel() <= startBB.GetLevel()) {
1971 return true;
1972 }
1973 if (UseIsUncovered(*pred, startBB, visitedBB)) {
1974 return true;
1975 }
1976 }
1977 for (auto pred : bb.GetEhPreds()) {
1978 if (visitedBB[pred->GetId()]) {
1979 continue;
1980 }
1981 if (pred->GetLevel() <= startBB.GetLevel()) {
1982 return true;
1983 }
1984 if (UseIsUncovered(*pred, startBB, visitedBB)) {
1985 return true;
1986 }
1987 }
1988 return false;
1989 }
1990
1991 void GraphColorRegAllocator::FindUseForSplit(LiveRange &lr, SplitBBInfo &bbInfo, bool &remove,
1992 std::set<CGFuncLoops *, CGFuncLoopCmp> &candidateInLoop,
1993 std::set<CGFuncLoops *, CGFuncLoopCmp> &defInLoop)
1994 {
1995 BB *bb = bbInfo.GetCandidateBB();
1996 const BB *startBB = bbInfo.GetStartBB();
1997 if (bb->GetInternalFlag1() != 0) {
1998 /* already visited */
1999 return;
2000 }
2001 for (auto pred : bb->GetPreds()) {
2002 if (pred->GetInternalFlag1() == 0) {
2003 return;
2004 }
2005 }
2006 for (auto pred : bb->GetEhPreds()) {
2007 if (pred->GetInternalFlag1() == 0) {
2008 return;
2009 }
2010 }
2011
2012 bb->SetInternalFlag1(true);
2013 auto lu = lr.FindInLuMap(bb->GetId());
2014 uint32 defNum = 0;
2015 uint32 useNum = 0;
2016 if (lu != lr.EndOfLuMap()) {
2017 defNum = lu->second->GetDefNum();
2018 useNum = lu->second->GetUseNum();
2019 }
2020
2021 std::vector<bool> visitedBB(cgFunc->GetAllBBs().size(), false);
2022 if (remove) {
2023 /* In removal mode, has not encountered a ref yet. */
2024 if (defNum == 0 && useNum == 0) {
2025 /* No ref in this bb. mark as potential remove. */
2026 bb->SetInternalFlag2(true);
2027 if (bb->GetLoop() != nullptr) {
2028 /* bb is in a loop, need to account for the loop-carried dependency */
2029 (void)candidateInLoop.insert(bb->GetLoop());
2030 }
2031 } else {
2032 /* found a ref, no more removal of bb and preds. */
2033 remove = false;
2034 /* A potential point for an upward exposing use (might be a def). */
2035 lu->second->SetNeedReload(true);
2036 }
2037 } else if ((defNum > 0 || useNum > 0) && UseIsUncovered(*bb, *startBB, visitedBB)) {
2038 lu->second->SetNeedReload(true);
2039 }
2040
2041 /* With a def in loop, cannot prune that loop */
2042 if (bb->GetLoop() != nullptr && defNum > 0) {
2043 (void)defInLoop.insert(bb->GetLoop());
2044 }
2045
2046 for (auto succ : bb->GetSuccs()) {
2047 if (FindNotIn(bb->GetLoopSuccs(), succ)) {
2048 bbInfo.SetCandidateBB(*succ);
2049 FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop);
2050 }
2051 }
2052 for (auto succ : bb->GetEhSuccs()) {
2053 if (FindNotIn(bb->GetLoopSuccs(), succ)) {
2054 bbInfo.SetCandidateBB(*succ);
2055 FindUseForSplit(lr, bbInfo, remove, candidateInLoop, defInLoop);
2056 }
2057 }
2058 }
2059
2060 void GraphColorRegAllocator::ClearLrBBFlags(const std::set<BB *, SortedBBCmpFunc> &member) const
2061 {
2062 for (auto bb : member) {
2063 bb->SetInternalFlag1(0);
2064 bb->SetInternalFlag2(0);
2065 for (auto pred : bb->GetPreds()) {
2066 pred->SetInternalFlag1(0);
2067 pred->SetInternalFlag2(0);
2068 }
2069 for (auto pred : bb->GetEhPreds()) {
2070 pred->SetInternalFlag1(0);
2071 pred->SetInternalFlag2(0);
2072 }
2073 }
2074 }
2075
2076 /*
2077 * Downward traversal of the bottom part of the split LR.
2078 * Prune the part of the LR that has no upward exposing references.
2079 * Take loops and loop-carried dependencies into account.
2080 */
2081 void GraphColorRegAllocator::ComputeBBForOldSplit(LiveRange &newLr, LiveRange &origLr)
2082 {
2083 /* The candidate bb to be removed, if in a loop, store that info. */
2084 std::set<CGFuncLoops *, CGFuncLoopCmp> candidateInLoop;
2085 /* If a bb has a def and is in a loop, store that info. */
2086 std::set<CGFuncLoops *, CGFuncLoopCmp> defInLoop;
2087 SplitBBInfo bbInfo;
2088 bool remove = true;
2089
2090 std::set<BB *, SortedBBCmpFunc> smember;
2091 ForEachBBArrElem(origLr.GetBBMember(), [this, &smember](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
2092 ClearLrBBFlags(smember);
2093 for (auto bb : smember) {
2094 if (bb->GetInternalFlag1() != 0) {
2095 continue;
2096 }
2097 for (auto pred : bb->GetPreds()) {
2098 pred->SetInternalFlag1(true);
2099 }
2100 for (auto pred : bb->GetEhPreds()) {
2101 pred->SetInternalFlag1(true);
2102 }
2103 bbInfo.SetCandidateBB(*bb);
2104 bbInfo.SetStartBB(*bb);
2105 FindUseForSplit(origLr, bbInfo, remove, candidateInLoop, defInLoop);
2106 }
2107 FindBBSharedInSplit(newLr, candidateInLoop, defInLoop);
2108 auto pruneLrFunc = [&origLr, &defInLoop, this](uint32 bbID) {
2109 BB *bb = bbVec[bbID];
2110 if (bb->GetInternalFlag2() != 0) {
2111 if (bb->GetLoop() != nullptr && FindNotIn(defInLoop, bb->GetLoop())) {
2112 origLr.UnsetMemberBitArrElem(bbID);
2113 }
2114 }
2115 };
2116 ForEachBBArrElem(origLr.GetBBMember(), pruneLrFunc);
2117 }
2118
2119 /*
2120 * There is at least one available color for this BB from the neighbors
2121 * minus the ones reserved for local allocation.
2122 * bbAdded : The new BB to be added into the split LR if color is available.
2123 * conflictRegs : Represent the LR before adding the bbAdded. These are the
2124 * forbidden regs before adding the new BBs.
2125 * Side effect : Adding the new forbidden regs from bbAdded into
2126 * conflictRegs if the LR can still be colored.
2127 */
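/*
 * Worked example with assumed numbers: with 10 usable integer colors, if
 * conflictRegs already holds 4 colors, the LR has 3 pregveto registers, and
 * bbAdded introduces 2 newly conflicting assigned colors, then 2 + 3 + 4 = 9 < 10,
 * so bbAdded can join the new LR and the 2 new colors are merged into
 * conflictRegs; once the sum reaches the color count the bb is rejected.
 */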
2128 bool GraphColorRegAllocator::LrCanBeColored(const LiveRange &lr, const BB &bbAdded,
2129 std::unordered_set<regno_t> &conflictRegs)
2130 {
2131 RegType type = lr.GetRegType();
2132
2133 std::unordered_set<regno_t> newConflict;
2134 auto updateConflictFunc = [&bbAdded, &conflictRegs, &newConflict, &lr, this](regno_t regNO) {
2135 /* check the real conflict in current bb */
2136 LiveRange *conflictLr = lrMap[regNO];
2137 /*
2138 * If the bb to be added to the new LR has an actual
2139 * conflict with another LR, and if that LR has already been
2140 * assigned a color that is not in the conflictRegs,
2141 * then add it as a newConflict.
2142 */
2143 if (IsBitArrElemSet(conflictLr->GetBBMember(), bbAdded.GetId())) {
2144 regno_t confReg = conflictLr->GetAssignedRegNO();
2145 if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) {
2146 (void)newConflict.insert(confReg);
2147 }
2148 } else if (conflictLr->GetSplitLr() != nullptr &&
2149 IsBitArrElemSet(conflictLr->GetSplitLr()->GetBBMember(), bbAdded.GetId())) {
2150 /*
2151 * The conflicting LR has already been split into pieces; this ensures
2152 * the color assigned to its split-off part is taken into consideration.
2153 */
2154 regno_t confReg = conflictLr->GetSplitLr()->GetAssignedRegNO();
2155 if ((confReg > 0) && FindNotIn(conflictRegs, confReg) && !lr.GetPregveto(confReg)) {
2156 (void)newConflict.insert(confReg);
2157 }
2158 }
2159 };
2160 ForEachRegArrElem(lr.GetBBConflict(), updateConflictFunc);
2161
2162 size_t numRegs = newConflict.size() + lr.GetPregvetoSize() + conflictRegs.size();
2163
2164 bool canColor = false;
2165 if (type == kRegTyInt) {
2166 if (numRegs < intRegNum) {
2167 canColor = true;
2168 }
2169 } else if (numRegs < fpRegNum) {
2170 canColor = true;
2171 }
2172
2173 if (canColor) {
2174 for (auto regNO : newConflict) {
2175 (void)conflictRegs.insert(regNO);
2176 }
2177 }
2178
2179 /* Update all the registers that conflict when adding the new bb. */
2180 return canColor;
2181 }
2182
2183 /* Support function for LR split. Move one BB from LR1 to LR2. */
2184 void GraphColorRegAllocator::MoveLrBBInfo(LiveRange &oldLr, LiveRange &newLr, BB &bb) const
2185 {
2186 /* initialize backward traversal flag for the bb pruning phase */
2187 bb.SetInternalFlag1(false);
2188 /* initialize bb removal marker */
2189 bb.SetInternalFlag2(false);
2190 /* Insert BB into new LR */
2191 uint32 bbID = bb.GetId();
2192 newLr.SetMemberBitArrElem(bbID);
2193
2194 /* Move LU from old LR to new LR */
2195 auto luIt = oldLr.FindInLuMap(bb.GetId());
2196 if (luIt != oldLr.EndOfLuMap()) {
2197 newLr.SetElemToLuMap(luIt->first, *(luIt->second));
2198 oldLr.EraseLuMap(luIt);
2199 }
2200
2201 /* Remove BB from old LR */
2202 oldLr.UnsetMemberBitArrElem(bbID);
2203 }
2204
2205 /* Is any loop in the set nested within (or equal to) the given loop? */
2206 bool GraphColorRegAllocator::ContainsLoop(const CGFuncLoops &loop,
2207 const std::set<CGFuncLoops *, CGFuncLoopCmp> &loops) const
2208 {
2209 for (const CGFuncLoops *lp : loops) {
2210 while (lp != nullptr) {
2211 if (lp == &loop) {
2212 return true;
2213 }
2214 lp = lp->GetOuterLoop();
2215 }
2216 }
2217 return false;
2218 }
2219
2220 void GraphColorRegAllocator::GetAllLrMemberLoops(LiveRange &lr, std::set<CGFuncLoops *, CGFuncLoopCmp> &loops)
2221 {
2222 auto GetLrMemberFunc = [&loops, this](uint32 bbID) {
2223 BB *bb = bbVec[bbID];
2224 CGFuncLoops *loop = bb->GetLoop();
2225 if (loop != nullptr) {
2226 (void)loops.insert(loop);
2227 }
2228 };
2229 ForEachBBArrElem(lr.GetBBMember(), GetLrMemberFunc);
2230 }
2231
2232 bool GraphColorRegAllocator::SplitLrShouldSplit(LiveRange &lr)
2233 {
2234 if (lr.GetSplitLr() != nullptr || lr.GetNumBBMembers() == 1) {
2235 return false;
2236 }
2237 /* Need to split within the same hierarchy */
2238 uint32 loopID = 0xFFFFFFFF; /* loopID is initialized to the maximum value and then assigned in the lambda below */
2239 bool needSplit = true;
2240 auto setNeedSplit = [&needSplit, &loopID, this](uint32 bbID) -> bool {
2241 BB *bb = bbVec[bbID];
2242 if (loopID == 0xFFFFFFFF) {
2243 if (bb->GetLoop() != nullptr) {
2244 loopID = static_cast<int32>(bb->GetLoop()->GetHeader()->GetId());
2245 } else {
2246 loopID = 0;
2247 }
2248 } else if ((bb->GetLoop() != nullptr && bb->GetLoop()->GetHeader()->GetId() != loopID) ||
2249 (bb->GetLoop() == nullptr && loopID != 0)) {
2250 needSplit = false;
2251 return true;
2252 }
2253 return false;
2254 };
2255 ForEachBBArrElemWithInterrupt(lr.GetBBMember(), setNeedSplit);
2256 return needSplit;
2257 }
2258
2259 /*
2260 * When a BB in the LR has no def or use in it, there is potentially
2261 * no conflict within that BB for the new LR, since the new LR would
2262 * spill the defs, which terminates the new LR unless a later use
2263 * extends it.
2264 * There is no need to compute conflicting register set unless
2265 * there is a def or use.
2266 * It is assumed that the new LR is extended to the def or use.
2267 * Initially newLr is empty; bbs are then added as long as the LR can still be colored.
2268 * Return true if there is a split.
2269 */
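/*
 * Minimal sketch of the loop below: member BBs are visited in sorted order and
 * greedily moved from the original LR into newLr while LrCanBeColored still
 * holds; the first BB that would make newLr uncolorable stops the expansion,
 * and the split is considered successful iff at least one BB was moved.
 */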
2270 bool GraphColorRegAllocator::SplitLrFindCandidateLr(LiveRange &lr, LiveRange &newLr,
2271 std::unordered_set<regno_t> &conflictRegs)
2272 {
2273 if (needDump) {
2274 LogInfo::MapleLogger() << "start split lr for vreg " << lr.GetRegNO() << "\n";
2275 }
2276 std::set<BB *, SortedBBCmpFunc> smember;
2277 ForEachBBArrElem(lr.GetBBMember(), [&smember, this](uint32 bbID) { (void)smember.insert(bbVec[bbID]); });
2278 for (auto bb : smember) {
2279 if (!LrCanBeColored(lr, *bb, conflictRegs)) {
2280 break;
2281 }
2282 MoveLrBBInfo(lr, newLr, *bb);
2283 }
2284
2285 /* return true if split is successful */
2286 return newLr.GetNumBBMembers() != 0;
2287 }
2288
2289 void GraphColorRegAllocator::SplitLrHandleLoops(LiveRange &lr, LiveRange &newLr,
2290 const std::set<CGFuncLoops *, CGFuncLoopCmp> &origLoops,
2291 const std::set<CGFuncLoops *, CGFuncLoopCmp> &newLoops)
2292 {
2293 /*
2294 * bb in loops might need a reload due to loop carried dependency.
2295 * Compute this before pruning the LRs.
2296 * if there is no re-definition, then reload is not necessary.
2297 * Part of the new LR region after the last reference is
2298 * no longer in the LR. Remove those bb.
2299 */
2300 ComputeBBForNewSplit(newLr, lr);
2301
2302 /* With new LR, recompute conflict. */
2303 auto recomputeConflict = [&lr, &newLr, this](uint32 bbID) {
2304 auto lrFunc = [&newLr, &bbID, this](regno_t regNO) {
2305 LiveRange *confLrVec = lrMap[regNO];
2306 if (IsBitArrElemSet(confLrVec->GetBBMember(), bbID) ||
2307 (confLrVec->GetSplitLr() != nullptr && IsBitArrElemSet(confLrVec->GetSplitLr()->GetBBMember(), bbID))) {
2308 /*
2309 * New LR getting the interference does not mean the
2310 * old LR can remove the interference.
2311 * Old LR's interference will be handled at the end of split.
2312 */
2313 newLr.SetConflictBitArrElem(regNO);
2314 }
2315 };
2316 ForEachRegArrElem(lr.GetBBConflict(), lrFunc);
2317 };
2318 ForEachBBArrElem(newLr.GetBBMember(), recomputeConflict);
2319
2320 /* update bb/loop same as for new LR. */
2321 ComputeBBForOldSplit(newLr, lr);
2322 /* Update the conflict interference for the original LR later. */
2323 for (auto loop : newLoops) {
2324 if (!ContainsLoop(*loop, origLoops)) {
2325 continue;
2326 }
2327 for (auto bb : loop->GetLoopMembers()) {
2328 if (!IsBitArrElemSet(newLr.GetBBMember(), bb->GetId())) {
2329 continue;
2330 }
2331 LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bb->GetId());
2332 if (lu->GetUseNum() != 0) {
2333 lu->SetNeedReload(true);
2334 }
2335 }
2336 }
2337 }
2338
2339 void GraphColorRegAllocator::SplitLrFixNewLrCallsAndRlod(LiveRange &newLr,
2340 const std::set<CGFuncLoops *, CGFuncLoopCmp> &origLoops)
2341 {
2342 /* If a loop from the original (2nd) split lies above a bb of the new (1st) split, a use in that bb needs a reload. */
2343 newLr.SetNumCall(0);
2344 auto fixCallsAndRlod = [&newLr, &origLoops, this](uint32 bbID) {
2345 BB *bb = bbVec[bbID];
2346 for (auto loop : origLoops) {
2347 if (loop->GetHeader()->GetLevel() >= bb->GetLevel()) {
2348 continue;
2349 }
2350 LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID);
2351 if (lu->GetUseNum() != 0) {
2352 lu->SetNeedReload(true);
2353 }
2354 }
2355 LiveUnit *lu = newLr.GetLiveUnitFromLuMap(bbID);
2356 if (lu->HasCall()) {
2357 newLr.IncNumCall();
2358 }
2359 };
2360 ForEachBBArrElem(newLr.GetBBMember(), fixCallsAndRlod);
2361 }
2362
2363 void GraphColorRegAllocator::SplitLrFixOrigLrCalls(LiveRange &lr) const
2364 {
2365 lr.SetNumCall(0);
2366 auto fixOrigCalls = [&lr](uint32 bbID) {
2367 LiveUnit *lu = lr.GetLiveUnitFromLuMap(bbID);
2368 if (lu->HasCall()) {
2369 lr.IncNumCall();
2370 }
2371 };
2372 ForEachBBArrElem(lr.GetBBMember(), fixOrigCalls);
2373 }
2374
2375 void GraphColorRegAllocator::SplitLrUpdateInterference(LiveRange &lr)
2376 {
2377 /*
2378 * newLr is now a separate LR from the original lr.
2379 * Update the interference info.
2380 * Also recompute the forbidden info
2381 */
2382 lr.ClearForbidden();
2383 auto updateInterfrence = [&lr, this](regno_t regNO) {
2384 LiveRange *confLrVec = lrMap[regNO];
2385 if (IsBBsetOverlap(lr.GetBBMember(), confLrVec->GetBBMember(), bbBuckets)) {
2386 /* interfere */
2387 if (confLrVec->GetAssignedRegNO() && !lr.GetPregveto(confLrVec->GetAssignedRegNO())) {
2388 lr.InsertElemToForbidden(confLrVec->GetAssignedRegNO());
2389 }
2390 } else {
2391 /* no interference */
2392 lr.UnsetConflictBitArrElem(regNO);
2393 }
2394 };
2395 ForEachRegArrElem(lr.GetBBConflict(), updateInterfrence);
2396 }
2397
2398 void GraphColorRegAllocator::SplitLrUpdateRegInfo(const LiveRange &origLr, LiveRange &newLr,
2399 std::unordered_set<regno_t> &conflictRegs) const
2400 {
2401 for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
2402 if (origLr.GetPregveto(regNO)) {
2403 newLr.InsertElemToPregveto(regNO);
2404 }
2405 }
2406 for (auto regNO : conflictRegs) {
2407 if (!newLr.GetPregveto(regNO)) {
2408 newLr.InsertElemToForbidden(regNO);
2409 }
2410 }
2411 }
2412
2413 void GraphColorRegAllocator::SplitLrErrorCheckAndDebug(const LiveRange &origLr) const
2414 {
2415 if (origLr.GetNumBBMembers() == 0) {
2416 DEBUG_ASSERT(origLr.GetNumBBConflicts() == 0, "Error: member and conflict not match");
2417 }
2418 }
2419
2420 /*
2421 * Pick a starting BB, then expand to maximize the new LR.
2422 * Return the new LR.
2423 */
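/*
 * High-level sketch of the split below (step names follow the helper calls):
 *   SplitLrFindCandidateLr    - grow newLr greedily while it stays colorable
 *   SplitLrHandleLoops        - fix membership/reloads for loop-carried deps
 *   SplitLrFixNewLrCallsAndRlod / SplitLrFixOrigLrCalls - recompute call counts
 *   SplitLrUpdateRegInfo      - copy pregveto, turn conflictRegs into forbidden
 *   SplitLrUpdateInterference - drop edges the original LR no longer has
 *   FindColorForLr(newLr)     - newLr is expected to be colorable at this point
 */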
2424 void GraphColorRegAllocator::SplitLr(LiveRange &lr)
2425 {
2426 if (!SplitLrShouldSplit(lr)) {
2427 return;
2428 }
2429 LiveRange *newLr = NewLiveRange();
2430 /*
2431 * For the new LR, whenever a BB with either a def or
2432 * use is added, then add the registers that the neighbor
2433 * is using to the conflict register set indicating that these
2434 * registers cannot be used for the new LR's color.
2435 */
2436 std::unordered_set<regno_t> conflictRegs;
2437 if (!SplitLrFindCandidateLr(lr, *newLr, conflictRegs)) {
2438 return;
2439 }
2440 #ifdef REUSE_SPILLMEM
2441 /* Copy the original conflict vector for spill reuse optimization */
2442 lr.SetOldConflict(memPool->NewArray<uint64>(regBuckets));
2443 for (uint32 i = 0; i < regBuckets; ++i) {
2444 lr.SetBBConflictElem(static_cast<int32>(i), lr.GetBBConflictElem(static_cast<int32>(i)));
2445 }
2446 #endif /* REUSE_SPILLMEM */
2447
2448 std::set<CGFuncLoops *, CGFuncLoopCmp> newLoops;
2449 std::set<CGFuncLoops *, CGFuncLoopCmp> origLoops;
2450 GetAllLrMemberLoops(*newLr, newLoops);
2451 GetAllLrMemberLoops(lr, origLoops);
2452 SplitLrHandleLoops(lr, *newLr, origLoops, newLoops);
2453 SplitLrFixNewLrCallsAndRlod(*newLr, origLoops);
2454 SplitLrFixOrigLrCalls(lr);
2455
2456 SplitLrUpdateRegInfo(lr, *newLr, conflictRegs);
2457
2458 CalculatePriority(lr);
2459 /* At this point, newLr should be unconstrained. */
2460 lr.SetSplitLr(*newLr);
2461
2462 newLr->SetRegNO(lr.GetRegNO());
2463 newLr->SetRegType(lr.GetRegType());
2464 newLr->SetID(lr.GetID());
2465 newLr->CopyRematerialization(lr);
2466 CalculatePriority(*newLr);
2467 SplitLrUpdateInterference(lr);
2468 newLr->SetAssignedRegNO(FindColorForLr(*newLr));
2469
2470 AddCalleeUsed(newLr->GetAssignedRegNO(), newLr->GetRegType());
2471
2472 /* For the new LR, update assignment for local RA */
2473 ForEachBBArrElem(newLr->GetBBMember(),
2474 [&newLr, this](uint32 bbID) { SetBBInfoGlobalAssigned(bbID, newLr->GetAssignedRegNO()); });
2475
2476 UpdatePregvetoForNeighbors(*newLr);
2477
2478 SplitLrErrorCheckAndDebug(lr);
2479 }
2480
2481 void GraphColorRegAllocator::ColorForOptPrologEpilog()
2482 {
2483 #ifdef OPTIMIZE_FOR_PROLOG
2484 if (!doOptProlog) {
2485 return;
2486 }
2487 for (auto lr : intDelayed) {
2488 if (!AssignColorToLr(*lr, true)) {
2489 lr->SetSpilled(true);
2490 }
2491 }
2492 for (auto lr : fpDelayed) {
2493 if (!AssignColorToLr(*lr, true)) {
2494 lr->SetSpilled(true);
2495 }
2496 }
2497 #endif
2498 }
2499
2500 /*
2501 * From the sorted list of constrained LRs, pick the most profitable LR.
2502 * Split the LR into LRnew1 LRnew2 where LRnew1 has the maximum number of
2503 * BB and is colorable.
2504 * The starting BB for traversal must have a color available.
2505 *
2506 * Assign a color, update neighbor's forbidden list.
2507 *
2508 * Update the conflict graph by change the interference list.
2509 * In the case where both LRnew1 and LRnew2 conflict with a BB, this BB's
2510 * #neighbors is increased. If this BB was unconstrained, must check if
2511 * it is still unconstrained. Move to constrained if necessary.
2512 *
2513 * Color the unconstrained LRs.
2514 */
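/*
 * Hedged walk-through of the loop below: repeatedly pop the highest-priority LR
 * from the work list; if it can be colored, continue; otherwise try SplitLr and
 * re-color, and only when that also fails mark the LR as spilled. The split-off
 * part is colored inside SplitLr via the updated interference information.
 */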
2515 void GraphColorRegAllocator::SplitAndColorForEachLr(MapleVector<LiveRange *> &targetLrVec)
2516 {
2517 while (!targetLrVec.empty()) {
2518 auto highestIt = GetHighPriorityLr(targetLrVec);
2519 LiveRange *lr = *highestIt;
2520 /* check whether the lrs in lr->sconflict that are in unconstrained have turned constrained */
2521 if (highestIt != targetLrVec.end()) {
2522 targetLrVec.erase(highestIt);
2523 } else {
2524 DEBUG_ASSERT(false, "Error: not in targetLrVec");
2525 }
2526 if (AssignColorToLr(*lr)) {
2527 continue;
2528 }
2529 #ifdef USE_SPLIT
2530 SplitLr(*lr);
2531 #endif /* USE_SPLIT */
2532 /*
2533 * When LR is spilled, it potentially has no conflicts as
2534 * each def/use is spilled/reloaded.
2535 */
2536 #ifdef COLOR_SPLIT
2537 if (!AssignColorToLr(*lr)) {
2538 #endif /* COLOR_SPLIT */
2539 lr->SetSpilled(true);
2540 hasSpill = true;
2541 #ifdef COLOR_SPLIT
2542 }
2543 #endif /* COLOR_SPLIT */
2544 }
2545 }
2546
2547 void GraphColorRegAllocator::SplitAndColor()
2548 {
2549 /* handle mustAssigned */
2550 if (needDump) {
2551 LogInfo::MapleLogger() << " starting mustAssigned : \n";
2552 }
2553 SplitAndColorForEachLr(mustAssigned);
2554
2555 if (needDump) {
2556 LogInfo::MapleLogger() << " starting unconstrainedPref : \n";
2557 }
2558 /* assign color for unconstrained */
2559 SplitAndColorForEachLr(unconstrainedPref);
2560
2561 if (needDump) {
2562 LogInfo::MapleLogger() << " starting constrained : \n";
2563 }
2564 /* handle constrained */
2565 SplitAndColorForEachLr(constrained);
2566
2567 if (needDump) {
2568 LogInfo::MapleLogger() << " starting unconstrained : \n";
2569 }
2570 /* assign color for unconstrained */
2571 SplitAndColorForEachLr(unconstrained);
2572
2573 #ifdef OPTIMIZE_FOR_PROLOG
2574 if (doOptProlog) {
2575 ColorForOptPrologEpilog();
2576 }
2577 #endif /* OPTIMIZE_FOR_PROLOG */
2578 }
2579
2580 void GraphColorRegAllocator::HandleLocalRegAssignment(regno_t regNO, LocalRegAllocator &localRa, bool isInt)
2581 {
2582 /* vreg, get a reg for it if not assigned already. */
2583 if (!localRa.IsInRegAssigned(regNO, isInt) && !localRa.isInRegSpilled(regNO, isInt)) {
2584 /* find an available phys reg */
2585 bool founded = false;
2586 LiveRange *lr = lrMap[regNO];
2587 regno_t maxIntReg = R0 + MaxIntPhysRegNum();
2588 regno_t maxFpReg = V0 + MaxFloatPhysRegNum();
2589 regno_t startReg = isInt ? R0 : V0;
2590 regno_t endReg = isInt ? maxIntReg : maxFpReg;
2591 for (uint32 preg = startReg; preg <= endReg; ++preg) {
2592 if (!localRa.IsPregAvailable(preg, isInt)) {
2593 continue;
2594 }
2595 if (lr->GetNumCall() != 0 && !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(preg))) {
2596 continue;
2597 }
2598 if (lr->GetPregveto(preg)) {
2599 continue;
2600 }
2601 regno_t assignedReg = preg;
2602 localRa.ClearPregs(assignedReg, isInt);
2603 localRa.SetPregUsed(assignedReg, isInt);
2604 localRa.SetRegAssigned(regNO, isInt);
2605 localRa.SetRegAssignmentMap(isInt, regNO, assignedReg);
2606 lr->SetAssignedRegNO(assignedReg);
2607 founded = true;
2608 break;
2609 }
2610 if (!founded) {
2611 localRa.SetRegSpilled(regNO, isInt);
2612 lr->SetSpilled(true);
2613 }
2614 }
2615 }
2616
2617 void GraphColorRegAllocator::UpdateLocalRegDefUseCount(regno_t regNO, LocalRegAllocator &localRa, bool isDef,
2618 bool isInt) const
2619 {
2620 auto usedIt = localRa.GetUseInfo().find(regNO);
2621 if (usedIt != localRa.GetUseInfo().end() && !isDef) {
2622 /* reg use, decrement count */
2623 DEBUG_ASSERT(usedIt->second > 0, "Incorrect local ra info");
2624 localRa.SetUseInfoElem(regNO, usedIt->second - 1);
2625 if (!AArch64isa::IsPhysicalRegister(static_cast<AArch64reg>(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) {
2626 localRa.IncUseInfoElem(localRa.GetRegAssignmentItem(isInt, regNO));
2627 }
2628 if (needDump) {
2629 LogInfo::MapleLogger() << "\t\treg " << regNO << " update #use to " << localRa.GetUseInfoElem(regNO)
2630 << "\n";
2631 }
2632 }
2633
2634 auto defIt = localRa.GetDefInfo().find(regNO);
2635 if (defIt != localRa.GetDefInfo().end() && isDef) {
2636 /* reg def, decrement count */
2637 DEBUG_ASSERT(defIt->second > 0, "Incorrect local ra info");
2638 localRa.SetDefInfoElem(regNO, defIt->second - 1);
2639 if (!AArch64isa::IsPhysicalRegister(static_cast<AArch64reg>(regNO)) && localRa.IsInRegAssigned(regNO, isInt)) {
2640 localRa.IncDefInfoElem(localRa.GetRegAssignmentItem(isInt, regNO));
2641 }
2642 if (needDump) {
2643 LogInfo::MapleLogger() << "\t\treg " << regNO << " update #def to " << localRa.GetDefInfoElem(regNO)
2644 << "\n";
2645 }
2646 }
2647 }
2648
2649 void GraphColorRegAllocator::UpdateLocalRegConflict(regno_t regNO, LocalRegAllocator &localRa, bool isInt)
2650 {
2651 LiveRange *lr = lrMap[regNO];
2652 if (lr->GetNumBBConflicts() == 0) {
2653 return;
2654 }
2655 if (!localRa.IsInRegAssigned(regNO, isInt)) {
2656 return;
2657 }
2658 regno_t preg = localRa.GetRegAssignmentItem(isInt, regNO);
2659 ForEachRegArrElem(lr->GetBBConflict(), [&preg, this](regno_t regNO) { lrMap[regNO]->InsertElemToPregveto(preg); });
2660 }
2661
2662 void GraphColorRegAllocator::HandleLocalRaDebug(regno_t regNO, const LocalRegAllocator &localRa, bool isInt) const
2663 {
2664 LogInfo::MapleLogger() << "HandleLocalReg " << regNO << "\n";
2665 LogInfo::MapleLogger() << "\tregUsed:";
2666 uint64 regUsed = localRa.GetPregUsed(isInt);
2667 regno_t base = isInt ? R0 : V0;
2668 regno_t end = isInt ? (RLR - R0) : (V31 - V0);
2669
2670 for (uint32 i = 0; i <= end; ++i) {
2671 if ((regUsed & (1ULL << i)) != 0) {
2672 LogInfo::MapleLogger() << " " << (i + base);
2673 }
2674 }
2675 LogInfo::MapleLogger() << "\n";
2676 LogInfo::MapleLogger() << "\tregs:";
2677 uint64 regs = localRa.GetPregs(isInt);
2678 for (uint32 regnoInLoop = 0; regnoInLoop <= end; ++regnoInLoop) {
2679 if ((regs & (1ULL << regnoInLoop)) != 0) {
2680 LogInfo::MapleLogger() << " " << (regnoInLoop + base);
2681 }
2682 }
2683 LogInfo::MapleLogger() << "\n";
2684 }
2685
2686 void GraphColorRegAllocator::HandleLocalReg(Operand &op, LocalRegAllocator &localRa, const BBAssignInfo *bbInfo,
2687 bool isDef, bool isInt)
2688 {
2689 if (!op.IsRegister()) {
2690 return;
2691 }
2692 auto ®Opnd = static_cast<RegOperand &>(op);
2693 regno_t regNO = regOpnd.GetRegisterNumber();
2694
2695 if (IsUnconcernedReg(regOpnd)) {
2696 return;
2697 }
2698
2699 /* is this a local register ? */
2700 if (regNO >= kAllRegNum && !IsLocalReg(regNO)) {
2701 return;
2702 }
2703
2704 if (needDump) {
2705 HandleLocalRaDebug(regNO, localRa, isInt);
2706 }
2707
2708 if (regOpnd.IsPhysicalRegister()) {
2709 /* conflict with preg is record in lr->pregveto and BBAssignInfo->globalsAssigned */
2710 UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt);
2711 /* See if it is needed by global RA */
2712 if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0) {
2713 if (bbInfo && !bbInfo->GetGlobalsAssigned(regNO)) {
2714 /* This phys reg is now available for assignment for a vreg */
2715 localRa.SetPregs(regNO, isInt);
2716 if (needDump) {
2717 LogInfo::MapleLogger() << "\t\tlast ref, phys-reg " << regNO << " now available\n";
2718 }
2719 }
2720 }
2721 } else {
2722 HandleLocalRegAssignment(regNO, localRa, isInt);
2723 UpdateLocalRegDefUseCount(regNO, localRa, isDef, isInt);
2724 UpdateLocalRegConflict(regNO, localRa, isInt);
2725 if (localRa.GetUseInfoElem(regNO) == 0 && localRa.GetDefInfoElem(regNO) == 0 &&
2726 localRa.IsInRegAssigned(regNO, isInt)) {
2727 /* last ref of vreg, release assignment */
2728 localRa.SetPregs(localRa.GetRegAssignmentItem(isInt, regNO), isInt);
2729 if (needDump) {
2730 LogInfo::MapleLogger() << "\t\tlast ref, release reg " << localRa.GetRegAssignmentItem(isInt, regNO)
2731 << " for " << regNO << "\n";
2732 }
2733 }
2734 }
2735 }
2736
2737 void GraphColorRegAllocator::LocalRaRegSetEraseReg(LocalRegAllocator &localRa, regno_t regNO) const
2738 {
2739 bool isInt = AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO));
2740 if (localRa.IsPregAvailable(regNO, isInt)) {
2741 localRa.ClearPregs(regNO, isInt);
2742 }
2743 }
2744
2745 bool GraphColorRegAllocator::LocalRaInitRegSet(LocalRegAllocator &localRa, uint32 bbID)
2746 {
2747 bool needLocalRa = false;
2748 /* Note physical regs start from R0, V0. */
2749 localRa.InitPregs(MaxIntPhysRegNum(), MaxFloatPhysRegNum(), cgFunc->GetCG()->GenYieldPoint(), intSpillRegSet,
2750 fpSpillRegSet);
2751
2752 localRa.ClearUseInfo();
2753 localRa.ClearDefInfo();
2754 LocalRaInfo *lraInfo = localRegVec[bbID];
2755 DEBUG_ASSERT(lraInfo != nullptr, "lraInfo not be nullptr");
2756 for (const auto &useCntPair : lraInfo->GetUseCnt()) {
2757 regno_t regNO = useCntPair.first;
2758 if (regNO >= kAllRegNum) {
2759 needLocalRa = true;
2760 }
2761 localRa.SetUseInfoElem(useCntPair.first, useCntPair.second);
2762 }
2763 for (const auto &defCntPair : lraInfo->GetDefCnt()) {
2764 regno_t regNO = defCntPair.first;
2765 if (regNO >= kAllRegNum) {
2766 needLocalRa = true;
2767 }
2768 localRa.SetDefInfoElem(defCntPair.first, defCntPair.second);
2769 }
2770 return needLocalRa;
2771 }
2772
2773 void GraphColorRegAllocator::LocalRaInitAllocatableRegs(LocalRegAllocator &localRa, uint32 bbID)
2774 {
2775 BBAssignInfo *bbInfo = bbRegInfo[bbID];
2776 if (bbInfo != nullptr) {
2777 for (regno_t regNO = kInvalidRegNO; regNO < kMaxRegNum; ++regNO) {
2778 if (bbInfo->GetGlobalsAssigned(regNO)) {
2779 LocalRaRegSetEraseReg(localRa, regNO);
2780 }
2781 }
2782 }
2783 }
2784
2785 void GraphColorRegAllocator::LocalRaForEachDefOperand(const Insn &insn, LocalRegAllocator &localRa,
2786 const BBAssignInfo *bbInfo)
2787 {
2788 const InsnDesc *md = insn.GetDesc();
2789 uint32 opndNum = insn.GetOperandSize();
2790 for (uint32 i = 0; i < opndNum; ++i) {
2791 Operand &opnd = insn.GetOperand(i);
2792 /* handle def opnd */
2793 if (!md->GetOpndDes(i)->IsRegDef()) {
2794 continue;
2795 }
2796 auto ®Opnd = static_cast<RegOperand &>(opnd);
2797 bool isInt = (regOpnd.GetRegisterType() == kRegTyInt);
2798 HandleLocalReg(opnd, localRa, bbInfo, true, isInt);
2799 }
2800 }
2801
2802 void GraphColorRegAllocator::LocalRaForEachUseOperand(const Insn &insn, LocalRegAllocator &localRa,
2803 const BBAssignInfo *bbInfo)
2804 {
2805 const InsnDesc *md = insn.GetDesc();
2806 uint32 opndNum = insn.GetOperandSize();
2807 for (uint32 i = 0; i < opndNum; ++i) {
2808 Operand &opnd = insn.GetOperand(i);
2809 if (opnd.IsList()) {
2810 continue;
2811 } else if (opnd.IsMemoryAccessOperand()) {
2812 auto &memOpnd = static_cast<MemOperand &>(opnd);
2813 Operand *base = memOpnd.GetBaseRegister();
2814 Operand *offset = memOpnd.GetIndexRegister();
2815 if (base != nullptr) {
2816 HandleLocalReg(*base, localRa, bbInfo, false, true);
2817 }
2818 if (!memOpnd.IsIntactIndexed()) {
2819 HandleLocalReg(*base, localRa, bbInfo, true, true);
2820 }
2821 if (offset != nullptr) {
2822 HandleLocalReg(*offset, localRa, bbInfo, false, true);
2823 }
2824 } else if (md->GetOpndDes(i)->IsRegUse()) {
2825 auto ®Opnd = static_cast<RegOperand &>(opnd);
2826 bool isInt = (regOpnd.GetRegisterType() == kRegTyInt);
2827 HandleLocalReg(opnd, localRa, bbInfo, false, isInt);
2828 }
2829 }
2830 }
2831
2832 void GraphColorRegAllocator::LocalRaPrepareBB(BB &bb, LocalRegAllocator &localRa)
2833 {
2834 BBAssignInfo *bbInfo = bbRegInfo[bb.GetId()];
2835 FOR_BB_INSNS(insn, &bb) {
2836 if (!insn->IsMachineInstruction()) {
2837 continue;
2838 }
2839
2840 /*
2841 * Use reverse operand order, assuming use first then def for allocation.
2842 * Need to free the use resource so it can be reused for the def.
2843 */
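/*
 * Example with an assumed instruction: for "add v100, v101, v102" the use pass
 * below decrements v101/v102 first and can release their physical registers at
 * their last use, so the def pass may hand one of those registers straight to
 * v100 within the same instruction.
 */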
2844 LocalRaForEachUseOperand(*insn, localRa, bbInfo);
2845 LocalRaForEachDefOperand(*insn, localRa, bbInfo);
2846 }
2847 }
2848
2849 void GraphColorRegAllocator::LocalRaFinalAssignment(const LocalRegAllocator &localRa, BBAssignInfo &bbInfo)
2850 {
2851 for (const auto &intRegAssignmentMapPair : localRa.GetIntRegAssignmentMap()) {
2852 regno_t regNO = intRegAssignmentMapPair.second;
2853 if (needDump) {
2854 LogInfo::MapleLogger() << "[" << intRegAssignmentMapPair.first << "," << regNO << "],";
2855 }
2856 /* Might need to get rid of this copy. */
2857 bbInfo.SetRegMapElem(intRegAssignmentMapPair.first, regNO);
2858 AddCalleeUsed(regNO, kRegTyInt);
2859 }
2860 for (const auto &fpRegAssignmentMapPair : localRa.GetFpRegAssignmentMap()) {
2861 regno_t regNO = fpRegAssignmentMapPair.second;
2862 if (needDump) {
2863 LogInfo::MapleLogger() << "[" << fpRegAssignmentMapPair.first << "," << regNO << "],";
2864 }
2865 /* Might need to get rid of this copy. */
2866 bbInfo.SetRegMapElem(fpRegAssignmentMapPair.first, regNO);
2867 AddCalleeUsed(regNO, kRegTyFloat);
2868 }
2869 }
2870
2871 void GraphColorRegAllocator::LocalRaDebug(const BB &bb, const LocalRegAllocator &localRa) const
2872 {
2873 LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra INT need " << localRa.GetNumIntPregUsed() << " regs\n";
2874 LogInfo::MapleLogger() << "bb " << bb.GetId() << " local ra FP need " << localRa.GetNumFpPregUsed() << " regs\n";
2875 LogInfo::MapleLogger() << "\tpotential assignments:";
2876 for (auto it : localRa.GetIntRegAssignmentMap()) {
2877 LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],";
2878 }
2879 for (auto it : localRa.GetFpRegAssignmentMap()) {
2880 LogInfo::MapleLogger() << "[" << it.first << "," << it.second << "],";
2881 }
2882 LogInfo::MapleLogger() << "\n";
2883 }
2884
2885 /*
2886 * When doAllocate is false, it is the prepass:
2887 * Traverse each BB, keep track of the number of registers required
2888 * for local registers in the BB. Communicate this to global RA.
2889 *
2890 * When doAllocate is true:
2891 * Allocate local registers for each BB based on unused registers
2892 * from global RA. Spill if no register available.
2893 */
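/*
 * Minimal sketch of the two-pass protocol, assuming the usual driver order:
 * LocalRegisterAllocator(false) runs before global coloring so every
 * BBAssignInfo records intLocalRegsNeeded/fpLocalRegsNeeded, and
 * LocalRegisterAllocator(true) runs afterwards, handing out only those
 * physical registers that global RA left unassigned in the BB.
 */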
2894 void GraphColorRegAllocator::LocalRegisterAllocator(bool doAllocate)
2895 {
2896 if (needDump) {
2897 if (doAllocate) {
2898 LogInfo::MapleLogger() << "LRA allocation start\n";
2899 PrintBBAssignInfo();
2900 } else {
2901 LogInfo::MapleLogger() << "LRA preprocessing start\n";
2902 }
2903 }
2904 LocalRegAllocator *localRa = memPool->New<LocalRegAllocator>(*cgFunc, alloc);
2905 for (auto *bb : bfs->sortedBBs) {
2906 uint32 bbID = bb->GetId();
2907
2908 LocalRaInfo *lraInfo = localRegVec[bb->GetId()];
2909 if (lraInfo == nullptr) {
2910 /* No locals to allocate */
2911 continue;
2912 }
2913
2914 localRa->ClearLocalRaInfo();
2915 bool needLocalRa = LocalRaInitRegSet(*localRa, bbID);
2916 if (!needLocalRa) {
2917 /* Only physical regs in bb, no local ra needed. */
2918 continue;
2919 }
2920
2921 if (doAllocate) {
2922 LocalRaInitAllocatableRegs(*localRa, bbID);
2923 }
2924
2925 LocalRaPrepareBB(*bb, *localRa);
2926
2927 BBAssignInfo *bbInfo = bbRegInfo[bb->GetId()];
2928 if (bbInfo == nullptr) {
2929 bbInfo = memPool->New<BBAssignInfo>(alloc);
2930 bbRegInfo[bbID] = bbInfo;
2931 bbInfo->InitGlobalAssigned();
2932 }
2933 bbInfo->SetIntLocalRegsNeeded(localRa->GetNumIntPregUsed());
2934 bbInfo->SetFpLocalRegsNeeded(localRa->GetNumFpPregUsed());
2935
2936 if (doAllocate) {
2937 if (needDump) {
2938 LogInfo::MapleLogger() << "\tbb(" << bb->GetId() << ")final local ra assignments:";
2939 }
2940 LocalRaFinalAssignment(*localRa, *bbInfo);
2941 if (needDump) {
2942 LogInfo::MapleLogger() << "\n";
2943 }
2944 } else if (needDump) {
2945 LocalRaDebug(*bb, *localRa);
2946 }
2947 }
2948 }
2949
2950 MemOperand *GraphColorRegAllocator::GetConsistentReuseMem(const uint64 *conflict,
2951 const std::set<MemOperand *> &usedMemOpnd, uint32 size,
2952 RegType regType)
2953 {
2954 std::set<LiveRange *, SetLiveRangeCmpFunc> sconflict;
2955 regno_t regNO;
2956 for (uint32 i = 0; i < regBuckets; ++i) {
2957 for (uint32 b = 0; b < kU64; ++b) {
2958 if ((conflict[i] & (1ULL << b)) != 0) {
2959 continue;
2960 }
2961 regNO = i * kU64 + b;
2962 if (regNO >= numVregs) {
2963 break;
2964 }
2965 if (GetLiveRange(regNO) != nullptr) {
2966 (void)sconflict.insert(lrMap[regNO]);
2967 }
2968 }
2969 }
2970
2971 for (auto *noConflictLr : sconflict) {
2972 if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType || noConflictLr->GetSpillSize() != size) {
2973 continue;
2974 }
2975 if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) {
2976 return noConflictLr->GetSpillMem();
2977 }
2978 }
2979 return nullptr;
2980 }
2981
2982 MemOperand *GraphColorRegAllocator::GetCommonReuseMem(const uint64 *conflict, const std::set<MemOperand *> &usedMemOpnd,
2983 uint32 size, RegType regType)
2984 {
2985 regno_t regNO;
2986 for (uint32 i = 0; i < regBuckets; ++i) {
2987 for (uint32 b = 0; b < kU64; ++b) {
2988 if ((conflict[i] & (1ULL << b)) != 0) {
2989 continue;
2990 }
2991 regNO = i * kU64 + b;
2992 if (regNO >= numVregs) {
2993 break;
2994 }
2995 LiveRange *noConflictLr = GetLiveRange(regNO);
2996 if (noConflictLr == nullptr || noConflictLr->GetRegType() != regType ||
2997 noConflictLr->GetSpillSize() != size) {
2998 continue;
2999 }
3000 if (usedMemOpnd.find(noConflictLr->GetSpillMem()) == usedMemOpnd.end()) {
3001 return noConflictLr->GetSpillMem();
3002 }
3003 }
3004 }
3005 return nullptr;
3006 }
3007
3008 /* See if any of the non-conflict LR is spilled and use its memOpnd. */
3009 MemOperand *GraphColorRegAllocator::GetReuseMem(uint32 vregNO, uint32 size, RegType regType)
3010 {
3011 if (cgFunc->GetMirModule().GetSrcLang() != kSrcLangC) {
3012 return nullptr;
3013 }
3014 if (IsLocalReg(vregNO)) {
3015 return nullptr;
3016 }
3017
3018 LiveRange *lr = lrMap[vregNO];
3019 const uint64 *conflict;
3020 if (lr->GetSplitLr() != nullptr) {
3021 /*
3022 * For split LR, the vreg liveness is optimized, but for spill location
3023 * the stack location needs to be maintained for the entire LR.
3024 */
3025 return nullptr;
3026 } else {
3027 conflict = lr->GetBBConflict();
3028 }
3029
3030 std::set<MemOperand *> usedMemOpnd;
3031 auto updateMemOpnd = [&usedMemOpnd, this](regno_t regNO) {
3032 if (regNO >= numVregs) {
3033 return;
3034 }
3035 LiveRange *lrInner = GetLiveRange(regNO);
3036 if (lrInner && lrInner->GetSpillMem() != nullptr) {
3037 (void)usedMemOpnd.insert(lrInner->GetSpillMem());
3038 }
3039 };
3040 ForEachRegArrElem(conflict, updateMemOpnd);
3041 uint32 regSize = (size <= k32) ? k32 : k64;
3042 /*
3043 * This is to order the search so memOpnd given out is consistent.
3044 * When vreg#s do not change going through VtableImpl.mpl file
3045 * then this can be simplified.
3046 */
3047 #ifdef CONSISTENT_MEMOPND
3048 return GetConsistentReuseMem(conflict, usedMemOpnd, regSize, regType);
3049 #else /* CONSISTENT_MEMOPND */
3050 return GetCommonReuseMem(conflict, usedMemOpnd, regSize, regType);
3051 #endif /* CONSISTENT_MEMOPND */
3052 }
3053
3054 MemOperand *GraphColorRegAllocator::GetSpillMem(uint32 vregNO, bool isDest, Insn &insn, AArch64reg regNO,
3055 bool &isOutOfRange) const
3056 {
3057 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3058 MemOperand *memOpnd = a64CGFunc->GetOrCreatSpillMem(vregNO);
3059 return (a64CGFunc->AdjustMemOperandIfOffsetOutOfRange(memOpnd, vregNO, isDest, insn, regNO, isOutOfRange));
3060 }
3061
3062 void GraphColorRegAllocator::SpillOperandForSpillPre(Insn &insn, const Operand &opnd, RegOperand &phyOpnd,
3063 uint32 spillIdx, bool needSpill)
3064 {
3065 if (!needSpill) {
3066 return;
3067 }
3068 auto ®Opnd = static_cast<const RegOperand &>(opnd);
3069 uint32 regNO = regOpnd.GetRegisterNumber();
3070 LiveRange *lr = lrMap[regNO];
3071
3072 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3073
3074 MemOperand *spillMem = CreateSpillMem(spillIdx, kSpillMemPre);
3075 DEBUG_ASSERT(spillMem != nullptr, "spillMem nullptr check");
3076
3077 uint32 regSize = regOpnd.GetSize();
3078 PrimType stype;
3079 RegType regType = regOpnd.GetRegisterType();
3080 if (regType == kRegTyInt) {
3081 stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3082 } else {
3083 stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3084 }
3085
3086 if (a64CGFunc->IsImmediateOffsetOutOfRange(*spillMem, k64)) {
3087 regno_t pregNO = R16;
3088 spillMem =
3089 &a64CGFunc->SplitOffsetWithAddInstruction(*spillMem, k64, static_cast<AArch64reg>(pregNO), false, &insn);
3090 }
3091 Insn &stInsn =
3092 cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickStInsn(spillMem->GetSize(), stype), phyOpnd, *spillMem);
3093 std::string comment = " SPILL for spill vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3094 stInsn.SetComment(comment);
3095 insn.GetBB()->InsertInsnBefore(insn, stInsn);
3096 }
3097
3098 void GraphColorRegAllocator::SpillOperandForSpillPost(Insn &insn, const Operand &opnd, RegOperand &phyOpnd,
3099 uint32 spillIdx, bool needSpill)
3100 {
3101 if (!needSpill) {
3102 return;
3103 }
3104
3105 auto ®Opnd = static_cast<const RegOperand &>(opnd);
3106 uint32 regNO = regOpnd.GetRegisterNumber();
3107 LiveRange *lr = lrMap[regNO];
3108 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3109 bool isLastInsn = false;
3110 if (insn.GetBB()->GetKind() == BB::kBBIf && insn.GetBB()->IsLastInsn(&insn)) {
3111 isLastInsn = true;
3112 }
3113
3114 if (lr->GetRematLevel() != kRematOff) {
3115 std::string comment = " REMATERIALIZE for spill vreg: " + std::to_string(regNO);
3116 if (isLastInsn) {
3117 for (auto tgtBB : insn.GetBB()->GetSuccs()) {
3118 std::vector<Insn *> rematInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3119 for (auto &&remat : rematInsns) {
3120 remat->SetComment(comment);
3121 tgtBB->InsertInsnBegin(*remat);
3122 }
3123 }
3124 } else {
3125 std::vector<Insn *> rematInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3126 for (auto &&remat : rematInsns) {
3127 remat->SetComment(comment);
3128 insn.GetBB()->InsertInsnAfter(insn, *remat);
3129 }
3130 }
3131 return;
3132 }
3133
3134 MemOperand *spillMem = CreateSpillMem(spillIdx, kSpillMemPost);
3135 DEBUG_ASSERT(spillMem != nullptr, "spillMem nullptr check");
3136
3137 uint32 regSize = regOpnd.GetSize();
3138 PrimType stype;
3139 RegType regType = regOpnd.GetRegisterType();
3140 if (regType == kRegTyInt) {
3141 stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3142 } else {
3143 stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3144 }
3145
3146 bool isOutOfRange = false;
3147 Insn *nextInsn = insn.GetNextMachineInsn();
3148 if (a64CGFunc->IsImmediateOffsetOutOfRange(*spillMem, k64)) {
3149 regno_t pregNO = R16;
3150 spillMem =
3151 &a64CGFunc->SplitOffsetWithAddInstruction(*spillMem, k64, static_cast<AArch64reg>(pregNO), true, &insn);
3152 isOutOfRange = true;
3153 }
3154 std::string comment =
3155 " RELOAD for spill vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3156 if (isLastInsn) {
3157 for (auto tgtBB : insn.GetBB()->GetSuccs()) {
3158 MOperator mOp = a64CGFunc->PickLdInsn(spillMem->GetSize(), stype);
3159 Insn *newLd = &cgFunc->GetInsnBuilder()->BuildInsn(mOp, phyOpnd, *spillMem);
3160 newLd->SetComment(comment);
3161 tgtBB->InsertInsnBegin(*newLd);
3162 }
3163 } else {
3164 MOperator mOp = a64CGFunc->PickLdInsn(spillMem->GetSize(), stype);
3165 Insn &ldrInsn = cgFunc->GetInsnBuilder()->BuildInsn(mOp, phyOpnd, *spillMem);
3166 ldrInsn.SetComment(comment);
3167 if (isOutOfRange) {
3168 if (nextInsn == nullptr) {
3169 insn.GetBB()->AppendInsn(ldrInsn);
3170 } else {
3171 insn.GetBB()->InsertInsnBefore(*nextInsn, ldrInsn);
3172 }
3173 } else {
3174 insn.GetBB()->InsertInsnAfter(insn, ldrInsn);
3175 }
3176 }
3177 }
3178
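/*
 * Return the memory operand used to spill or reload lr. Reuse the memOpnd already
 * recorded in the live range if present, otherwise try to reuse an existing spill
 * slot (REUSE_SPILLMEM) or create a new one. For an out-of-range offset the base
 * register is the lr's spill reg for a reload and R16 for a spill.
 */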
3179 MemOperand *GraphColorRegAllocator::GetSpillOrReuseMem(LiveRange &lr, uint32 regSize, bool &isOutOfRange, Insn &insn,
3180 bool isDef)
3181 {
3182 (void)regSize;
3183 MemOperand *memOpnd = nullptr;
3184 if (lr.GetSpillMem() != nullptr) {
3185 /* the saved memOpnd cannot be out-of-range */
3186 memOpnd = lr.GetSpillMem();
3187 } else {
3188 #ifdef REUSE_SPILLMEM
3189 memOpnd = GetReuseMem(lr.GetRegNO(), regSize, lr.GetRegType());
3190 if (memOpnd != nullptr) {
3191 lr.SetSpillMem(*memOpnd);
3192 lr.SetSpillSize((regSize <= k32) ? k32 : k64);
3193 } else {
3194 #endif /* REUSE_SPILLMEM */
3195 regno_t baseRegNO;
3196 if (!isDef && lr.GetRegType() == kRegTyInt) {
3197 /* src will use its spill reg as baseRegister when offset out-of-range
3198 * add x16, x29, #max-offset //out-of-range
3199 * ldr x16, [x16, #offset] //reload
3200 * mov xd, x16
3201 */
3202 baseRegNO = lr.GetSpillReg();
3203 if (baseRegNO > RLAST_INT_REG) {
3204 baseRegNO = R16;
3205 }
3206 } else {
3207 /* dest will use R16 as baseRegister when offset out-of-range
3208 * mov x16, xs
3209 * add x17, x29, #max-offset //out-of-range
3210 * str x16, [x17, #offset] //spill
3211 */
3212 baseRegNO = R16;
3213 }
3214 DEBUG_ASSERT(baseRegNO != kRinvalid, "invalid base register number");
3215 memOpnd = GetSpillMem(lr.GetRegNO(), isDef, insn, static_cast<AArch64reg>(baseRegNO), isOutOfRange);
3216 /* dest's spill reg can only be R15 or R16 */
3217 if (isOutOfRange && isDef) {
3218 DEBUG_ASSERT(lr.GetSpillReg() != R16, "can not find valid memopnd's base register");
3219 }
3220 #ifdef REUSE_SPILLMEM
3221 if (!isOutOfRange) {
3222 lr.SetSpillMem(*memOpnd);
3223 lr.SetSpillSize((regSize <= k32) ? k32 : k64);
3224 }
3225 }
3226 #endif /* REUSE_SPILLMEM */
3227 }
3228 return memOpnd;
3229 }
3230
3231 /*
3232 * Create spill insn for the operand.
3233 * When needSpill is true, the spill operand register must be spilled first,
3234 * then used for the current spill, and then reloaded again.
3235 */
3236 Insn *GraphColorRegAllocator::SpillOperand(Insn &insn, const Operand &opnd, bool isDef, RegOperand &phyOpnd,
3237 bool forCall)
3238 {
3239 auto ®Opnd = static_cast<const RegOperand &>(opnd);
3240 uint32 regNO = regOpnd.GetRegisterNumber();
3241 uint32 pregNO = phyOpnd.GetRegisterNumber();
3242 bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(pregNO));
3243 if (needDump) {
3244 LogInfo::MapleLogger() << "SpillOperand " << regNO << "\n";
3245 }
3246 LiveRange *lr = lrMap[regNO];
3247 bool isForCallerSave = lr->GetSplitLr() == nullptr && lr->GetNumCall() && !isCalleeReg;
3248 uint32 regSize = regOpnd.GetSize();
3249 bool isOutOfRange = false;
3250 PrimType stype;
3251 RegType regType = regOpnd.GetRegisterType();
3252 if (regType == kRegTyInt) {
3253 stype = (regSize <= k32) ? PTY_i32 : PTY_i64;
3254 } else {
3255 stype = (regSize <= k32) ? PTY_f32 : PTY_f64;
3256 }
3257 auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
3258
3259 Insn *spillDefInsn = nullptr;
3260 if (isDef) {
3261 if (lr->GetRematLevel() == kRematOff) {
3262 lr->SetSpillReg(pregNO);
3263 Insn *nextInsn = insn.GetNextMachineInsn();
3264 MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, !forCall);
3265 spillDefInsn =
3266 &cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickStInsn(regSize, stype), phyOpnd, *memOpnd);
3267 spillDefInsn->SetIsSpill();
3268 std::string comment = " SPILL vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3269 if (isForCallerSave) {
3270 comment += " for caller save in BB " + std::to_string(insn.GetBB()->GetId());
3271 }
3272 spillDefInsn->SetComment(comment);
3273 if (forCall) {
3274 insn.GetBB()->InsertInsnBefore(insn, *spillDefInsn);
3275 } else if (isOutOfRange) {
3276 if (nextInsn == nullptr) {
3277 insn.GetBB()->AppendInsn(*spillDefInsn);
3278 } else {
3279 insn.GetBB()->InsertInsnBefore(*nextInsn, *spillDefInsn);
3280 }
3281 } else if (insn.GetNext() && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail) {
3282 insn.GetBB()->InsertInsnAfter(*insn.GetNext(), *spillDefInsn);
3283 } else {
3284 insn.GetBB()->InsertInsnAfter(insn, *spillDefInsn);
3285 }
3286 }
3287
3288 if ((insn.GetMachineOpcode() != MOP_xmovkri16) && (insn.GetMachineOpcode() != MOP_wmovkri16)) {
3289 return spillDefInsn;
3290 }
3291 }
3292 if (insn.GetMachineOpcode() == MOP_clinit_tail) {
3293 return nullptr;
3294 }
3295 Insn *nextInsn = insn.GetNextMachineInsn();
3296 lr->SetSpillReg(pregNO);
3297
3298 std::vector<Insn *> spillUseInsns;
3299 std::string comment;
3300 if (lr->GetRematLevel() != kRematOff) {
3301 spillUseInsns = lr->Rematerialize(a64CGFunc, phyOpnd);
3302 comment = " REMATERIALIZE vreg: " + std::to_string(regNO);
3303 } else {
3304 MemOperand *memOpnd = GetSpillOrReuseMem(*lr, regSize, isOutOfRange, insn, forCall);
3305 Insn &spillUseInsn =
3306 cgFunc->GetInsnBuilder()->BuildInsn(a64CGFunc->PickLdInsn(regSize, stype), phyOpnd, *memOpnd);
3307 spillUseInsn.SetIsReload();
3308 spillUseInsns.push_back(&spillUseInsn);
3309 comment = " RELOAD vreg: " + std::to_string(regNO) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
3310 }
3311 if (isForCallerSave) {
3312 comment += " for caller save in BB " + std::to_string(insn.GetBB()->GetId());
3313 }
3314 for (auto &&spillUseInsn : spillUseInsns) {
3315 spillUseInsn->SetComment(comment);
3316 if (forCall) {
3317 if (nextInsn == nullptr) {
3318 insn.GetBB()->AppendInsn(*spillUseInsn);
3319 } else {
3320 insn.GetBB()->InsertInsnBefore(*nextInsn, *spillUseInsn);
3321 }
3322 } else {
3323 insn.GetBB()->InsertInsnBefore(insn, *spillUseInsn);
3324 }
3325 }
3326 if (spillDefInsn != nullptr) {
3327 return spillDefInsn;
3328 }
3329 return &insn;
3330 }
3331
3332 /* Try to find an available reg for the spill. */
3333 bool GraphColorRegAllocator::SetAvailableSpillReg(std::unordered_set<regno_t> &cannotUseReg, LiveRange &lr,
3334 uint64 &usedRegMask)
3335 {
3336 bool isInt = (lr.GetRegType() == kRegTyInt);
3337 regno_t base = isInt ? R0 : V0;
3338 uint32 pregInterval = isInt ? 0 : (V0 - R30);
3339 MapleSet<uint32> &callerRegSet = isInt ? intCallerRegSet : fpCallerRegSet;
3340 MapleSet<uint32> &calleeRegSet = isInt ? intCalleeRegSet : fpCalleeRegSet;
3341
3342 for (const auto &it : callerRegSet) {
3343 regno_t spillReg = it + base;
3344 if (cannotUseReg.find(spillReg) == cannotUseReg.end() &&
3345 (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3346 lr.SetAssignedRegNO(spillReg);
3347 usedRegMask |= 1ULL << (spillReg - pregInterval);
3348 return true;
3349 }
3350 }
3351 for (const auto &it : calleeRegSet) {
3352 regno_t spillReg = it + base;
3353 if (cannotUseReg.find(spillReg) == cannotUseReg.end() &&
3354 (usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3355 lr.SetAssignedRegNO(spillReg);
3356 usedRegMask |= 1ULL << (spillReg - pregInterval);
3357 return true;
3358 }
3359 }
3360 return false;
3361 }
3362
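/*
 * Collect the registers that must not be picked as lr's spill reg at this insn:
 * lr's pregveto registers, registers already assigned to conflicting live ranges
 * that span this BB, and (under LRA) registers assigned locally within the BB.
 */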
3363 void GraphColorRegAllocator::CollectCannotUseReg(std::unordered_set<regno_t> &cannotUseReg, const LiveRange &lr,
3364 Insn &insn)
3365 {
3366 /* Find the bb in the conflict LR that actually conflicts with the current bb. */
3367 for (regno_t regNO = kRinvalid; regNO < kMaxRegNum; ++regNO) {
3368 if (lr.GetPregveto(regNO)) {
3369 (void)cannotUseReg.insert(regNO);
3370 }
3371 }
3372 auto updateCannotUse = [&insn, &cannotUseReg, this](regno_t regNO) {
3373 LiveRange *conflictLr = lrMap[regNO];
3374 /*
3375 * conflictLr->GetAssignedRegNO() might be zero.
3376 * A caller-save spill will be inserted, so the assigned reg may actually be released.
3377 */
3378 if ((conflictLr->GetAssignedRegNO() > 0) && IsBitArrElemSet(conflictLr->GetBBMember(), insn.GetBB()->GetId())) {
3379 if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(conflictLr->GetAssignedRegNO())) &&
3380 conflictLr->GetNumCall() && !conflictLr->GetProcessed()) {
3381 return;
3382 }
3383 (void)cannotUseReg.insert(conflictLr->GetAssignedRegNO());
3384 }
3385 };
3386 ForEachRegArrElem(lr.GetBBConflict(), updateCannotUse);
3387 #ifdef USE_LRA
3388 if (!doLRA) {
3389 return;
3390 }
3391 BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()];
3392 if (bbInfo != nullptr) {
3393 for (const auto ®MapPair : bbInfo->GetRegMap()) {
3394 (void)cannotUseReg.insert(regMapPair.second);
3395 }
3396 }
3397 #endif /* USE_LRA */
3398 }
3399
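/*
 * Pick a physical register to hold the spilled value. For Java, a reserved spill
 * register selected by spillIdx is used. Otherwise scan for a register not yet in
 * usedRegMask; such a register may itself be live, so needSpillLr is set and the
 * caller must save and restore it around the use.
 */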
3400 regno_t GraphColorRegAllocator::PickRegForSpill(uint64 &usedRegMask, RegType regType, uint32 spillIdx,
3401 bool &needSpillLr)
3402 {
3403 regno_t base;
3404 regno_t spillReg;
3405 uint32 pregInterval;
3406 bool isIntReg = (regType == kRegTyInt);
3407 if (isIntReg) {
3408 base = R0;
3409 pregInterval = 0;
3410 } else {
3411 base = V0;
3412 pregInterval = V0 - R30;
3413 }
3414
3415 if (JAVALANG) {
3416 /* Use predetermined spill register */
3417 MapleSet<uint32> &spillRegSet = isIntReg ? intSpillRegSet : fpSpillRegSet;
3418 DEBUG_ASSERT(spillIdx < spillRegSet.size(), "spillIdx larger than spillRegSet.size()");
3419 auto regNumIt = spillRegSet.begin();
3420 for (; spillIdx > 0; --spillIdx) {
3421 ++regNumIt;
3422 }
3423 spillReg = *regNumIt + base;
3424 return spillReg;
3425 }
3426
3427 /* Temporarily find an unused reg to spill */
3428 uint32 maxPhysRegNum = isIntReg ? MaxIntPhysRegNum() : MaxFloatPhysRegNum();
3429 for (spillReg = (maxPhysRegNum + base); spillReg > base; --spillReg) {
3430 if (spillReg >= k64BitSize) {
3431 spillReg = k64BitSize - 1;
3432 }
3433 if ((usedRegMask & (1ULL << (spillReg - pregInterval))) == 0) {
3434 usedRegMask |= (1ULL << (spillReg - pregInterval));
3435 needSpillLr = true;
3436 return spillReg;
3437 }
3438 }
3439
3440 DEBUG_ASSERT(false, "can not find spillReg");
3441 return 0;
3442 }
3443
3444 /* return true if an extra spill is needed */
3445 bool GraphColorRegAllocator::SetRegForSpill(LiveRange &lr, Insn &insn, uint32 spillIdx, uint64 &usedRegMask, bool isDef)
3446 {
3447 std::unordered_set<regno_t> cannotUseReg;
3448 /* SPILL COALESCE */
3449 if (!isDef && (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr)) {
3450 auto &ropnd = static_cast<RegOperand &>(insn.GetOperand(0));
3451 if (ropnd.IsPhysicalRegister()) {
3452 lr.SetAssignedRegNO(ropnd.GetRegisterNumber());
3453 return false;
3454 }
3455 }
3456
3457 CollectCannotUseReg(cannotUseReg, lr, insn);
3458
3459 if (SetAvailableSpillReg(cannotUseReg, lr, usedRegMask)) {
3460 return false;
3461 }
3462
3463 bool needSpillLr = false;
3464 if (!lr.GetAssignedRegNO()) {
3465 /*
3466 * All regs are assigned and none are free.
3467 * Pick a reg to spill and reuse for this spill.
3468 * Need to make sure the reg picked is not assigned to this insn,
3469 * else there will be conflict.
3470 */
3471 RegType regType = lr.GetRegType();
3472 regno_t spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr);
3473 if (insn.GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) {
3474 CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr");
3475 }
3476 lr.SetAssignedRegNO(spillReg);
3477 }
3478 return needSpillLr;
3479 }
3480
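/*
 * Replace a local (LRA) virtual operand with a physical register. If LRA already
 * mapped the vreg in this BB, return that register; otherwise pick a spill register
 * and insert the spill/reload code around the insn.
 */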
3481 RegOperand *GraphColorRegAllocator::GetReplaceOpndForLRA(Insn &insn, const Operand &opnd, uint32 &spillIdx,
3482 uint64 &usedRegMask, bool isDef)
3483 {
3484 auto ®Opnd = static_cast<const RegOperand &>(opnd);
3485 uint32 vregNO = regOpnd.GetRegisterNumber();
3486 RegType regType = regOpnd.GetRegisterType();
3487 BBAssignInfo *bbInfo = bbRegInfo[insn.GetBB()->GetId()];
3488 if (bbInfo == nullptr) {
3489 return nullptr;
3490 }
3491 auto regIt = bbInfo->GetRegMap().find(vregNO);
3492 if (regIt != bbInfo->GetRegMap().end()) {
3493 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3494 static_cast<AArch64reg>(regIt->second), regOpnd.GetSize(), regType);
3495 return &phyOpnd;
3496 }
3497 if (needDump) {
3498 LogInfo::MapleLogger() << "spill vreg " << vregNO << "\n";
3499 }
3500 regno_t spillReg;
3501 bool needSpillLr = false;
3502 if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) ||
3503 (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) {
3504 spillReg = R16;
3505 } else {
3506 /*
3507 * use a reg that is excluded from livein/liveout/bbInfo->regMap.
3508 * Need to make sure the reg picked is not assigned to this insn,
3509 * else there will be conflict.
3510 */
3511 spillReg = PickRegForSpill(usedRegMask, regType, spillIdx, needSpillLr);
3512 if (insn.GetMachineOpcode() == MOP_lazy_ldr && spillReg == R17) {
3513 CHECK_FATAL(false, "register IP1(R17) may be changed when lazy_ldr");
3514 }
3515 AddCalleeUsed(spillReg, regType);
3516 if (needDump) {
3517 LogInfo::MapleLogger() << "\tassigning lra spill reg " << spillReg << "\n";
3518 }
3519 }
3520 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3521 static_cast<AArch64reg>(spillReg), regOpnd.GetSize(), regType);
3522 SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr);
3523 Insn *spill = SpillOperand(insn, regOpnd, isDef, phyOpnd);
3524 if (spill != nullptr) {
3525 SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr);
3526 }
3527 ++spillIdx;
3528 return &phyOpnd;
3529 }
3530
3531 /* get spill reg and check whether an extra spill is needed */
3532 bool GraphColorRegAllocator::GetSpillReg(Insn &insn, LiveRange &lr, const uint32 &spillIdx, uint64 &usedRegMask,
3533 bool isDef)
3534 {
3535 bool needSpillLr = false;
3536 /*
3537 * Find a spill reg for the BB among the interfering LRs.
3538 * Without LRA, this info is very inaccurate. It will falsely interfere
3539 * with all locals, even though the spill may not actually interfere with them.
3540 * For now, every instance of the spill requires a brand new reg assignment.
3541 */
3542 if (needDump) {
3543 LogInfo::MapleLogger() << "LR-regNO " << lr.GetRegNO() << " spilled, finding a spill reg\n";
3544 }
3545 if (insn.IsBranch() || insn.IsCall() || (insn.GetMachineOpcode() == MOP_clinit_tail) ||
3546 (insn.GetNext() && isDef && insn.GetNext()->GetMachineOpcode() == MOP_clinit_tail)) {
3547 /*
3548 * When a cond branch reg is spilled, the value cannot be
3549 * restored after the branch, since the branch can be the target of other branches.
3550 * Doing it properly would require creating an intermediate bb for the reload.
3551 * Use x16; it is taken out of the available set since it is used as a global in the system.
3552 */
3553 lr.SetAssignedRegNO(R16);
3554 } else {
3555 lr.SetAssignedRegNO(0);
3556 needSpillLr = SetRegForSpill(lr, insn, spillIdx, usedRegMask, isDef);
3557 AddCalleeUsed(lr.GetAssignedRegNO(), lr.GetRegType());
3558 }
3559 return needSpillLr;
3560 }
3561
3562 // find prev use/def after prev call
3563 bool GraphColorRegAllocator::EncountPrevRef(const BB &pred, LiveRange &lr, bool isDef, std::vector<bool> &visitedMap)
3564 {
3565 if (!visitedMap[pred.GetId()] && lr.FindInLuMap(pred.GetId()) != lr.EndOfLuMap()) {
3566 LiveUnit *lu = lr.GetLiveUnitFromLuMap(pred.GetId());
3567 if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3568 MapleMap<uint32, uint32> refs = lr.GetRefs(pred.GetId());
3569 auto it = refs.rbegin();
3570 bool findPrevRef = (it->second & kIsCall) == 0;
3571 return findPrevRef;
3572 }
3573 if (lu->HasCall()) {
3574 return false;
3575 }
3576 }
3577 visitedMap[pred.GetId()] = true;
3578 bool found = true;
3579 for (auto predBB : pred.GetPreds()) {
3580 if (!visitedMap[predBB->GetId()]) {
3581 found &= EncountPrevRef(*predBB, lr, isDef, visitedMap);
3582 }
3583 }
3584 return found;
3585 }
3586
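/*
 * Searching backwards from insn's BB, check whether every predecessor path reaches
 * a use/def of lr before reaching a call.
 */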
3587 bool GraphColorRegAllocator::FoundPrevBeforeCall(Insn &insn, LiveRange &lr, bool isDef)
3588 {
3589 bool hasFind = true;
3590 std::vector<bool> visitedMap(bbVec.size() + 1, false);
3591 for (auto pred : insn.GetBB()->GetPreds()) {
3592 hasFind &= EncountPrevRef(*pred, lr, isDef, visitedMap);
3593 if (!hasFind) {
3594 return false;
3595 }
3596 }
3597 return !insn.GetBB()->GetPreds().empty();
3598 }
3599
3600 // find the next def before the next call (intervening uses are skipped)
3601 bool GraphColorRegAllocator::EncountNextRef(const BB &succ, LiveRange &lr, bool isDef, std::vector<bool> &visitedMap)
3602 {
3603 if (lr.FindInLuMap(succ.GetId()) != lr.EndOfLuMap()) {
3604 LiveUnit *lu = lr.GetLiveUnitFromLuMap(succ.GetId());
3605 bool findNextDef = false;
3606 if (lu->GetDefNum() || lu->HasCall()) {
3607 MapleMap<uint32, uint32> refs = lr.GetRefs(succ.GetId());
3608 for (auto it = refs.begin(); it != refs.end(); ++it) {
3609 if ((it->second & kIsDef) != 0) {
3610 findNextDef = true;
3611 break;
3612 }
3613 if ((it->second & kIsCall) != 0) {
3614 break;
3615 }
3616 if ((it->second & kIsUse) != 0) {
3617 continue;
3618 }
3619 }
3620 return findNextDef;
3621 }
3622 if (lu->HasCall()) {
3623 return false;
3624 }
3625 }
3626 visitedMap[succ.GetId()] = true;
3627 bool found = true;
3628 for (auto succBB : succ.GetSuccs()) {
3629 if (!visitedMap[succBB->GetId()]) {
3630 found &= EncountNextRef(*succBB, lr, isDef, visitedMap);
3631 if (!found) {
3632 return false;
3633 }
3634 }
3635 }
3636 return found;
3637 }
3638
3639 bool GraphColorRegAllocator::FoundNextBeforeCall(Insn &insn, LiveRange &lr, bool isDef)
3640 {
3641 bool haveFind = true;
3642 std::vector<bool> visitedMap(bbVec.size() + 1, false);
3643 for (auto succ : insn.GetBB()->GetSuccs()) {
3644 haveFind &= EncountNextRef(*succ, lr, isDef, visitedMap);
3645 if (!haveFind) {
3646 return false;
3647 }
3648 }
3649 return !insn.GetBB()->GetSuccs().empty();
3650 }
3651
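/*
 * Scan the references of lr in the current BB that precede insn. The wider search
 * stops (contSearch = false) as soon as a call or a use/def is found.
 */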
3652 bool GraphColorRegAllocator::HavePrevRefInCurBB(Insn &insn, LiveRange &lr, bool &contSearch) const
3653 {
3654 LiveUnit *lu = lr.GetLiveUnitFromLuMap(insn.GetBB()->GetId());
3655 bool findPrevRef = false;
3656 if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3657 MapleMap<uint32, uint32> refs = lr.GetRefs(insn.GetBB()->GetId());
3658 for (auto it = refs.rbegin(); it != refs.rend(); ++it) {
3659 if (it->first >= insn.GetId()) {
3660 continue;
3661 }
3662 if ((it->second & kIsCall) != 0) {
3663 contSearch = false;
3664 break;
3665 }
3666 if (((it->second & kIsUse) != 0) || ((it->second & kIsDef) != 0)) {
3667 findPrevRef = true;
3668 contSearch = false;
3669 break;
3670 }
3671 }
3672 }
3673 return findPrevRef;
3674 }
3675
3676 bool GraphColorRegAllocator::HaveNextDefInCurBB(Insn &insn, LiveRange &lr, bool &contSearch) const
3677 {
3678 LiveUnit *lu = lr.GetLiveUnitFromLuMap(insn.GetBB()->GetId());
3679 bool findNextDef = false;
3680 if (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall()) {
3681 MapleMap<uint32, uint32> refs = lr.GetRefs(insn.GetBB()->GetId());
3682 for (auto it = refs.begin(); it != refs.end(); ++it) {
3683 if (it->first <= insn.GetId()) {
3684 continue;
3685 }
3686 if ((it->second & kIsCall) != 0) {
3687 contSearch = false;
3688 break;
3689 }
3690 if ((it->second & kIsDef) != 0) {
3691 findNextDef = true;
3692 contSearch = false;
3693 }
3694 }
3695 }
3696 return findNextDef;
3697 }
3698
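/*
 * Decide whether a caller-save spill/reload is needed for lr around insn. It is not
 * needed when a later def in the same BB (for a def) or an earlier use/def reached
 * before any call (for a use) makes the save redundant.
 */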
3699 bool GraphColorRegAllocator::NeedCallerSave(Insn &insn, LiveRange &lr, bool isDef)
3700 {
3701 if (doLRA) {
3702 return true;
3703 }
3704 if (lr.HasDefUse()) {
3705 return true;
3706 }
3707
3708 bool contSearch = true;
3709 bool needed = true;
3710 if (isDef) {
3711 needed = !HaveNextDefInCurBB(insn, lr, contSearch);
3712 } else {
3713 needed = !HavePrevRefInCurBB(insn, lr, contSearch);
3714 }
3715 if (!contSearch) {
3716 return needed;
3717 }
3718
3719 if (isDef) {
3720 needed = true;
3721 } else {
3722 needed = !FoundPrevBeforeCall(insn, lr, isDef);
3723 }
3724 return needed;
3725 }
3726
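/*
 * Replace a virtual register operand with its assigned physical register, handling
 * split live ranges, spilled live ranges and caller-save spill/reload insertion.
 * Returns nullptr for operands that need no replacement.
 */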
3727 RegOperand *GraphColorRegAllocator::GetReplaceOpnd(Insn &insn, const Operand &opnd, uint32 &spillIdx,
3728 uint64 &usedRegMask, bool isDef)
3729 {
3730 if (!opnd.IsRegister()) {
3731 return nullptr;
3732 }
3733 auto ®Opnd = static_cast<const RegOperand &>(opnd);
3734
3735 uint32 vregNO = regOpnd.GetRegisterNumber();
3736 if (vregNO == RFP) {
3737 seenFP = true;
3738 }
3739 RegType regType = regOpnd.GetRegisterType();
3740 if (vregNO < kAllRegNum) {
3741 return nullptr;
3742 }
3743 if (IsUnconcernedReg(regOpnd)) {
3744 return nullptr;
3745 }
3746
3747 #ifdef USE_LRA
3748 if (doLRA && IsLocalReg(vregNO)) {
3749 return GetReplaceOpndForLRA(insn, opnd, spillIdx, usedRegMask, isDef);
3750 }
3751 #endif /* USE_LRA */
3752
3753 DEBUG_ASSERT(vregNO < numVregs, "index out of range of MapleVector in GraphColorRegAllocator::GetReplaceOpnd");
3754 LiveRange *lr = lrMap[vregNO];
3755
3756 bool isSplitPart = false;
3757 bool needSpillLr = false;
3758 if (lr->GetSplitLr() && IsBitArrElemSet(lr->GetSplitLr()->GetBBMember(), insn.GetBB()->GetId())) {
3759 isSplitPart = true;
3760 }
3761
3762 if (lr->IsSpilled() && !isSplitPart) {
3763 needSpillLr = GetSpillReg(insn, *lr, spillIdx, usedRegMask, isDef);
3764 }
3765
3766 regno_t regNO;
3767 if (isSplitPart) {
3768 regNO = lr->GetSplitLr()->GetAssignedRegNO();
3769 } else {
3770 regNO = lr->GetAssignedRegNO();
3771 }
3772 bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(regNO));
3773 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
3774 static_cast<AArch64reg>(regNO), opnd.GetSize(), regType);
3775 if (needDump) {
3776 LogInfo::MapleLogger() << "replace R" << vregNO << " with R" << (regNO - R0) << "\n";
3777 }
3778
3779 insn.AppendComment(" [R" + std::to_string(vregNO) + "] ");
3780
3781 if (isSplitPart && (isCalleeReg || lr->GetSplitLr()->GetNumCall() == 0)) {
3782 if (isDef) {
3783 SpillOperand(insn, opnd, isDef, phyOpnd);
3784 ++spillIdx;
3785 } else {
3786 if (lr->GetSplitLr()->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload()) {
3787 SpillOperand(insn, opnd, isDef, phyOpnd);
3788 ++spillIdx;
3789 }
3790 }
3791 return &phyOpnd;
3792 }
3793
3794 bool needCallerSave = false;
3795 if (lr->GetNumCall() && !isCalleeReg) {
3796 if (isDef) {
3797 needCallerSave = NeedCallerSave(insn, *lr, isDef) && lr->GetRematLevel() == kRematOff;
3798 } else {
3799 needCallerSave = !lr->GetProcessed();
3800 }
3801 }
3802
3803 if (lr->IsSpilled() || (isSplitPart && (lr->GetSplitLr()->GetNumCall() != 0)) || needCallerSave ||
3804 (!isSplitPart && !(lr->IsSpilled()) && lr->GetLiveUnitFromLuMap(insn.GetBB()->GetId())->NeedReload())) {
3805 SpillOperandForSpillPre(insn, regOpnd, phyOpnd, spillIdx, needSpillLr);
3806 Insn *spill = SpillOperand(insn, opnd, isDef, phyOpnd);
3807 if (spill != nullptr) {
3808 SpillOperandForSpillPost(*spill, regOpnd, phyOpnd, spillIdx, needSpillLr);
3809 }
3810 ++spillIdx;
3811 }
3812
3813 return &phyOpnd;
3814 }
3815
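/*
 * Record in usedRegMask the physical registers claimed by the operand's live range
 * and by its split part; a spilled live range has its stale assignment cleared.
 */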
3816 void GraphColorRegAllocator::MarkUsedRegs(Operand &opnd, uint64 &usedRegMask)
3817 {
3818 auto ®Opnd = static_cast<RegOperand &>(opnd);
3819 uint32 pregInterval = (regOpnd.GetRegisterType() == kRegTyInt) ? 0 : (V0 - R30);
3820 uint32 vregNO = regOpnd.GetRegisterNumber();
3821 LiveRange *lr = GetLiveRange(vregNO);
3822 if (lr != nullptr) {
3823 if (lr->IsSpilled()) {
3824 lr->SetAssignedRegNO(0);
3825 }
3826 if (lr->GetAssignedRegNO() != 0) {
3827 usedRegMask |= (1ULL << (lr->GetAssignedRegNO() - pregInterval));
3828 }
3829 if (lr->GetSplitLr() && lr->GetSplitLr()->GetAssignedRegNO()) {
3830 usedRegMask |= (1ULL << (lr->GetSplitLr()->GetAssignedRegNO() - pregInterval));
3831 }
3832 }
3833 }
3834
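/*
 * Walk the insn's operands, recording defs, uses, memory bases and offsets in fInfo
 * and accumulating the mask of physical registers already in use. needProcess is set
 * only when the insn still references a virtual register.
 */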
3835 uint64 GraphColorRegAllocator::FinalizeRegisterPreprocess(FinalizeRegisterInfo &fInfo, const Insn &insn,
3836 bool &needProcess)
3837 {
3838 uint64 usedRegMask = 0;
3839 const InsnDesc *md = insn.GetDesc();
3840 uint32 opndNum = insn.GetOperandSize();
3841 bool hasVirtual = false;
3842 for (uint32 i = 0; i < opndNum; ++i) {
3843 Operand &opnd = insn.GetOperand(i);
3844 DEBUG_ASSERT(md->GetOpndDes(i) != nullptr, "pointer is null in GraphColorRegAllocator::FinalizeRegisters");
3845
3846 if (opnd.IsList()) {
3847 if (insn.GetMachineOpcode() != MOP_asm) {
3848 continue;
3849 }
3850 hasVirtual = true;
3851 if (i == kAsmOutputListOpnd) {
3852 fInfo.SetDefOperand(opnd, static_cast<int32>(i));
3853 }
3854 if (i == kAsmInputListOpnd) {
3855 fInfo.SetUseOperand(opnd, static_cast<int32>(i));
3856 }
3857 } else if (opnd.IsMemoryAccessOperand()) {
3858 auto &memOpnd = static_cast<MemOperand &>(opnd);
3859 Operand *base = memOpnd.GetBaseRegister();
3860 if (base != nullptr) {
3861 fInfo.SetBaseOperand(opnd, static_cast<int32>(i));
3862 MarkUsedRegs(*base, usedRegMask);
3863 hasVirtual |= static_cast<RegOperand *>(base)->IsVirtualRegister();
3864 }
3865 Operand *offset = memOpnd.GetIndexRegister();
3866 if (offset != nullptr) {
3867 fInfo.SetOffsetOperand(opnd);
3868 MarkUsedRegs(*offset, usedRegMask);
3869 hasVirtual |= static_cast<RegOperand *>(offset)->IsVirtualRegister();
3870 }
3871 } else {
3872 bool isDef = md->GetOpndDes(i)->IsRegDef();
3873 if (isDef) {
3874 fInfo.SetDefOperand(opnd, static_cast<int32>(i));
3875 /*
3876 * Need to exclude def also, since it will clobber the result when the
3877 * original value is reloaded.
3878 */
3879 hasVirtual |= static_cast<RegOperand &>(opnd).IsVirtualRegister();
3880 MarkUsedRegs(opnd, usedRegMask);
3881 } else {
3882 fInfo.SetUseOperand(opnd, static_cast<int32>(i));
3883 if (opnd.IsRegister()) {
3884 hasVirtual |= static_cast<RegOperand &>(opnd).IsVirtualRegister();
3885 MarkUsedRegs(opnd, usedRegMask);
3886 }
3887 }
3888 }
3889 } /* operand */
3890 needProcess = hasVirtual;
3891 return usedRegMask;
3892 }
3893
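/*
 * Assign temporary spill/fill registers (R10-R13 or V16-V19) to the spilled live
 * ranges referenced by this insn, avoiding physical registers the insn already uses.
 * For a register move involving a physical register below R20, the spilled vreg
 * simply reuses that register as its spill reg.
 */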
3894 void GraphColorRegAllocator::GenerateSpillFillRegs(const Insn &insn)
3895 {
3896 static regno_t intRegs[kSpillMemOpndNum] = {R10, R11, R12, R13}; // R9 is used for large stack offset temp
3897 static regno_t fpRegs[kSpillMemOpndNum] = {V16, V17, V18, V19};
3898 uint32 opndNum = insn.GetOperandSize();
3899 std::set<regno_t> defPregs;
3900 std::set<regno_t> usePregs;
3901 std::vector<LiveRange *> defLrs;
3902 std::vector<LiveRange *> useLrs;
3903 if (insn.GetMachineOpcode() == MOP_xmovrr || insn.GetMachineOpcode() == MOP_wmovrr) {
3904 RegOperand &opnd1 = static_cast<RegOperand &>(insn.GetOperand(1));
3905 RegOperand &opnd0 = static_cast<RegOperand &>(insn.GetOperand(0));
3906 if (opnd1.GetRegisterNumber() < R20 && opnd0.GetRegisterNumber() >= kAllRegNum) {
3907 LiveRange *lr = lrMap[opnd0.GetRegisterNumber()];
3908 if (lr->IsSpilled()) {
3909 lr->SetSpillReg(opnd1.GetRegisterNumber());
3910 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no spill reg in GenerateSpillFillRegs");
3911 return;
3912 }
3913 }
3914 if (opnd0.GetRegisterNumber() < R20 && opnd1.GetRegisterNumber() >= kAllRegNum) {
3915 LiveRange *lr = lrMap[opnd1.GetRegisterNumber()];
3916 if (lr->IsSpilled()) {
3917 lr->SetSpillReg(opnd0.GetRegisterNumber());
3918 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no spill reg in GenerateSpillFillRegs");
3919 return;
3920 }
3921 }
3922 }
3923 const InsnDesc *md = insn.GetDesc();
3924 bool isIndexedMemOp = false;
3925 for (uint32 opndIdx = 0; opndIdx < opndNum; ++opndIdx) {
3926 Operand *opnd = &insn.GetOperand(opndIdx);
3927 if (opnd == nullptr) {
3928 continue;
3929 }
3930 if (opnd->IsList()) {
3931 // call parameters
3932 } else if (opnd->IsMemoryAccessOperand()) {
3933 auto *memopnd = static_cast<MemOperand *>(opnd);
3934 if (memopnd->GetIndexOpt() == MemOperand::kPreIndex || memopnd->GetIndexOpt() == MemOperand::kPostIndex) {
3935 isIndexedMemOp = true;
3936 }
3937 auto *base = static_cast<RegOperand *>(memopnd->GetBaseRegister());
3938 if (base != nullptr && !IsUnconcernedReg(*base)) {
3939 if (!memopnd->IsIntactIndexed()) {
3940 if (base->IsPhysicalRegister()) {
3941 defPregs.insert(base->GetRegisterNumber());
3942 } else {
3943 LiveRange *lr = lrMap[base->GetRegisterNumber()];
3944 if (lr->IsSpilled()) {
3945 defLrs.emplace_back(lr);
3946 }
3947 }
3948 }
3949 if (base->IsPhysicalRegister()) {
3950 usePregs.insert(base->GetRegisterNumber());
3951 } else {
3952 LiveRange *lr = lrMap[base->GetRegisterNumber()];
3953 if (lr->IsSpilled()) {
3954 useLrs.emplace_back(lr);
3955 }
3956 }
3957 }
3958 RegOperand *offset = static_cast<RegOperand *>(memopnd->GetIndexRegister());
3959 if (offset != nullptr) {
3960 if (offset->IsPhysicalRegister()) {
3961 usePregs.insert(offset->GetRegisterNumber());
3962 } else {
3963 LiveRange *lr = lrMap[offset->GetRegisterNumber()];
3964 if (lr->IsSpilled()) {
3965 useLrs.emplace_back(lr);
3966 }
3967 }
3968 }
3969 } else if (opnd->IsRegister()) {
3970 bool isDef = md->GetOpndDes(static_cast<int>(opndIdx))->IsRegDef();
3971 bool isUse = md->GetOpndDes(static_cast<int>(opndIdx))->IsRegUse();
3972 RegOperand *ropnd = static_cast<RegOperand *>(opnd);
3973 if (IsUnconcernedReg(*ropnd)) {
3974 continue;
3975 }
3976 if (ropnd != nullptr) {
3977 if (isUse) {
3978 if (ropnd->IsPhysicalRegister()) {
3979 usePregs.insert(ropnd->GetRegisterNumber());
3980 } else {
3981 LiveRange *lr = lrMap[ropnd->GetRegisterNumber()];
3982 if (lr->IsSpilled()) {
3983 useLrs.emplace_back(lr);
3984 }
3985 }
3986 }
3987 if (isDef) {
3988 if (ropnd->IsPhysicalRegister()) {
3989 defPregs.insert(ropnd->GetRegisterNumber());
3990 } else {
3991 LiveRange *lr = lrMap[ropnd->GetRegisterNumber()];
3992 if (lr->IsSpilled()) {
3993 defLrs.emplace_back(lr);
3994 }
3995 }
3996 }
3997 }
3998 }
3999 }
4000 auto comparator = [=](const LiveRange *lr1, const LiveRange *lr2) -> bool { return lr1->GetID() > lr2->GetID(); };
4001 std::sort(useLrs.begin(), useLrs.end(), comparator);
4002 for (auto lr : useLrs) {
4003 lr->SetID(insn.GetId());
4004 RegType rtype = lr->GetRegType();
4005 regno_t firstSpillReg = rtype == kRegTyInt ? intRegs[0] : fpRegs[0];
4006 if (lr->GetSpillReg() != 0 && lr->GetSpillReg() < firstSpillReg && lr->GetPregveto(lr->GetSpillReg())) {
4007 lr->SetSpillReg(0);
4008 }
4009 if (lr->GetSpillReg() != 0 && lr->GetSpillReg() >= firstSpillReg &&
4010 usePregs.find(lr->GetSpillReg()) == usePregs.end()) {
4011 usePregs.insert(lr->GetSpillReg());
4012 continue;
4013 } else {
4014 lr->SetSpillReg(0);
4015 }
4016 for (uint32 i = 0; i < kSpillMemOpndNum; i++) {
4017 regno_t preg = rtype == kRegTyInt ? intRegs[i] : fpRegs[i];
4018 if (usePregs.find(preg) == usePregs.end()) {
4019 lr->SetSpillReg(preg);
4020 usePregs.insert(preg);
4021 break;
4022 }
4023 }
4024 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg");
4025 }
4026 size_t spillRegIdx;
4027 if (isIndexedMemOp) {
4028 spillRegIdx = useLrs.size();
4029 } else {
4030 spillRegIdx = 0;
4031 }
4032 for (auto lr : defLrs) {
4033 lr->SetID(insn.GetId());
4034 RegType rtype = lr->GetRegType();
4035 regno_t firstSpillReg = rtype == kRegTyInt ? intRegs[0] : fpRegs[0];
4036 if (lr->GetSpillReg() != 0) {
4037 if (lr->GetSpillReg() < firstSpillReg && lr->GetPregveto(lr->GetSpillReg())) {
4038 lr->SetSpillReg(0);
4039 }
4040 if (lr->GetSpillReg() >= firstSpillReg && defPregs.find(lr->GetSpillReg()) != defPregs.end()) {
4041 lr->SetSpillReg(0);
4042 }
4043 }
4044 if (lr->GetSpillReg() != 0) {
4045 continue;
4046 }
4047 for (; spillRegIdx < kSpillMemOpndNum; spillRegIdx++) {
4048 regno_t preg = rtype == kRegTyInt ? intRegs[spillRegIdx] : fpRegs[spillRegIdx];
4049 if (defPregs.find(preg) == defPregs.end()) {
4050 lr->SetSpillReg(preg);
4051 defPregs.insert(preg);
4052 break;
4053 }
4054 }
4055 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg");
4056 }
4057 }
4058
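/*
 * Emit the fill (reload) before insn for a use, or the spill (store) after insn for
 * a def, of a spilled vreg, using the spill register chosen by GenerateSpillFillRegs.
 * Returns the replacing physical operand, or nullptr if the vreg is not spilled.
 */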
4059 RegOperand *GraphColorRegAllocator::CreateSpillFillCode(const RegOperand &opnd, Insn &insn, uint32 spillCnt, bool isdef)
4060 {
4061 regno_t vregno = opnd.GetRegisterNumber();
4062 LiveRange *lr = GetLiveRange(vregno);
4063 if (lr != nullptr && lr->IsSpilled()) {
4064 AArch64CGFunc *a64cgfunc = static_cast<AArch64CGFunc *>(cgFunc);
4065 uint32 bits = opnd.GetSize();
4066 if (bits < k32BitSize) {
4067 bits = k32BitSize;
4068 }
4069 if (cgFunc->IsExtendReg(vregno)) {
4070 bits = k64BitSize;
4071 }
4072 regno_t spreg = 0;
4073 RegType rtype = lr->GetRegType();
4074 spreg = lr->GetSpillReg();
4075 DEBUG_ASSERT(lr->GetSpillReg() != 0, "no reg in CreateSpillFillCode");
4076 RegOperand *regopnd =
4077 &a64cgfunc->GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(spreg), opnd.GetSize(), rtype);
4078
4079 if (lr->GetRematLevel() != kRematOff) {
4080 if (isdef) {
4081 return nullptr;
4082 } else {
4083 std::vector<Insn *> rematInsns = lr->Rematerialize(a64cgfunc, *static_cast<RegOperand *>(regopnd));
4084 for (auto &&remat : rematInsns) {
4085 std::string comment = " REMATERIALIZE color vreg: " + std::to_string(vregno);
4086 remat->SetComment(comment);
4087 insn.GetBB()->InsertInsnBefore(insn, *remat);
4088 }
4089 return regopnd;
4090 }
4091 }
4092
4093 bool isOutOfRange = false;
4094 Insn *nextInsn = insn.GetNextMachineInsn();
4095 MemOperand *loadmem = GetSpillOrReuseMem(*lr, opnd.GetSize(), isOutOfRange, insn, isdef);
4096 PrimType pty = (lr->GetRegType() == kRegTyInt) ? ((bits > k32BitSize) ? PTY_i64 : PTY_i32)
4097 : ((bits > k32BitSize) ? PTY_f64 : PTY_f32);
4098 CHECK_FATAL(spillCnt < kSpillMemOpndNum, "spill count exceeded");
4099 Insn *memInsn;
4100 if (isdef) {
4101 memInsn = &cgFunc->GetInsnBuilder()->BuildInsn(a64cgfunc->PickStInsn(bits, pty), *regopnd, *loadmem);
4102 memInsn->SetIsSpill();
4103 std::string comment =
4104 " SPILLcolor vreg: " + std::to_string(vregno) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
4105 memInsn->SetComment(comment);
4106 if (nextInsn == nullptr) {
4107 insn.GetBB()->AppendInsn(*memInsn);
4108 } else {
4109 insn.GetBB()->InsertInsnBefore(*nextInsn, *memInsn);
4110 }
4111 } else {
4112 memInsn = &cgFunc->GetInsnBuilder()->BuildInsn(a64cgfunc->PickLdInsn(bits, pty), *regopnd, *loadmem);
4113 memInsn->SetIsReload();
4114 std::string comment =
4115 " RELOADcolor vreg: " + std::to_string(vregno) + " op:" + kOpcodeInfo.GetName(lr->GetOp());
4116 memInsn->SetComment(comment);
4117 insn.GetBB()->InsertInsnBefore(insn, *memInsn);
4118 }
4119 return regopnd;
4120 }
4121 return nullptr;
4122 }
4123
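/*
 * Final pass over all insns: rewrite every spilled virtual operand with spill/fill
 * code and a temporary physical register. Returns true if any insn was changed.
 */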
4124 bool GraphColorRegAllocator::SpillLiveRangeForSpills()
4125 {
4126 bool done = false;
4127 for (uint32_t bbIdx = 0; bbIdx < bfs->sortedBBs.size(); bbIdx++) {
4128 BB *bb = bfs->sortedBBs[bbIdx];
4129 FOR_BB_INSNS(insn, bb) {
4130 uint32 spillCnt;
4131 if (insn->IsImmaterialInsn() || !insn->IsMachineInstruction() || insn->GetId() == 0) {
4132 continue;
4133 }
4134 spillCnt = 0;
4135 const InsnDesc *md = insn->GetDesc();
4136 uint32 opndNum = insn->GetOperandSize();
4137 GenerateSpillFillRegs(*insn);
4138 for (uint32 i = 0; i < opndNum; ++i) {
4139 Operand *opnd = &insn->GetOperand(i);
4140 if (opnd == nullptr) {
4141 continue;
4142 }
4143 if (opnd->IsList()) {
4144 // call parameters
4145 } else if (opnd->IsMemoryAccessOperand()) {
4146 MemOperand *newmemopnd = nullptr;
4147 auto *memopnd = static_cast<MemOperand *>(opnd);
4148 auto *base = static_cast<RegOperand *>(memopnd->GetBaseRegister());
4149 if (base != nullptr && base->IsVirtualRegister()) {
4150 RegOperand *replace = CreateSpillFillCode(*base, *insn, spillCnt);
4151 if (!memopnd->IsIntactIndexed()) {
4152 (void)CreateSpillFillCode(*base, *insn, spillCnt, true);
4153 }
4154 if (replace != nullptr) {
4155 spillCnt++;
4156 newmemopnd = (static_cast<MemOperand *>(opnd)->Clone(*cgFunc->GetMemoryPool()));
4157 newmemopnd->SetBaseRegister(*replace);
4158 insn->SetOperand(i, *newmemopnd);
4159 done = true;
4160 }
4161 }
4162 RegOperand *offset = static_cast<RegOperand *>(memopnd->GetIndexRegister());
4163 if (offset != nullptr && offset->IsVirtualRegister()) {
4164 RegOperand *replace = CreateSpillFillCode(*offset, *insn, spillCnt);
4165 if (replace != nullptr) {
4166 spillCnt++;
4167 if (newmemopnd == nullptr) {
4168 newmemopnd = (static_cast<MemOperand *>(opnd)->Clone(*cgFunc->GetMemoryPool()));
4169 }
4170 newmemopnd->SetIndexRegister(*replace);
4171 insn->SetOperand(i, *newmemopnd);
4172 done = true;
4173 }
4174 }
4175 } else if (opnd->IsRegister()) {
4176 bool isdef = md->opndMD[i]->IsRegDef();
4177 bool isuse = md->opndMD[i]->IsRegUse();
4178 RegOperand *replace = CreateSpillFillCode(*static_cast<RegOperand *>(opnd), *insn, spillCnt, isdef);
4179 if (isuse && isdef) {
4180 (void)CreateSpillFillCode(*static_cast<RegOperand *>(opnd), *insn, spillCnt, false);
4181 }
4182 if (replace != nullptr) {
4183 if (!isdef) {
4184 spillCnt++;
4185 }
4186 insn->SetOperand(i, *replace);
4187 done = true;
4188 }
4189 }
4190 }
4191 }
4192 }
4193 return done;
4194 }
4195
4196 static bool ReloadAtCallee(CgOccur *occ)
4197 {
4198 auto *defOcc = occ->GetDef();
4199 if (defOcc == nullptr || defOcc->GetOccType() != kOccStore) {
4200 return false;
4201 }
4202 return static_cast<CgStoreOcc *>(defOcc)->Reload();
4203 }
4204
4205 void CallerSavePre::DumpWorkCandAndOcc()
4206 {
4207 if (workCand->GetTheOperand()->IsRegister()) {
4208 LogInfo::MapleLogger() << "Cand R";
4209 LogInfo::MapleLogger() << static_cast<RegOperand *>(workCand->GetTheOperand())->GetRegisterNumber() << '\n';
4210 } else {
4211 LogInfo::MapleLogger() << "Cand Index" << workCand->GetIndex() << '\n';
4212 }
4213 for (CgOccur *occ : allOccs) {
4214 occ->Dump();
4215 LogInfo::MapleLogger() << '\n';
4216 }
4217 }
4218
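/*
 * Apply the results of the PRE analysis: insert reloads before uses that still need
 * one, at the end of the incoming block for phi operands, and right after the call
 * for store occurrences marked for reload.
 */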
4219 void CallerSavePre::CodeMotion()
4220 {
4221 constexpr uint32 limitNum = UINT32_MAX;
4222 uint32 cnt = 0;
4223 for (auto *occ : allOccs) {
4224 if (occ->GetOccType() == kOccUse) {
4225 ++cnt;
4226 beyondLimit |= (cnt == limitNum);
4227 if (!beyondLimit && dump) {
4228 LogInfo::MapleLogger() << "opt use occur: ";
4229 occ->Dump();
4230 }
4231 }
4232 if (occ->GetOccType() == kOccUse &&
4233 (beyondLimit || (static_cast<CgUseOcc *>(occ)->Reload() && !ReloadAtCallee(occ)))) {
4234 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4235 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4236 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4237 (void)regAllocator->SpillOperand(*occ->GetInsn(), *occ->GetOperand(), false, phyOpnd);
4238 continue;
4239 }
4240 if (occ->GetOccType() == kOccPhiopnd && static_cast<CgPhiOpndOcc *>(occ)->Reload() && !ReloadAtCallee(occ)) {
4241 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4242 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4243 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4244 Insn *insn = occ->GetBB()->GetLastInsn();
4245 if (insn == nullptr) {
4246 insn = &(static_cast<AArch64CGFunc *>(func)->CreateCommentInsn("reload caller save register"));
4247 occ->GetBB()->AppendInsn(*insn);
4248 }
4249 auto defOcc = occ->GetDef();
4250 bool forCall = (defOcc != nullptr && insn == defOcc->GetInsn());
4251 (void)regAllocator->SpillOperand(*insn, *occ->GetOperand(), false, phyOpnd, forCall);
4252 continue;
4253 }
4254 if (occ->GetOccType() == kOccStore && static_cast<CgStoreOcc *>(occ)->Reload()) {
4255 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(func)->GetOrCreatePhysicalRegisterOperand(
4256 static_cast<AArch64reg>(workLr->GetAssignedRegNO()), occ->GetOperand()->GetSize(),
4257 static_cast<RegOperand *>(occ->GetOperand())->GetRegisterType());
4258 (void)regAllocator->SpillOperand(*occ->GetInsn(), *occ->GetOperand(), false, phyOpnd, true);
4259 continue;
4260 }
4261 }
4262 if (dump) {
4263 PreWorkCand *curCand = workCand;
4264 LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex()
4265 << " after codemotion ===========\n";
4266 DumpWorkCandAndOcc();
4267 func->DumpCFGToDot("raCodeMotion-");
4268 }
4269 }
4270
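/*
 * Propagate reload requirements backwards from a use occurrence: if its defining
 * occurrence is a call (store) without reload, or a phi that is not down-safe or not
 * available, the use (or phi operand) itself must reload the value; otherwise the
 * requirement is propagated to the phi's operands.
 */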
4271 void CallerSavePre::UpdateLoadSite(CgOccur *occ)
4272 {
4273 if (occ == nullptr) {
4274 return;
4275 }
4276 auto *defOcc = occ->GetDef();
4277 if (occ->GetOccType() == kOccUse) {
4278 defOcc = static_cast<CgUseOcc *>(occ)->GetPrevVersionOccur();
4279 }
4280 if (defOcc == nullptr) {
4281 return;
4282 }
4283 switch (defOcc->GetOccType()) {
4284 case kOccDef:
4285 break;
4286 case kOccUse:
4287 UpdateLoadSite(defOcc);
4288 return;
4289 case kOccStore: {
4290 auto *storeOcc = static_cast<CgStoreOcc *>(defOcc);
4291 if (storeOcc->Reload()) {
4292 break;
4293 }
4294 switch (occ->GetOccType()) {
4295 case kOccUse: {
4296 static_cast<CgUseOcc *>(occ)->SetReload(true);
4297 break;
4298 }
4299 case kOccPhiopnd: {
4300 static_cast<CgPhiOpndOcc *>(occ)->SetReload(true);
4301 break;
4302 }
4303 default: {
4304 CHECK_FATAL(false, "must not be here");
4305 }
4306 }
4307 return;
4308 }
4309 case kOccPhiocc: {
4310 auto *phiOcc = static_cast<CgPhiOcc *>(defOcc);
4311 if (phiOcc->IsFullyAvailable()) {
4312 break;
4313 }
4314 if (!phiOcc->IsDownSafe() || phiOcc->IsNotAvailable()) {
4315 switch (occ->GetOccType()) {
4316 case kOccUse: {
4317 static_cast<CgUseOcc *>(occ)->SetReload(true);
4318 break;
4319 }
4320 case kOccPhiopnd: {
4321 static_cast<CgPhiOpndOcc *>(occ)->SetReload(true);
4322 break;
4323 }
4324 default: {
4325 CHECK_FATAL(false, "must not be here");
4326 }
4327 }
4328 return;
4329 }
4330
4331 if (defOcc->Processed()) {
4332 return;
4333 }
4334 defOcc->SetProcessed(true);
4335 for (auto *opndOcc : phiOcc->GetPhiOpnds()) {
4336 UpdateLoadSite(opndOcc);
4337 }
4338 return;
4339 }
4340 default: {
4341 CHECK_FATAL(false, "NIY");
4342 break;
4343 }
4344 }
4345 }
4346
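/*
 * After availability is known, compute the final reload sites: within each class a
 * reload is dropped when it is dominated by an equivalent available definition.
 */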
4347 void CallerSavePre::CalLoadSites()
4348 {
4349 for (auto *occ : allOccs) {
4350 if (occ->GetOccType() == kOccUse) {
4351 UpdateLoadSite(occ);
4352 }
4353 }
4354 std::vector<CgOccur *> availableDef(classCount, nullptr);
4355 for (auto *occ : allOccs) {
4356 auto classID = static_cast<uint32>(occ->GetClassID());
4357 switch (occ->GetOccType()) {
4358 case kOccDef:
4359 availableDef[classID] = occ;
4360 break;
4361 case kOccStore: {
4362 if (static_cast<CgStoreOcc *>(occ)->Reload()) {
4363 availableDef[classID] = occ;
4364 } else {
4365 availableDef[classID] = nullptr;
4366 }
4367 break;
4368 }
4369 case kOccPhiocc: {
4370 auto *phiOcc = static_cast<CgPhiOcc *>(occ);
4371 if (!phiOcc->IsNotAvailable() && phiOcc->IsDownSafe()) {
4372 availableDef[classID] = occ;
4373 } else {
4374 availableDef[classID] = nullptr;
4375 }
4376 break;
4377 }
4378 case kOccUse: {
4379 auto *useOcc = static_cast<CgUseOcc *>(occ);
4380 if (useOcc->Reload()) {
4381 auto *availDef = availableDef[classID];
4382 if (availDef != nullptr && dom->Dominate(*availDef->GetBB(), *useOcc->GetBB())) {
4383 useOcc->SetReload(false);
4384 } else {
4385 availableDef[classID] = useOcc;
4386 }
4387 }
4388 break;
4389 }
4390 case kOccPhiopnd: {
4391 auto *phiOpnd = static_cast<CgPhiOpndOcc *>(occ);
4392 if (phiOpnd->Reload()) {
4393 auto *availDef = availableDef[classID];
4394 if (availDef != nullptr && dom->Dominate(*availDef->GetBB(), *phiOpnd->GetBB())) {
4395 phiOpnd->SetReload(false);
4396 } else {
4397 availableDef[classID] = phiOpnd;
4398 }
4399 }
4400 break;
4401 }
4402 case kOccExit:
4403 break;
4404 default:
4405 CHECK_FATAL(false, "not supported occur type");
4406 }
4407 }
4408 if (dump) {
4409 PreWorkCand *curCand = workCand;
4410 LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex()
4411 << " after CalLoadSite===================\n";
4412 DumpWorkCandAndOcc();
4413 LogInfo::MapleLogger() << "\n";
4414 }
4415 }
4416
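/*
 * Iterate to a fixed point computing the availability of each phi: a phi operand
 * defined by a call or by a not-fully-available phi counts as killed; a phi with all
 * operands killed becomes not available, and with some killed partially available.
 */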
4417 void CallerSavePre::ComputeAvail()
4418 {
4419 bool changed = true;
4420 while (changed) {
4421 changed = false;
4422 for (auto *phiOcc : phiOccs) {
4423 if (phiOcc->IsNotAvailable()) {
4424 continue;
4425 }
4426 size_t killedCnt = 0;
4427 for (auto *opndOcc : phiOcc->GetPhiOpnds()) {
4428 auto defOcc = opndOcc->GetDef();
4429 if (defOcc == nullptr) {
4430 continue;
4431 }
4432 // to avoid moving the load too far from the use site, a not-fully-available phi kills the availability of its phi operand
4433 if ((defOcc->GetOccType() == kOccPhiocc && !static_cast<CgPhiOcc *>(defOcc)->IsFullyAvailable()) ||
4434 defOcc->GetOccType() == kOccStore) {
4435 ++killedCnt;
4436 opndOcc->SetHasRealUse(false);
4437 // opnd at back-edge is killed, set phi not avail
4438 if (dom->Dominate(*phiOcc->GetBB(), *opndOcc->GetBB())) {
4439 killedCnt = phiOcc->GetPhiOpnds().size();
4440 break;
4441 }
4442 if (opndOcc->GetBB()->IsSoloGoto() && opndOcc->GetBB()->GetLoop() != nullptr) {
4443 killedCnt = phiOcc->GetPhiOpnds().size();
4444 break;
4445 }
4446 continue;
4447 }
4448 }
4449 if (killedCnt == phiOcc->GetPhiOpnds().size()) {
4450 changed |= !phiOcc->IsNotAvailable();
4451 phiOcc->SetAvailability(kNotAvailable);
4452 } else if (killedCnt > 0) {
4453 changed |= !phiOcc->IsPartialAvailable();
4454 phiOcc->SetAvailability(kPartialAvailable);
4455 } else {
4456 } // fully available is default state
4457 }
4458 }
4459 }
4460
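/*
 * SSAPRE renaming: walk the occurrences in preorder of the dominator tree with a
 * stack, assigning class IDs so that occurrences of the same value version share a
 * class.
 */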
4461 void CallerSavePre::Rename1()
4462 {
4463 std::stack<CgOccur *> occStack;
4464 classCount = 1;
4465 // iterate over the occurrences in preorder of the dominator tree
4466 for (CgOccur *occ : allOccs) {
4467 while (!occStack.empty() && !occStack.top()->IsDominate(*dom, *occ)) {
4468 occStack.pop();
4469 }
4470 switch (occ->GetOccType()) {
4471 case kOccUse: {
4472 if (occStack.empty()) {
4473 // assign new class
4474 occ->SetClassID(static_cast<int>(classCount++));
4475 occStack.push(occ);
4476 break;
4477 }
4478 CgOccur *topOccur = occStack.top();
4479 if (topOccur->GetOccType() == kOccStore || topOccur->GetOccType() == kOccDef ||
4480 topOccur->GetOccType() == kOccPhiocc) {
4481 // inherit the class of the occurrence on top of the stack
4482 occ->SetClassID(topOccur->GetClassID());
4483 occ->SetPrevVersionOccur(topOccur);
4484 occStack.push(occ);
4485 break;
4486 } else if (topOccur->GetOccType() == kOccUse) {
4487 occ->SetClassID(topOccur->GetClassID());
4488 if (topOccur->GetDef() != nullptr) {
4489 occ->SetDef(topOccur->GetDef());
4490 } else {
4491 occ->SetDef(topOccur);
4492 }
4493 break;
4494 }
4495 CHECK_FATAL(false, "unsupported occur type");
4496 break;
4497 }
4498 case kOccPhiocc: {
4499 // assign new class
4500 occ->SetClassID(static_cast<int>(classCount++));
4501 occStack.push(occ);
4502 break;
4503 }
4504 case kOccPhiopnd: {
4505 if (!occStack.empty()) {
4506 CgOccur *topOccur = occStack.top();
4507 auto *phiOpndOcc = static_cast<CgPhiOpndOcc *>(occ);
4508 phiOpndOcc->SetDef(topOccur);
4509 phiOpndOcc->SetClassID(topOccur->GetClassID());
4510 if (topOccur->GetOccType() == kOccUse) {
4511 phiOpndOcc->SetHasRealUse(true);
4512 }
4513 }
4514 break;
4515 }
4516 case kOccDef: {
4517 if (!occStack.empty()) {
4518 CgOccur *topOccur = occStack.top();
4519 if (topOccur->GetOccType() == kOccPhiocc) {
4520 auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4521 phiTopOccur->SetIsDownSafe(false);
4522 }
4523 }
4524
4525 // assign new class
4526 occ->SetClassID(static_cast<int>(classCount++));
4527 occStack.push(occ);
4528 break;
4529 }
4530 case kOccStore: {
4531 if (!occStack.empty()) {
4532 CgOccur *topOccur = occStack.top();
4533 auto prevVersionOcc = topOccur->GetDef() ? topOccur->GetDef() : topOccur;
4534 static_cast<CgStoreOcc *>(occ)->SetPrevVersionOccur(prevVersionOcc);
4535 if (topOccur->GetOccType() == kOccPhiocc) {
4536 auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4537 phiTopOccur->SetIsDownSafe(false);
4538 }
4539 }
4540
4541 // assign new class
4542 occ->SetClassID(static_cast<int>(classCount++));
4543 occStack.push(occ);
4544 break;
4545 }
4546 case kOccExit: {
4547 if (occStack.empty()) {
4548 break;
4549 }
4550 CgOccur *topOccur = occStack.top();
4551 if (topOccur->GetOccType() == kOccPhiocc) {
4552 auto *phiTopOccur = static_cast<CgPhiOcc *>(topOccur);
4553 phiTopOccur->SetIsDownSafe(false);
4554 }
4555 break;
4556 }
4557 default:
4558 DEBUG_ASSERT(false, "should not be here");
4559 break;
4560 }
4561 }
4562 if (dump) {
4563 PreWorkCand *curCand = workCand;
4564 LogInfo::MapleLogger() << "========ssapre candidate " << curCand->GetIndex() << " after rename1============\n";
4565 DumpWorkCandAndOcc();
4566 }
4567 }
4568
4569 void CallerSavePre::ComputeVarAndDfPhis()
4570 {
4571 dfPhiDfns.clear();
4572 PreWorkCand *workCand = GetWorkCand();
4573 for (auto *realOcc : workCand->GetRealOccs()) {
4574 BB *defBB = realOcc->GetBB();
4575 GetIterDomFrontier(defBB, &dfPhiDfns);
4576 }
4577 }
4578
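/*
 * Build the PRE worklist: for every live range assigned a caller-save register that
 * crosses a call, create use/def occurrences at its references and store occurrences
 * at the calls that may clobber it.
 */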
4579 void CallerSavePre::BuildWorkList()
4580 {
4581 size_t numBBs = dom->GetDtPreOrderSize();
4582 std::vector<LiveRange *> callSaveLrs;
4583 for (auto it : regAllocator->GetLrMap()) {
4584 LiveRange *lr = it.second;
4585 if (lr == nullptr || lr->IsSpilled()) {
4586 continue;
4587 }
4588 bool isCalleeReg = AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr->GetAssignedRegNO()));
4589 if (lr->GetSplitLr() == nullptr && lr->GetNumCall() && !isCalleeReg) {
4590 callSaveLrs.emplace_back(lr);
4591 }
4592 }
4593 const MapleVector<uint32> &preOrderDt = dom->GetDtPreOrder();
4594 for (size_t i = 0; i < numBBs; ++i) {
4595 BB *bb = func->GetBBFromID(preOrderDt[i]);
4596 std::map<uint32, Insn *> insnMap;
4597 FOR_BB_INSNS_SAFE(insn, bb, ninsn) {
4598 insnMap.insert(std::make_pair(insn->GetId(), insn));
4599 }
4600 for (auto lr : callSaveLrs) {
4601 LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb->GetId());
4602 RegOperand &opnd = func->GetOrCreateVirtualRegisterOperand(lr->GetRegNO());
4603 if (lu != nullptr && (lu->GetDefNum() || lu->GetUseNum() || lu->HasCall())) {
4604 MapleMap<uint32, uint32> refs = lr->GetRefs(bb->GetId());
4605 for (auto it = refs.begin(); it != refs.end(); ++it) {
4606 if (it->second & kIsUse) {
4607 (void)CreateRealOcc(*insnMap[it->first], opnd, kOccUse);
4608 }
4609 if (it->second & kIsDef) {
4610 (void)CreateRealOcc(*insnMap[it->first], opnd, kOccDef);
4611 }
4612 if (it->second & kIsCall) {
4613 Insn *callInsn = insnMap[it->first];
4614 auto *targetOpnd = callInsn->GetCallTargetOperand();
4615 if (CGOptions::DoIPARA() && targetOpnd->IsFuncNameOpnd()) {
4616 FuncNameOperand *target = static_cast<FuncNameOperand *>(targetOpnd);
4617 const MIRSymbol *funcSt = target->GetFunctionSymbol();
4618 DEBUG_ASSERT(funcSt->GetSKind() == kStFunc, "funcst must be a function name symbol");
4619 MIRFunction *mirFunc = funcSt->GetFunction();
4620 if (mirFunc != nullptr && mirFunc->IsReferedRegsValid()) {
4621 auto regSet = mirFunc->GetReferedRegs();
4622 if (regSet.find(lr->GetAssignedRegNO()) == regSet.end()) {
4623 continue;
4624 }
4625 }
4626 }
4627 (void)CreateRealOcc(*callInsn, opnd, kOccStore);
4628 }
4629 }
4630 }
4631 }
4632 if (bb->GetKind() == BB::kBBReturn) {
4633 CreateExitOcc(*bb);
4634 }
4635 }
4636 }
4637
4638 void CallerSavePre::ApplySSAPRE()
4639 {
4640 // #0 build worklist
4641 BuildWorkList();
4642 uint32 cnt = 0;
4643 constexpr uint32 preLimit = UINT32_MAX;
4644 while (!workList.empty()) {
4645 ++cnt;
4646 if (cnt == preLimit) {
4647 beyondLimit = true;
4648 }
4649 workCand = workList.front();
4650 workCand->SetIndex(static_cast<int32>(cnt));
4651 workLr = regAllocator->GetLiveRange(static_cast<RegOperand *>(workCand->GetTheOperand())->GetRegisterNumber());
4652 DEBUG_ASSERT(workLr != nullptr, "expected non null lr");
4653 workList.pop_front();
4654 if (workCand->GetRealOccs().empty()) {
4655 continue;
4656 }
4657
4658 allOccs.clear();
4659 phiOccs.clear();
4660 // #1 Insert PHI; results in allOccs and phiOccs
4661 ComputeVarAndDfPhis();
4662 CreateSortedOccs();
4663 if (workCand->GetRealOccs().empty()) {
4664 continue;
4665 }
4666 // #2 Rename
4667 Rename1();
4668 ComputeDS();
4669 ComputeAvail();
4670 CalLoadSites();
4671 // #6 CodeMotion and recompute worklist based on newly occurrence
4672 CodeMotion();
4673 DEBUG_ASSERT(workLr->GetProcessed() == false, "expected unprocessed");
4674 workLr->SetProcessed();
4675 }
4676 }
4677
4678 void GraphColorRegAllocator::OptCallerSave()
4679 {
4680 CallerSavePre callerSavePre(this, *cgFunc, domInfo, *memPool, *memPool, kLoadPre, UINT32_MAX);
4681 callerSavePre.SetDump(needDump);
4682 callerSavePre.ApplySSAPRE();
4683 }
4684
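/*
 * For live ranges that hold a callee-saved register across the loop without being
 * referenced inside it, spill the register before the loop header and reload it at
 * the loop exit, then hand the freed register to a spilled candidate from cands.
 */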
4685 void GraphColorRegAllocator::SplitVregAroundLoop(const CGFuncLoops &loop, const std::vector<LiveRange *> &lrs,
4686 BB &headerPred, BB &exitSucc, const std::set<regno_t> &cands)
4687 {
4688 size_t maxSplitCount = lrs.size() - intCalleeRegSet.size();
4689 maxSplitCount = maxSplitCount > kMaxSplitCount ? kMaxSplitCount : maxSplitCount;
4690 uint32 splitCount = 0;
4691 auto it = cands.begin();
4692 size_t candsSize = cands.size();
4693 maxSplitCount = maxSplitCount > candsSize ? candsSize : maxSplitCount;
4694 for (auto &lr : lrs) {
4695 if (lr->IsSpilled()) {
4696 continue;
4697 }
4698 if (!AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr->GetAssignedRegNO()))) {
4699 continue;
4700 }
4701 bool hasRef = false;
4702 for (auto *bb : loop.GetLoopMembers()) {
4703 LiveUnit *lu = lr->GetLiveUnitFromLuMap(bb->GetId());
4704 if (lu != nullptr && (lu->GetDefNum() != 0 || lu->GetUseNum() != 0)) {
4705 hasRef = true;
4706 break;
4707 }
4708 }
4709 if (!hasRef) {
4710 splitCount++;
4711 RegOperand *ropnd = &cgFunc->GetOrCreateVirtualRegisterOperand(lr->GetRegNO());
4712 RegOperand &phyOpnd = static_cast<AArch64CGFunc *>(cgFunc)->GetOrCreatePhysicalRegisterOperand(
4713 static_cast<AArch64reg>(lr->GetAssignedRegNO()), ropnd->GetSize(), (lr->GetRegType()));
4714
4715 Insn *headerCom = &(static_cast<AArch64CGFunc *>(cgFunc)->CreateCommentInsn("split around loop begin"));
4716 headerPred.AppendInsn(*headerCom);
4717 Insn *last = headerPred.GetLastInsn();
4718 (void)SpillOperand(*last, *ropnd, true, static_cast<RegOperand &>(phyOpnd));
4719
4720 Insn *exitCom = &(static_cast<AArch64CGFunc *>(cgFunc)->CreateCommentInsn("split around loop end"));
4721 exitSucc.InsertInsnBegin(*exitCom);
4722 Insn *first = exitSucc.GetFirstInsn();
4723 (void)SpillOperand(*first, *ropnd, false, static_cast<RegOperand &>(phyOpnd));
4724
4725 LiveRange *replacedLr = lrMap[*it];
4726 replacedLr->SetAssignedRegNO(lr->GetAssignedRegNO());
4727 replacedLr->SetSpilled(false);
4728 ++it;
4729 }
4730 if (splitCount >= maxSplitCount) {
4731 break;
4732 }
4733 }
4734 }
4735
4736 bool GraphColorRegAllocator::LrGetBadReg(const LiveRange &lr) const
4737 {
4738 if (lr.IsSpilled()) {
4739 return true;
4740 }
4741 if (lr.GetNumCall() != 0 && !AArch64Abi::IsCalleeSavedReg(static_cast<AArch64reg>(lr.GetAssignedRegNO()))) {
4742 return true;
4743 }
4744 return false;
4745 }
4746
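/*
 * Check whether the loop contains spilled (or otherwise badly colored) int vregs
 * that live entirely inside the loop; collect them in cands as candidates to receive
 * a callee-saved register freed by SplitVregAroundLoop.
 */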
4747 bool GraphColorRegAllocator::LoopNeedSplit(const CGFuncLoops &loop, std::set<regno_t> &cands)
4748 {
4749 std::set<regno_t> regPressure;
4750 const BB *header = loop.GetHeader();
4751 const MapleSet<regno_t> &liveIn = header->GetLiveInRegNO();
4752 std::set<BB *> loopBBs;
4753 for (auto *bb : loop.GetLoopMembers()) {
4754 loopBBs.insert(bb);
4755 FOR_BB_INSNS(insn, bb) {
4756 if (!insn->IsMachineInstruction()) {
4757 continue;
4758 }
4759 if (insn->GetId() == 0) {
4760 continue;
4761 }
4762 uint32 opndNum = insn->GetOperandSize();
4763 for (uint32 i = 0; i < opndNum; ++i) {
4764 Operand &opnd = insn->GetOperand(i);
4765 if (opnd.IsList()) {
4766 continue;
4767 } else if (opnd.IsMemoryAccessOperand()) {
4768 auto &memOpnd = static_cast<MemOperand &>(opnd);
4769 Operand *base = memOpnd.GetBaseRegister();
4770 Operand *offset = memOpnd.GetIndexRegister();
4771 if (base != nullptr && base->IsRegister()) {
4772 RegOperand *regOpnd = static_cast<RegOperand *>(base);
4773 regno_t regNO = regOpnd->GetRegisterNumber();
4774 LiveRange *lr = GetLiveRange(regNO);
4775 if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4776 liveIn.find(regNO) == liveIn.end()) {
4777 regPressure.insert(regOpnd->GetRegisterNumber());
4778 }
4779 }
4780 if (offset != nullptr && offset->IsRegister()) {
4781 RegOperand *regOpnd = static_cast<RegOperand *>(offset);
4782 regno_t regNO = regOpnd->GetRegisterNumber();
4783 LiveRange *lr = GetLiveRange(regNO);
4784 if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4785 liveIn.find(regNO) == liveIn.end()) {
4786 regPressure.insert(regOpnd->GetRegisterNumber());
4787 }
4788 }
4789 } else if (opnd.IsRegister()) {
4790 auto ®Opnd = static_cast<RegOperand &>(opnd);
4791 regno_t regNO = regOpnd.GetRegisterNumber();
4792 LiveRange *lr = GetLiveRange(regNO);
4793 if (lr != nullptr && lr->GetRegType() == kRegTyInt && LrGetBadReg(*lr) &&
4794 liveIn.find(regNO) == liveIn.end()) {
4795 regPressure.insert(regOpnd.GetRegisterNumber());
4796 }
4797 }
4798 }
4799 }
4800 }
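    /* keep only the candidates whose entire live range lies within the loop */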
    if (regPressure.size() != 0) {
        for (auto reg : regPressure) {
            LiveRange *lr = lrMap[reg];
            std::vector<BB *> smember;
            ForEachBBArrElem(lr->GetBBMember(),
                             [this, &smember](uint32 bbID) { (void)smember.emplace_back(bbVec[bbID]); });
            bool liveBeyondLoop = false;
            for (auto bb : smember) {
                if (loopBBs.find(bb) == loopBBs.end()) {
                    liveBeyondLoop = true;
                    break;
                }
            }
            if (liveBeyondLoop) {
                continue;
            }
            cands.insert(reg);
        }
        if (cands.empty()) {
            return false;
        }
        return true;
    }
    return false;
}
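
/*
 * For a single-entry, single-exit loop that contains calls, gather the int live ranges that
 * are live into the header and cross a call; if the loop has split candidates
 * (LoopNeedSplit), split suitable live ranges around it (SplitVregAroundLoop).
 */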
void GraphColorRegAllocator::AnalysisLoop(const CGFuncLoops &loop)
{
    const BB *header = loop.GetHeader();
    const MapleSet<regno_t> &liveIn = header->GetLiveInRegNO();
    std::vector<LiveRange *> lrs;
    size_t intCalleeNum = intCalleeRegSet.size();
    if (loop.GetMultiEntries().size() != 0) {
        return;
    }
    for (auto regno : liveIn) {
        LiveRange *lr = GetLiveRange(regno);
        if (lr != nullptr && lr->GetRegType() == kRegTyInt && lr->GetNumCall() != 0) {
            lrs.emplace_back(lr);
        }
    }
    if (lrs.size() < intCalleeNum) {
        return;
    }
    bool hasCall = false;
    std::set<BB *> loopBBs;
    for (auto *bb : loop.GetLoopMembers()) {
        if (bb->HasCall()) {
            hasCall = true;
        }
        loopBBs.insert(bb);
    }
    if (!hasCall) {
        return;
    }
    auto comparator = [=](const LiveRange *lr1, const LiveRange *lr2) -> bool {
        return lr1->GetPriority() < lr2->GetPriority();
    };
    std::sort(lrs.begin(), lrs.end(), comparator);
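    /* the transformation is applied only when the loop has exactly one entrance and one real exit successor */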
    const MapleVector<BB *> &exits = loop.GetExits();
    std::set<BB *> loopExits;
    for (auto &bb : exits) {
        for (auto &succ : bb->GetSuccs()) {
            if (loopBBs.find(succ) != loopBBs.end()) {
                continue;
            }
            if (succ->IsSoloGoto() || succ->IsEmpty()) {
                BB *realSucc = CGCFG::GetTargetSuc(*succ);
                if (realSucc != nullptr) {
                    loopExits.insert(realSucc);
                }
            } else {
                loopExits.insert(succ);
            }
        }
    }
    std::set<BB *> loopEntra;
    for (auto &pred : header->GetPreds()) {
        if (loopBBs.find(pred) != loopBBs.end()) {
            continue;
        }
        loopEntra.insert(pred);
    }
    if (loopEntra.size() != 1 || loopExits.size() != 1) {
        return;
    }
    BB *headerPred = *loopEntra.begin();
    BB *exitSucc = *loopExits.begin();
    if (headerPred->GetKind() != BB::kBBFallthru) {
        return;
    }
    if (exitSucc->GetPreds().size() != loop.GetExits().size()) {
        return;
    }
    std::set<regno_t> cands;
    if (!LoopNeedSplit(loop, cands)) {
        return;
    }
    SplitVregAroundLoop(loop, lrs, *headerPred, *exitSucc, cands);
}
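/* Walk the loop tree and run AnalysisLoop on every innermost loop. */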
void GraphColorRegAllocator::AnalysisLoopPressureAndSplit(const CGFuncLoops &loop)
{
    if (loop.GetInnerLoops().empty()) {
        // only handle inner-most loop
        AnalysisLoop(loop);
        return;
    }
    for (const auto *lp : loop.GetInnerLoops()) {
        AnalysisLoopPressureAndSplit(*lp);
    }
}

/* Iterate through all instructions and change the vreg to preg. */
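/*
 * Under multi-pass allocation, spilled vregs are first materialized by SpillLiveRangeForSpills;
 * if that completes, this pass stops here so the caller can run another allocation round.
 * For C code, loop splitting and caller-save optimization are applied before operands are rewritten.
 */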
void GraphColorRegAllocator::FinalizeRegisters()
{
    if (doMultiPass && hasSpill) {
        if (needDump) {
            LogInfo::MapleLogger() << "In this round, spill vregs : \n";
            for (auto it : lrMap) {
                LiveRange *lr = it.second;
                if (lr->IsSpilled()) {
                    LogInfo::MapleLogger() << "R" << lr->GetRegNO() << " ";
                }
            }
            LogInfo::MapleLogger() << "\n";
        }
        bool done = SpillLiveRangeForSpills();
        if (done) {
            return;
        }
    }
    if (CLANG) {
        if (!cgFunc->GetLoops().empty()) {
            cgFunc->GetTheCFG()->InitInsnVisitor(*cgFunc);
            for (const auto *lp : cgFunc->GetLoops()) {
                AnalysisLoopPressureAndSplit(*lp);
            }
        }
        OptCallerSave();
    }
    for (auto *bb : bfs->sortedBBs) {
        FOR_BB_INSNS_SAFE(insn, bb, nextInsn) {
            if (insn->IsImmaterialInsn()) {
                continue;
            }
            if (!insn->IsMachineInstruction()) {
                continue;
            }
            if (insn->GetId() == 0) {
                continue;
            }

            for (uint32 i = 0; i < kSpillMemOpndNum; ++i) {
                operandSpilled[i] = false;
            }

            FinalizeRegisterInfo *fInfo = memPool->New<FinalizeRegisterInfo>(alloc);
            bool needProcess = true;
            uint64 usedRegMask = FinalizeRegisterPreprocess(*fInfo, *insn, needProcess);
            if (!needProcess) {
                continue;
            }
            uint32 defSpillIdx = 0;
            uint32 useSpillIdx = 0;
            MemOperand *memOpnd = nullptr;
            if (fInfo->GetBaseOperand()) {
                memOpnd = static_cast<const MemOperand *>(fInfo->GetBaseOperand())->Clone(*cgFunc->GetMemoryPool());
                insn->SetOperand(fInfo->GetMemOperandIdx(), *memOpnd);
                Operand *base = memOpnd->GetBaseRegister();
                DEBUG_ASSERT(base != nullptr, "nullptr check");
                /* If the base register were both a def and a use, defSpillIdx would also need to be
                 * increased; that case does not occur yet. */
                RegOperand *phyOpnd = GetReplaceOpnd(*insn, *base, useSpillIdx, usedRegMask, false);
                if (phyOpnd != nullptr) {
                    memOpnd->SetBaseRegister(*phyOpnd);
                }
                if (!memOpnd->IsIntactIndexed()) {
                    (void)GetReplaceOpnd(*insn, *base, useSpillIdx, usedRegMask, true);
                }
            }
            if (fInfo->GetOffsetOperand()) {
                DEBUG_ASSERT(memOpnd != nullptr, "memOpnd should not be nullptr");
                Operand *offset = memOpnd->GetIndexRegister();
                RegOperand *phyOpnd = GetReplaceOpnd(*insn, *offset, useSpillIdx, usedRegMask, false);
                if (phyOpnd != nullptr) {
                    memOpnd->SetIndexRegister(*phyOpnd);
                }
            }
            for (size_t i = 0; i < fInfo->GetDefOperandsSize(); ++i) {
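                /* for inline asm, every vreg in the output operand list is rewritten to its physical register */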
                if (insn->GetMachineOpcode() == MOP_asm) {
                    const Operand *defOpnd = fInfo->GetDefOperandsElem(i);
                    if (defOpnd->IsList()) {
                        ListOperand *outList = const_cast<ListOperand *>(static_cast<const ListOperand *>(defOpnd));
                        auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
                        auto *srcOpndsNew = a64CGFunc->CreateListOpnd(*a64CGFunc->GetFuncScopeAllocator());
                        RegOperand *phyOpnd;
                        for (auto opnd : outList->GetOperands()) {
                            if (opnd->IsPhysicalRegister()) {
                                phyOpnd = opnd;
                            } else {
                                phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, true);
                            }
                            srcOpndsNew->PushOpnd(*phyOpnd);
                        }
                        insn->SetOperand(kAsmOutputListOpnd, *srcOpndsNew);
                        continue;
                    }
                }
                const Operand *opnd = fInfo->GetDefOperandsElem(i);
                RegOperand *phyOpnd = nullptr;
                if (insn->IsSpecialIntrinsic()) {
                    phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, true);
                } else {
                    phyOpnd = GetReplaceOpnd(*insn, *opnd, defSpillIdx, usedRegMask, true);
                }
                if (phyOpnd != nullptr) {
                    insn->SetOperand(fInfo->GetDefIdxElem(i), *phyOpnd);
                }
            }
            for (size_t i = 0; i < fInfo->GetUseOperandsSize(); ++i) {
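                /* for inline asm, every vreg in the input operand list is rewritten to its physical register */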
                if (insn->GetMachineOpcode() == MOP_asm) {
                    const Operand *useOpnd = fInfo->GetUseOperandsElem(i);
                    if (useOpnd->IsList()) {
                        ListOperand *inList = const_cast<ListOperand *>(static_cast<const ListOperand *>(useOpnd));
                        auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);
                        auto *srcOpndsNew = a64CGFunc->CreateListOpnd(*a64CGFunc->GetFuncScopeAllocator());
                        for (auto opnd : inList->GetOperands()) {
                            if ((static_cast<const RegOperand *>(opnd))->GetRegisterNumber() < kAllRegNum) {
                                srcOpndsNew->PushOpnd(*opnd);
                            } else {
                                RegOperand *phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, false);
                                srcOpndsNew->PushOpnd(*phyOpnd);
                            }
                        }
                        insn->SetOperand(kAsmInputListOpnd, *srcOpndsNew);
                        continue;
                    }
                }
                const Operand *opnd = fInfo->GetUseOperandsElem(i);
                RegOperand *phyOpnd = GetReplaceOpnd(*insn, *opnd, useSpillIdx, usedRegMask, false);
                if (phyOpnd != nullptr) {
                    insn->SetOperand(fInfo->GetUseIdxElem(i), *phyOpnd);
                }
            }
            if (insn->GetMachineOpcode() == MOP_wmovrr || insn->GetMachineOpcode() == MOP_xmovrr) {
                auto &reg1 = static_cast<RegOperand &>(insn->GetOperand(kInsnFirstOpnd));
                auto &reg2 = static_cast<RegOperand &>(insn->GetOperand(kInsnSecondOpnd));
                /* remove a self-move such as "mov x0, x0", e.g. one generated for an i32 to i64 cast */
                if ((reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) && (reg1.GetSize() >= reg2.GetSize())) {
                    bb->RemoveInsn(*insn);
                }
            }
        } /* insn */
    } /* BB */
}
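
/* Record the callee-saved registers handed out by coloring so that they are saved and restored in the prologue/epilogue. */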
void GraphColorRegAllocator::MarkCalleeSaveRegs()
{
    for (auto regNO : intCalleeUsed) {
        static_cast<AArch64CGFunc *>(cgFunc)->AddtoCalleeSaved(static_cast<AArch64reg>(regNO));
    }
    for (auto regNO : fpCalleeUsed) {
        static_cast<AArch64CGFunc *>(cgFunc)->AddtoCalleeSaved(static_cast<AArch64reg>(regNO));
    }
}
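
/*
 * Entry point of graph-coloring register allocation. Returns true when allocation is final;
 * returns false when multi-pass allocation is enabled and spills were generated, so that the
 * caller runs another round.
 */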
bool GraphColorRegAllocator::AllocateRegisters()
{
#ifdef RANDOM_PRIORITY
    /* Change this seed for different random numbers */
    srand(0);
#endif /* RANDOM_PRIORITY */
    auto *a64CGFunc = static_cast<AArch64CGFunc *>(cgFunc);

    if (needDump && doMultiPass) {
        LogInfo::MapleLogger() << "\n round start: \n";
        cgFunc->DumpCGIR();
    }
    /*
     * We save both FP and LR either when FP is used, or when FP is not used but the
     * function has a call; record them here for saving.
     */
    a64CGFunc->AddtoCalleeSaved(RFP);
    a64CGFunc->AddtoCalleeSaved(RLR);
    a64CGFunc->NoteFPLRAddedToCalleeSavedList();

#if DEBUG
    uint32 cnt = 0;
    FOR_ALL_BB(bb, cgFunc) {
        FOR_BB_INSNS(insn, bb) {
            ++cnt;
        }
    }
    DEBUG_ASSERT(cnt <= cgFunc->GetTotalNumberOfInstructions(), "Incorrect insn count");
#endif
    cgFunc->SetIsAfterRegAlloc();
    /* EBO propagation extends live ranges and might need to be turned off. */
    Bfs localBfs(*cgFunc, *memPool);
    bfs = &localBfs;
    bfs->ComputeBlockOrder();

    InitCCReg();

    ComputeLiveRanges();

    InitFreeRegPool();

    BuildInterferenceGraph();

    Separate();

    SplitAndColor();

#ifdef USE_LRA
    if (doLRA) {
        LocalRegisterAllocator(true);
    }
#endif /* USE_LRA */

    FinalizeRegisters();

    MarkCalleeSaveRegs();

    if (!seenFP) {
        cgFunc->UnsetSeenFP();
    }
    if (needDump) {
        cgFunc->DumpCGIR();
    }

    bfs = nullptr; /* bfs is not utilized outside the function. */

    if (doMultiPass && hasSpill) {
        return false;
    } else {
        return true;
    }
}
} /* namespace maplebe */