1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "aarch64_schedule.h"
17 #include <ctime>
18 #include "aarch64_cg.h"
19 #include "aarch64_operand.h"
20 #include "aarch64_dependence.h"
21 #include "pressure.h"
22 
23 /*
24  * This phase is Instruction Scheduling.
25  * It performs local list scheduling, i.e. scheduling within a basic block.
26  * The entry is AArch64Schedule::ListScheduling, which traverses all basic blocks;
27  * for each basic block it will:
28  *  1. build a dependence graph;
29  *  2. combine clinit pairs and str&ldr pairs;
30  *  3. reorder instructions.
31  */
32 namespace maplebe {
33 namespace {
34 constexpr uint32 kClinitAdvanceCycle = 10;
35 constexpr uint32 kAdrpLdrAdvanceCycle = 2;
36 constexpr uint32 kClinitTailAdvanceCycle = 4;
37 constexpr uint32 kSecondToLastNode = 2;
38 }  // namespace
39 
40 uint32 AArch64Schedule::maxUnitIndex = 0;
41 /* reserve two registers for special purposes */
42 int AArch64Schedule::intRegPressureThreshold = static_cast<int>(R27 - R0);
43 int AArch64Schedule::fpRegPressureThreshold = static_cast<int>(V30 - V0);
44 int AArch64Schedule::intCalleeSaveThresholdBase = static_cast<int>(R29 - R19);
45 int AArch64Schedule::intCalleeSaveThresholdEnhance = static_cast<int>(R30 - R19);
46 int AArch64Schedule::fpCalleeSaveThreshold = static_cast<int>(R16 - R8);
47 /* Init schedule's data structures. */
48 void AArch64Schedule::Init()
49 {
50     readyList.clear();
51     nodeSize = nodes.size();
52     lastSeparatorIndex = 0;
53     mad->ReleaseAllUnits();
54     DepNode *node = nodes[0];
55 
56     DEBUG_ASSERT(node->GetType() == kNodeTypeSeparator,
57                  "CG internal error, the first node should be a separator node.");
58 
59     if (CGOptions::IsDruteForceSched() || CGOptions::IsSimulateSched()) {
60         for (auto nodeTemp : nodes) {
61             nodeTemp->SetVisit(0);
62             nodeTemp->SetState(kNormal);
63             nodeTemp->SetSchedCycle(0);
64             nodeTemp->SetEStart(0);
65             nodeTemp->SetLStart(0);
66         }
67     }
68 
69     readyList.emplace_back(node);
70     node->SetState(kReady);
71 
72     /* Init validPredsSize and validSuccsSize. */
73     for (auto nodeTemp : nodes) {
74         nodeTemp->SetValidPredsSize(nodeTemp->GetPreds().size());
75         nodeTemp->SetValidSuccsSize(nodeTemp->GetSuccs().size());
76     }
77 }
78 
79 /*
80  *  An insn that can be combined must meet these conditions:
81  *  1. it is a str/ldr insn;
82  *  2. its address mode is kAddrModeBOi, [baseReg, offset];
83  *  3. the register operand size equals the memory operand size;
84  *  4. if USE_32BIT_REF is defined, the register operand size should be 4 bytes;
85  *  5. for stp/ldp, the imm must be within -512 and 504 (64-bit), or -256 and 252 (32-bit);
86  *  6. pair instructions for 8/4-byte registers must have an imm that is a multiple of 8/4.
87  *  Return true if the insn can be combined.
88  */
89 bool AArch64Schedule::CanCombine(const Insn &insn) const
90 {
91     MOperator opCode = insn.GetMachineOpcode();
92     if ((opCode != MOP_xldr) && (opCode != MOP_wldr) && (opCode != MOP_dldr) && (opCode != MOP_sldr) &&
93         (opCode != MOP_xstr) && (opCode != MOP_wstr) && (opCode != MOP_dstr) && (opCode != MOP_sstr)) {
94         return false;
95     }
96 
97     DEBUG_ASSERT(insn.GetOperand(1).IsMemoryAccessOperand(), "expects mem operands");
98     auto &memOpnd = static_cast<MemOperand &>(insn.GetOperand(1));
99     MemOperand::AArch64AddressingMode addrMode = memOpnd.GetAddrMode();
100     if ((addrMode != MemOperand::kAddrModeBOi) || !memOpnd.IsIntactIndexed()) {
101         return false;
102     }
103 
104     auto &regOpnd = static_cast<RegOperand &>(insn.GetOperand(0));
105     if (regOpnd.GetSize() != memOpnd.GetSize()) {
106         return false;
107     }
108 
109     uint32 size = regOpnd.GetSize() >> kLog2BitsPerByte;
110 #ifdef USE_32BIT_REF
111     if (insn.IsAccessRefField() && (size > (kIntregBytelen >> 1))) {
112         return false;
113     }
114 #endif /* USE_32BIT_REF */
115 
116     OfstOperand *offset = memOpnd.GetOffsetImmediate();
117     if (offset == nullptr) {
118         return false;
119     }
120     int32 offsetValue = static_cast<int32>(offset->GetOffsetValue());
121     if (size == kIntregBytelen) { /* 64 bit */
122         if ((offsetValue <= kStpLdpImm64LowerBound) || (offsetValue >= kStpLdpImm64UpperBound)) {
123             return false;
124         }
125     } else if (size == (kIntregBytelen >> 1)) { /* 32 bit */
126         if ((offsetValue <= kStpLdpImm32LowerBound) || (offsetValue >= kStpLdpImm32UpperBound)) {
127             return false;
128         }
129     }
130 
131     /* pair instr for 8/4 byte registers must have multiple of 8/4 for imm */
132     if ((static_cast<uint32>(offsetValue) % size) != 0) {
133         return false;
134     }
135     return true;
136 }
137 
138 /* After building dependence graph, combine str&ldr pairs. */
139 void AArch64Schedule::MemoryAccessPairOpt()
140 {
141     Init();
142     std::vector<DepNode *> memList;
143 
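    /* Drain readyList first; once it is empty, take nodes from memList, trying to pair the head of
     * memList with another pending memory access before it is scheduled. */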
144     while ((!readyList.empty()) || !memList.empty()) {
145         DepNode *readNode = nullptr;
146         if (!readyList.empty()) {
147             readNode = readyList[0];
148             readyList.erase(readyList.begin());
149         } else {
150             if (memList[0]->GetType() != kNodeTypeEmpty) {
151                 FindAndCombineMemoryAccessPair(memList);
152             }
153             readNode = memList[0];
154             memList.erase(memList.begin());
155         }
156 
157         /* schedule readNode */
158         CHECK_FATAL(readNode != nullptr, "readNode is null in MemoryAccessPairOpt");
159         readNode->SetState(kScheduled);
160 
161         /* add readNode's succs to readyList or memList. */
162         for (auto succLink : readNode->GetSuccs()) {
163             DepNode &succNode = succLink->GetTo();
164             succNode.DescreaseValidPredsSize();
165             if (succNode.GetValidPredsSize() == 0) {
166                 DEBUG_ASSERT(succNode.GetState() == kNormal, "schedule state should be kNormal");
167                 succNode.SetState(kReady);
168                 DEBUG_ASSERT(succNode.GetInsn() != nullptr, "insn can't be nullptr!");
169                 if (CanCombine(*succNode.GetInsn())) {
170                     memList.emplace_back(&succNode);
171                 } else {
172                     readyList.emplace_back(&succNode);
173                 }
174             }
175         }
176     }
177 
178     for (auto node : nodes) {
179         node->SetVisit(0);
180         node->SetState(kNormal);
181     }
182 }
183 
184 /* Find and combine correct MemoryAccessPair for memList[0]. */
185 void AArch64Schedule::FindAndCombineMemoryAccessPair(const std::vector<DepNode *> &memList)
186 {
187     DEBUG_ASSERT(!memList.empty(), "memList should not be empty");
188     CHECK_FATAL(memList[0]->GetInsn() != nullptr, "memList[0]'s insn should not be nullptr");
189     MemOperand *currMemOpnd = static_cast<MemOperand *>(memList[0]->GetInsn()->GetMemOpnd());
190     DEBUG_ASSERT(currMemOpnd != nullptr, "opnd should not be nullptr");
191     DEBUG_ASSERT(currMemOpnd->IsMemoryAccessOperand(), "opnd should be memOpnd");
192     int32 currOffsetVal = static_cast<int32>(currMemOpnd->GetOffsetImmediate()->GetOffsetValue());
193     MOperator currMop = memList[0]->GetInsn()->GetMachineOpcode();
194     /* find a depNode to combine with memList[0], and break; */
195     for (auto it = std::next(memList.begin(), 1); it != memList.end(); ++it) {
196         DEBUG_ASSERT((*it)->GetInsn() != nullptr, "null ptr check");
197 
198         if (currMop == (*it)->GetInsn()->GetMachineOpcode()) {
199             MemOperand *nextMemOpnd = static_cast<MemOperand *>((*it)->GetInsn()->GetMemOpnd());
200             CHECK_FATAL(nextMemOpnd != nullptr, "opnd should not be nullptr");
201             CHECK_FATAL(nextMemOpnd->IsMemoryAccessOperand(), "opnd should be MemOperand");
202             int32 nextOffsetVal = static_cast<int32>(nextMemOpnd->GetOffsetImmediate()->GetOffsetValue());
203             uint32 size = currMemOpnd->GetSize() >> kLog2BitsPerByte;
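            /* Two accesses can only be paired when they share the base register and access size,
             * and their offsets are exactly one access width apart. */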
204             if ((nextMemOpnd->GetBaseRegister() == currMemOpnd->GetBaseRegister()) &&
205                 (nextMemOpnd->GetSize() == currMemOpnd->GetSize()) &&
206                 (static_cast<uint32>(abs(nextOffsetVal - currOffsetVal)) == size)) {
207                 /*
208                  * In ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile
209                  * LDP on page K1-6125 declares that ldp can't use the same reg
210                  */
211                 if (((currMop == MOP_xldr) || (currMop == MOP_sldr) || (currMop == MOP_dldr) ||
212                      (currMop == MOP_wldr)) &&
213                     &(memList[0]->GetInsn()->GetOperand(0)) == &((*it)->GetInsn()->GetOperand(0))) {
214                     continue;
215                 }
216                 if (static_cast<RegOperand &>((*it)->GetInsn()->GetOperand(0)).GetRegisterType() !=
217                     static_cast<RegOperand &>(memList[0]->GetInsn()->GetOperand(0)).GetRegisterType()) {
218                     continue;
219                 }
220 
221                 if (LIST_SCHED_DUMP_REF) {
222                     LogInfo::MapleLogger() << "Combine insn: "
223                                            << "\n";
224                     memList[0]->GetInsn()->Dump();
225                     (*it)->GetInsn()->Dump();
226                 }
227                 depAnalysis->CombineMemoryAccessPair(*memList[0], **it, nextOffsetVal > currOffsetVal);
228                 if (LIST_SCHED_DUMP_REF) {
229                     LogInfo::MapleLogger() << "To: "
230                                            << "\n";
231                     memList[0]->GetInsn()->Dump();
232                 }
233                 break;
234             }
235         }
236     }
237 }
238 
239 /* combine clinit pairs. */
240 void AArch64Schedule::ClinitPairOpt()
241 {
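    /* Look for an adrp_ldr insn whose next node (skipping at most one separator node) is clinit_tail,
     * and let the dependence analysis combine the two into one clinit pair. */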
242     for (auto it = nodes.begin(); it != nodes.end(); ++it) {
243         auto nextIt = std::next(it, 1);
244         if (nextIt == nodes.end()) {
245             return;
246         }
247 
248         if ((*it)->GetInsn()->GetMachineOpcode() == MOP_adrp_ldr) {
249             if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) {
250                 depAnalysis->CombineClinit(**it, **(nextIt), false);
251             } else if ((*nextIt)->GetType() == kNodeTypeSeparator) {
252                 nextIt = std::next(nextIt, 1);
253                 if (nextIt == nodes.end()) {
254                     return;
255                 }
256                 if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) {
257                     /* Do something. */
258                     depAnalysis->CombineClinit(**it, **(nextIt), true);
259                 }
260             }
261         }
262     }
263 }
264 
265 /* Return the index of the next node whose type is kNodeTypeSeparator. */
266 uint32 AArch64Schedule::GetNextSepIndex() const
267 {
268     return ((lastSeparatorIndex + kMaxDependenceNum) < nodeSize) ? (lastSeparatorIndex + kMaxDependenceNum)
269                                                                  : (nodes.size() - 1);
270 }
271 
272 /* Do register pressure scheduling. */
273 void AArch64Schedule::RegPressureScheduling(BB &bb, MapleVector<DepNode *> &nodes)
274 {
275     RegPressureSchedule *regSchedule = memPool.New<RegPressureSchedule>(cgFunc, alloc);
276     /*
277      * Get the current physical register counts:
278      * undef, Int Reg, Float Reg, Flag Reg
279      */
280     const std::vector<int32> kRegNumVec = {0, V0, (kMaxRegNum - V0) + 1, 1};
281     regSchedule->InitBBInfo(bb, memPool, nodes);
282     regSchedule->BuildPhyRegInfo(kRegNumVec);
283     regSchedule->DoScheduling(nodes);
284 }
285 
286 /*
287  * Compute the earliest start (estart) of each node;
288  * return value: the maximum estart.
289  */
290 uint32 AArch64Schedule::ComputeEstart(uint32 cycle)
291 {
292     std::vector<DepNode *> readyNodes;
293     uint32 maxIndex = GetNextSepIndex();
294 
295     if (CGOptions::IsDebugSched()) {
296         /* Check validPredsSize. */
297         for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
298             DepNode *node = nodes[i];
299             [[maybe_unused]] int32 schedNum = 0;
300             for (const auto *predLink : node->GetPreds()) {
301                 if (predLink->GetFrom().GetState() == kScheduled) {
302                     ++schedNum;
303                 }
304             }
305             DEBUG_ASSERT((node->GetPreds().size() - schedNum) == node->GetValidPredsSize(), "validPredsSize error.");
306         }
307     }
308 
309     DEBUG_ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator,
310                  "CG internal error, nodes[maxIndex] should be a separator node.");
311 
312     (void)readyNodes.insert(readyNodes.begin(), readyList.begin(), readyList.end());
313 
314     uint32 maxEstart = cycle;
315     for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
316         DepNode *node = nodes[i];
317         node->SetVisit(0);
318     }
319 
320     for (auto *node : readyNodes) {
321         DEBUG_ASSERT(node->GetState() == kReady, "CG internal error, all nodes in ready list should be ready.");
322         if (node->GetEStart() < cycle) {
323             node->SetEStart(cycle);
324         }
325     }
326 
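    /* Propagate estart forward from the ready nodes: a successor may start no earlier than
     * its predecessor's estart plus the latency of the dependence edge. */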
327     while (!readyNodes.empty()) {
328         DepNode *node = readyNodes.front();
329         readyNodes.erase(readyNodes.begin());
330 
331         for (const auto *succLink : node->GetSuccs()) {
332             DepNode &succNode = succLink->GetTo();
333             if (succNode.GetType() == kNodeTypeSeparator) {
334                 continue;
335             }
336 
337             if (succNode.GetEStart() < (node->GetEStart() + succLink->GetLatency())) {
338                 succNode.SetEStart(node->GetEStart() + succLink->GetLatency());
339             }
340             maxEstart = (maxEstart < succNode.GetEStart() ? succNode.GetEStart() : maxEstart);
341             succNode.IncreaseVisit();
342             if ((succNode.GetVisit() >= succNode.GetValidPredsSize()) && (succNode.GetType() != kNodeTypeSeparator)) {
343                 readyNodes.emplace_back(&succNode);
344             }
345             DEBUG_ASSERT(succNode.GetVisit() <= succNode.GetValidPredsSize(), "CG internal error.");
346         }
347     }
348 
349     return maxEstart;
350 }
351 
352 /* Compute latest start of the node. */
353 void AArch64Schedule::ComputeLstart(uint32 maxEstart)
354 {
355     /* std::vector is better than std::queue in run time */
356     std::vector<DepNode *> readyNodes;
357     uint32 maxIndex = GetNextSepIndex();
358 
359     DEBUG_ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator,
360                  "CG internal error, nodes[maxIndex] should be a separator node.");
361 
362     for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
363         DepNode *node = nodes[i];
364         node->SetLStart(maxEstart);
365         node->SetVisit(0);
366     }
367 
368     readyNodes.emplace_back(nodes[maxIndex]);
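    /* Propagate lstart backward from the separator node: a predecessor must start no later than
     * its successor's lstart minus the latency of the dependence edge. */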
369     while (!readyNodes.empty()) {
370         DepNode *node = readyNodes.front();
371         readyNodes.erase(readyNodes.begin());
372         for (const auto *predLink : node->GetPreds()) {
373             DepNode &predNode = predLink->GetFrom();
374             if (predNode.GetState() == kScheduled) {
375                 continue;
376             }
377 
378             if (predNode.GetLStart() > (node->GetLStart() - predLink->GetLatency())) {
379                 predNode.SetLStart(node->GetLStart() - predLink->GetLatency());
380             }
381             predNode.IncreaseVisit();
382             if ((predNode.GetVisit() >= predNode.GetValidSuccsSize()) && (predNode.GetType() != kNodeTypeSeparator)) {
383                 readyNodes.emplace_back(&predNode);
384             }
385 
386             DEBUG_ASSERT(predNode.GetVisit() <= predNode.GetValidSuccsSize(), "CG internal error.");
387         }
388     }
389 }
390 
391 /* Compute the earliest start and latest start of the nodes that are in readyList and not yet scheduled. */
392 void AArch64Schedule::UpdateELStartsOnCycle(uint32 cycle)
393 {
394     ComputeLstart(ComputeEstart(cycle));
395 }
396 
397 /*
398  * If all units this node needs when being scheduled are free, this node can be scheduled;
399  * return true.
400  */
401 bool DepNode::CanBeScheduled() const
402 {
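    /* A node can issue only if each unit in its reservation table is free at its corresponding slot. */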
403     for (uint32 i = 0; i < unitNum; ++i) {
404         Unit *unit = units[i];
405         if (unit != nullptr) {
406             if (!unit->IsFree(i)) {
407                 return false;
408             }
409         }
410     }
411     return true;
412 }
413 
414 /* Occupy the units that this node needs when it is being scheduled. */
415 void DepNode::OccupyUnits()
416 {
417     for (uint32 i = 0; i < unitNum; ++i) {
418         Unit *unit = units[i];
419         if (unit != nullptr) {
420             unit->Occupy(*insn, i);
421         }
422     }
423 }
424 
425 /* Get unit kind of this node's units[0]. */
426 uint32 DepNode::GetUnitKind() const
427 {
428     uint32 retValue = 0;
429     if ((units == nullptr) || (units[0] == nullptr)) {
430         return retValue;
431     }
432 
433     switch (units[0]->GetUnitId()) {
434         case kUnitIdSlotD:
435             retValue |= kUnitKindSlot0;
436             break;
437         case kUnitIdAgen:
438         case kUnitIdSlotSAgen:
439             retValue |= kUnitKindAgen;
440             break;
441         case kUnitIdSlotDAgen:
442             retValue |= kUnitKindAgen;
443             retValue |= kUnitKindSlot0;
444             break;
445         case kUnitIdHazard:
446         case kUnitIdSlotSHazard:
447             retValue |= kUnitKindHazard;
448             break;
449         case kUnitIdCrypto:
450             retValue |= kUnitKindCrypto;
451             break;
452         case kUnitIdMul:
453         case kUnitIdSlotSMul:
454             retValue |= kUnitKindMul;
455             break;
456         case kUnitIdDiv:
457             retValue |= kUnitKindDiv;
458             break;
459         case kUnitIdBranch:
460         case kUnitIdSlotSBranch:
461             retValue |= kUnitKindBranch;
462             break;
463         case kUnitIdStAgu:
464             retValue |= kUnitKindStAgu;
465             break;
466         case kUnitIdLdAgu:
467             retValue |= kUnitKindLdAgu;
468             break;
469         case kUnitIdFpAluS:
470         case kUnitIdFpAluD:
471             retValue |= kUnitKindFpAlu;
472             break;
473         case kUnitIdFpMulS:
474         case kUnitIdFpMulD:
475             retValue |= kUnitKindFpMul;
476             break;
477         case kUnitIdFpDivS:
478         case kUnitIdFpDivD:
479             retValue |= kUnitKindFpDiv;
480             break;
481         case kUnitIdSlot0LdAgu:
482             retValue |= kUnitKindSlot0;
483             retValue |= kUnitKindLdAgu;
484             break;
485         case kUnitIdSlot0StAgu:
486             retValue |= kUnitKindSlot0;
487             retValue |= kUnitKindStAgu;
488             break;
489         default:
490             break;
491     }
492 
493     return retValue;
494 }
495 
496 /* Count unit kinds into an array. Each element of the array holds the number of nodes in a node set that use that unit kind. */
497 void AArch64Schedule::CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const
498 {
499     (void)arraySize;
500     DEBUG_ASSERT(arraySize >= kUnitKindLast, "CG internal error. unit kind number is not correct.");
501     uint32 unitKind = depNode.GetUnitKind();
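    /* __builtin_ffs returns the 1-based index of the lowest set bit (0 if none), so each iteration
     * counts one unit kind bit and then clears it from unitKind. */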
502     int32 index = static_cast<int32>(__builtin_ffs(unitKind));
503     while (index) {
504         DEBUG_ASSERT(static_cast<uint32>(index) < kUnitKindLast, "CG internal error. index error.");
505         ++array[index];
506         unitKind &= ~(1u << (index - 1u));
507         index = __builtin_ffs(unitKind);
508     }
509 }
510 
511 /* Check if a node uses a specific unit kind. */
512 bool AArch64Schedule::IfUseUnitKind(const DepNode &depNode, uint32 index)
513 {
514     uint32 unitKind = depNode.GetUnitKind();
515     int32 idx = static_cast<int32>(__builtin_ffs(unitKind));
516     while (idx) {
517         DEBUG_ASSERT(index < kUnitKindLast, "CG internal error. index error.");
518         if (idx == static_cast<int32>(index)) {
519             return true;
520         }
521         unitKind &= ~(1u << (idx - 1u));
522         idx = __builtin_ffs(unitKind);
523     }
524 
525     return false;
526 }
527 
528 /* A sample schedule according to the dependence graph only, to verify the correctness of the dependence graph. */
529 void AArch64Schedule::RandomTest()
530 {
531     Init();
532     nodes.clear();
533 
534     while (!readyList.empty()) {
535         DepNode *currNode = readyList.back();
536         currNode->SetState(kScheduled);
537         readyList.pop_back();
538         nodes.emplace_back(currNode);
539 
540         for (auto succLink : currNode->GetSuccs()) {
541             DepNode &succNode = succLink->GetTo();
542             bool ready = true;
543             for (auto predLink : succNode.GetPreds()) {
544                 DepNode &predNode = predLink->GetFrom();
545                 if (predNode.GetState() != kScheduled) {
546                     ready = false;
547                     break;
548                 }
549             }
550 
551             if (ready) {
552                 DEBUG_ASSERT(succNode.GetState() == kNormal, "succNode must be kNormal");
553                 readyList.emplace_back(&succNode);
554                 succNode.SetState(kReady);
555             }
556         }
557     }
558 }
559 
560 /* Remove target from readyList. */
561 void AArch64Schedule::EraseNodeFromReadyList(const DepNode &target)
562 {
563     EraseNodeFromNodeList(target, readyList);
564 }
565 
566 /* Remove target from nodeList. */
567 void AArch64Schedule::EraseNodeFromNodeList(const DepNode &target, MapleVector<DepNode *> &nodeList)
568 {
569     for (auto it = nodeList.begin(); it != nodeList.end(); ++it) {
570         if ((*it) == &target) {
571             nodeList.erase(it);
572             return;
573         }
574     }
575 
576     DEBUG_ASSERT(false, "CG internal error, erase node fail.");
577 }
578 
579 /* Dump the schedule information of all nodes in availableReadyList in the current cycle. */
580 void AArch64Schedule::DumpDebugInfo(const ScheduleProcessInfo &scheduleInfo)
581 {
582     LogInfo::MapleLogger() << "Current cycle[ " << scheduleInfo.GetCurrCycle() << " ], Available in readyList is : \n";
583     for (auto node : scheduleInfo.GetAvailableReadyList()) {
584         LogInfo::MapleLogger() << "NodeIndex[ " << node->GetIndex() << " ], Estart[ " << node->GetEStart()
585                                << " ], Lstart[ ";
586         LogInfo::MapleLogger() << node->GetLStart() << " ], slot[ ";
587         LogInfo::MapleLogger() << (node->GetReservation() == nullptr ? "SlotNone"
588                                                                      : node->GetReservation()->GetSlotName())
589                                << " ], ";
590         LogInfo::MapleLogger() << "succNodeNum[ " << node->GetSuccs().size() << " ], ";
591         node->GetInsn()->Dump();
592         LogInfo::MapleLogger() << '\n';
593     }
594 }
595 
596 /*
597  * Select a node from availableReadyList according to some heuristic rules, then:
598  *   1. change targetNode's schedule information;
599  *   2. try to add successors of targetNode to readyList;
600  *   3. update unscheduled node set, when targetNode is last kNodeTypeSeparator;
601  *   4. update AdvanceCycle.
602  */
603 void AArch64Schedule::SelectNode(AArch64ScheduleProcessInfo &scheduleInfo)
604 {
605     auto &availableReadyList = scheduleInfo.GetAvailableReadyList();
606     auto it = availableReadyList.begin();
607     DepNode *targetNode = *it;
608     if (availableReadyList.size() > 1) {
609         CalculateMaxUnitKindCount(scheduleInfo);
610         if (GetConsiderRegPressure()) {
611             UpdateReleaseRegInfo(scheduleInfo);
612         }
613         ++it;
614         for (; it != availableReadyList.end(); ++it) {
615             if (CompareDepNode(**it, *targetNode, scheduleInfo)) {
616                 targetNode = *it;
617             }
618         }
619     }
620     /* The priority of free-reg node is higher than pipeline */
621     while (!targetNode->CanBeScheduled()) {
622         scheduleInfo.IncCurrCycle();
623         mad->AdvanceCycle();
624     }
625     if (GetConsiderRegPressure() && !scheduleInfo.IsFirstSeparator()) {
626         UpdateLiveRegSet(scheduleInfo, *targetNode);
627     }
628     /* push target node into scheduled nodes and turn it into kScheduled state */
629     scheduleInfo.PushElemIntoScheduledNodes(targetNode);
630 
631     EraseNodeFromReadyList(*targetNode);
632 
633     if (CGOptions::IsDebugSched()) {
634         LogInfo::MapleLogger() << "TargetNode : ";
635         targetNode->GetInsn()->Dump();
636         LogInfo::MapleLogger() << "\n";
637     }
638 
639     /* Update readyList. */
640     UpdateReadyList(*targetNode, readyList, true);
641 
642     if (targetNode->GetType() == kNodeTypeSeparator) {
643         /* If the target node is a separator node, update lastSeparatorIndex and calculate the estart and lstart
644          * of the depNodes between the current separator node and the new separator node.
645          */
646         if (!scheduleInfo.IsFirstSeparator()) {
647             lastSeparatorIndex += kMaxDependenceNum;
648             UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle());
649         } else {
650             scheduleInfo.ResetIsFirstSeparator();
651         }
652     }
653 
654     UpdateAdvanceCycle(scheduleInfo, *targetNode);
655 }
656 
657 void AArch64Schedule::UpdateAdvanceCycle(AArch64ScheduleProcessInfo &scheduleInfo, const DepNode &targetNode)
658 {
659     switch (targetNode.GetInsn()->GetLatencyType()) {
660         case kLtClinit:
661             scheduleInfo.SetAdvanceCycle(kClinitAdvanceCycle);
662             break;
663         case kLtAdrpLdr:
664             scheduleInfo.SetAdvanceCycle(kAdrpLdrAdvanceCycle);
665             break;
666         case kLtClinitTail:
667             scheduleInfo.SetAdvanceCycle(kClinitTailAdvanceCycle);
668             break;
669         default:
670             break;
671     }
672 
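    /* If no special latency applies and all issue slots of the current cycle are taken,
     * advance at least one cycle, or up to the target node's estart if that is later. */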
673     if ((scheduleInfo.GetAdvanceCycle() == 0) && mad->IsFullIssued()) {
674         if (targetNode.GetEStart() > scheduleInfo.GetCurrCycle()) {
675             scheduleInfo.SetAdvanceCycle(1 + targetNode.GetEStart() - scheduleInfo.GetCurrCycle());
676         } else {
677             scheduleInfo.SetAdvanceCycle(1);
678         }
679     }
680 }
681 
682 /*
683  * Advance mad's cycle until info's advanceCycle equals zero,
684  * and then clear info's availableReadyList.
685  */
686 void AArch64Schedule::UpdateScheduleProcessInfo(AArch64ScheduleProcessInfo &info)
687 {
688     while (info.GetAdvanceCycle() > 0) {
689         info.IncCurrCycle();
690         mad->AdvanceCycle();
691         info.DecAdvanceCycle();
692     }
693     info.ClearAvailableReadyList();
694 }
695 
696 /*
697  * Traverse readyList forward; if a node in readyList can be scheduled, add it to availableReadyList.
698  * Return true if availableReadyList is not empty.
699  */
700 bool AArch64Schedule::CheckSchedulable(AArch64ScheduleProcessInfo &info) const
701 {
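    /* When scheduling for register pressure, every ready node is a candidate; otherwise a node must
     * have free units and have reached its estart in the current cycle. */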
702     for (auto node : readyList) {
703         if (GetConsiderRegPressure()) {
704             info.PushElemIntoAvailableReadyList(node);
705         } else {
706             if (node->CanBeScheduled() && node->GetEStart() <= info.GetCurrCycle()) {
707                 info.PushElemIntoAvailableReadyList(node);
708             }
709         }
710     }
711     return info.AvailableReadyListIsEmpty() ? false : true;
712 }
713 
714 /*
715  * Calculate estimated machine cycle count for an input node series
716  */
717 int AArch64Schedule::CalSeriesCycles(const MapleVector<DepNode *> &nodes)
718 {
719     int currentCycle = 0;
720     /* after an instruction is issued, the minimum cycle count for the next instruction is 1 */
721     int instructionBaseCycleCount = 1;
722     std::map<DepNode *, int> scheduledCycleMap;
723     for (auto node : nodes) {
724         int latencyCycle = 0;
725         /* calculate the latest begin time of this node based on its predecessor's issue time and latency */
726         for (auto pred : node->GetPreds()) {
727             DepNode &from = pred->GetFrom();
728             int latency = static_cast<int>(pred->GetLatency());
729             int fromCycle = scheduledCycleMap[&from];
730             if (fromCycle + latency > latencyCycle) {
731                 latencyCycle = fromCycle + latency;
732             }
733         }
734         /* the issue time of this node is the max value between the next cycle and latest begin time */
735         if (currentCycle + instructionBaseCycleCount >= latencyCycle) {
736             currentCycle = currentCycle + instructionBaseCycleCount;
737         } else {
738             currentCycle = latencyCycle;
739         }
740         /* record this node's issue cycle */
741         scheduledCycleMap[node] = currentCycle;
742     }
743     return currentCycle;
744 }
745 
746 /* After building dependence graph, schedule insns. */
747 uint32 AArch64Schedule::DoSchedule()
748 {
749     AArch64ScheduleProcessInfo scheduleInfo(nodeSize);
750     Init();
751     UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle());
752     InitLiveRegSet(scheduleInfo);
753     while (!readyList.empty()) {
754         UpdateScheduleProcessInfo(scheduleInfo);
755         /* Check if schedulable */
756         if (!CheckSchedulable(scheduleInfo)) {
757             /* Advance cycle. */
758             scheduleInfo.SetAdvanceCycle(1);
759             continue;
760         }
761 
762         if (scheduleInfo.GetLastUpdateCycle() < scheduleInfo.GetCurrCycle()) {
763             scheduleInfo.SetLastUpdateCycle(scheduleInfo.GetCurrCycle());
764         }
765 
766         if (CGOptions::IsDebugSched()) {
767             DumpDebugInfo(scheduleInfo);
768         }
769 
770         /* Select a node to scheduling */
771         SelectNode(scheduleInfo);
772     }
773 
774     DEBUG_ASSERT(scheduleInfo.SizeOfScheduledNodes() == nodes.size(), "CG internal error, Not all nodes scheduled.");
775 
776     nodes.clear();
777     (void)nodes.insert(nodes.begin(), scheduleInfo.GetScheduledNodes().begin(), scheduleInfo.GetScheduledNodes().end());
778     /* the second to last node is the true last node, because the last is kNodeTypeSeparator node */
779     DEBUG_ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal 2"); // size not less than 2
780     return (nodes[nodes.size() - 2]->GetSchedCycle()); // size-2 get the second to last node
781 }
782 
783 struct RegisterInfoUnit {
784     RegisterInfoUnit() : intRegNum(0), fpRegNum(0), ccRegNum(0) {}
785     uint32 intRegNum = 0;
786     uint32 fpRegNum = 0;
787     uint32 ccRegNum = 0;
788 };
789 
790 RegisterInfoUnit GetDepNodeDefType(const DepNode &depNode, CGFunc &f)
791 {
792     RegisterInfoUnit rIU;
793     for (auto defRegNO : depNode.GetDefRegnos()) {
794         RegType defRegTy = AArch64ScheduleProcessInfo::GetRegisterType(f, defRegNO);
795         if (defRegTy == kRegTyInt) {
796             rIU.intRegNum++;
797         } else if (defRegTy == kRegTyFloat) {
798             rIU.fpRegNum++;
799         } else if (defRegTy == kRegTyCc) {
800             rIU.ccRegNum++;
801             DEBUG_ASSERT(rIU.ccRegNum <= 1, "spill cc reg?");
802         } else {
803             CHECK_FATAL(false, "NIY aarch64 register type");
804         }
805     }
806     /* call node will not increase reg def pressure */
807     if (depNode.GetInsn() != nullptr && depNode.GetInsn()->IsCall()) {
808         rIU.intRegNum = 0;
809         rIU.fpRegNum = 0;
810     }
811     return rIU;
812 }
813 
814 AArch64Schedule::CSRResult AArch64Schedule::DoCSR(DepNode &node1, DepNode &node2,
815                                                   AArch64ScheduleProcessInfo &scheduleInfo) const
816 {
817     RegisterInfoUnit defRIU1 = GetDepNodeDefType(node1, cgFunc);
818     RegisterInfoUnit defRIU2 = GetDepNodeDefType(node2, cgFunc);
819     /* do not increase callee save pressure before call */
820     if (static_cast<int>(scheduleInfo.SizeOfCalleeSaveLiveRegister(true)) >= intCalleeSaveThreshold) {
821         if (defRIU1.intRegNum > 0 && defRIU2.intRegNum > 0) {
822             CSRResult csrInfo = ScheduleCrossCall(node1, node2);
823             if ((csrInfo == kNode1 && defRIU1.intRegNum >= scheduleInfo.GetFreeIntRegs(node1)) ||
824                 (csrInfo == kNode2 && defRIU2.intRegNum >= scheduleInfo.GetFreeIntRegs(node2))) {
825                 return csrInfo;
826             }
827         }
828     }
829     if (static_cast<int>(scheduleInfo.SizeOfCalleeSaveLiveRegister(false)) >= fpCalleeSaveThreshold) {
830         if (defRIU1.fpRegNum > 0 && defRIU2.fpRegNum > 0) {
831             CSRResult csrInfo = ScheduleCrossCall(node1, node2);
832             if ((csrInfo == kNode1 && defRIU1.fpRegNum >= scheduleInfo.GetFreeFpRegs(node1)) ||
833                 (csrInfo == kNode2 && defRIU2.fpRegNum >= scheduleInfo.GetFreeFpRegs(node2))) {
834                 return csrInfo;
835             }
836         }
837     }
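    /* If only one of the two nodes releases a register of the pressured class, prefer that node. */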
838     auto FindFreeRegNode = [&](bool isInt) -> CSRResult {
839         auto freeRegNodes = isInt ? scheduleInfo.GetFreeIntRegNodeSet() : scheduleInfo.GetFreeFpRegNodeSet();
840         if (freeRegNodes.find(&node1) != freeRegNodes.end() && freeRegNodes.find(&node2) == freeRegNodes.end()) {
841             return kNode1;
842         }
843         if (freeRegNodes.find(&node1) == freeRegNodes.end() && freeRegNodes.find(&node2) != freeRegNodes.end()) {
844             return kNode2;
845         }
846         return kDoCSP;
847     };
848     if (static_cast<int>(scheduleInfo.SizeOfIntLiveRegSet()) >= intRegPressureThreshold) {
849         if (FindFreeRegNode(true) != kDoCSP) {
850             return FindFreeRegNode(true);
851         }
852     }
853     if (static_cast<int>(scheduleInfo.SizeOfFpLiveRegSet()) >= fpRegPressureThreshold) {
854         if (FindFreeRegNode(false) != kDoCSP) {
855             return FindFreeRegNode(false);
856         }
857     }
858 
859     bool canDoCSPFurther = false;
860     if (static_cast<int>(scheduleInfo.SizeOfIntLiveRegSet()) >= intRegPressureThreshold) {
861         if (defRIU1.intRegNum != defRIU2.intRegNum) {
862             return defRIU1.intRegNum < defRIU2.intRegNum ? kNode1 : kNode2;
863         } else {
864             canDoCSPFurther = defRIU1.intRegNum == 0;
865         }
866     }
867     if (static_cast<int>(scheduleInfo.SizeOfFpLiveRegSet()) >= fpRegPressureThreshold) {
868         if (defRIU1.fpRegNum != defRIU2.fpRegNum) {
869             return defRIU1.fpRegNum < defRIU2.fpRegNum ? kNode1 : kNode2;
870         } else {
871             canDoCSPFurther = (defRIU1.fpRegNum == 0 && canDoCSPFurther);
872         }
873     }
874     /* if both nodes are going to increase reg pressure, do not do CSP further */
875     return canDoCSPFurther ? kDoCSP : (node1.GetInsn()->GetId() < node2.GetInsn()->GetId() ? kNode1 : kNode2);
876 }
877 
878 AArch64Schedule::CSRResult AArch64Schedule::ScheduleCrossCall(const DepNode &node1, const DepNode &node2) const
879 {
880     uint32 node1ID = node1.GetInsn()->GetId();
881     uint32 node2ID = node2.GetInsn()->GetId();
882     bool order = node1ID < node2ID; /* true -- node1 before node2  false -- node1 after node2 */
883     Insn *beginInsn = order ? node1.GetInsn() : node2.GetInsn();
884     uint32 finialId = order ? node2ID : node1ID;
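    /* Walk the machine insns between the two candidates in program order; if a call lies between
     * them, prefer the node that originally appears before the call. */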
885     for (Insn *checkInsn = beginInsn; (checkInsn != nullptr && checkInsn->GetId() <= finialId);
886          checkInsn = checkInsn->GetNextMachineInsn()) {
887         if (checkInsn->IsCall()) {
888             return order ? kNode1 : kNode2;
889         }
890     }
891     return kDoCSP;
892 };
893 
894 /*
895  * Compare the priorities of node1 and node2 according to some heuristic rules;
896  * return true if node1's priority is higher.
897  * crp -- consider reg pressure
898  */
899 bool AArch64Schedule::CompareDepNode(DepNode &node1, DepNode &node2, AArch64ScheduleProcessInfo &scheduleInfo) const
900 {
901     /*
902      * strategy CSR -- code schedule for register pressure
903      * if pressure is above the threshold, select the node which can reduce register pressure
904      */
905     if (GetConsiderRegPressure()) {
906         switch (DoCSR(node1, node2, scheduleInfo)) {
907             case kNode1:
908                 return true;
909             case kNode2:
910                 return false;
911             default:
912                 break;
913         }
914     }
915     /* strategy CSP -- code schedule for CPU pipeline */
916     /* less LStart first */
917     if (node1.GetLStart() != node2.GetLStart()) {
918         return node1.GetLStart() < node2.GetLStart();
919     }
920 
921     /* max unit kind use */
922     bool use1 = IfUseUnitKind(node1, maxUnitIndex);
923     bool use2 = IfUseUnitKind(node2, maxUnitIndex);
924     if (use1 != use2) {
925         return use1;
926     }
927 
928     /* slot0 first */
929     SlotType slotType1 = node1.GetReservation()->GetSlot();
930     SlotType slotType2 = node2.GetReservation()->GetSlot();
931     if (slotType1 == kSlots) {
932         slotType1 = kSlot0;
933     }
934     if (slotType2 == kSlots) {
935         slotType2 = kSlot0;
936     }
937     if (slotType1 != slotType2) {
938         return slotType1 < slotType2;
939     }
940 
941     /* more succNodes first */
942     if (node1.GetSuccs().size() != node2.GetSuccs().size()) {
943         return node1.GetSuccs().size() > node2.GetSuccs().size();
944     }
945 
946     /* default order */
947     return node1.GetInsn()->GetId() < node2.GetInsn()->GetId();
948 }
949 
950 /*
951  * Calculate the usage count of every unit kind used by availableReadyList's nodes and save the index of the most used one in maxUnitIndex
952  */
953 void AArch64Schedule::CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo)
954 {
955     uint32 unitKindCount[kUnitKindLast] = {0};
956     for (auto node : scheduleInfo.GetAvailableReadyList()) {
957         CountUnitKind(*node, unitKindCount, kUnitKindLast);
958     }
959 
960     uint32 maxCount = 0;
961     maxUnitIndex = 0;
962     for (size_t i = 1; i < kUnitKindLast; ++i) {
963         if (maxCount < unitKindCount[i]) {
964             maxCount = unitKindCount[i];
965             maxUnitIndex = i;
966         }
967     }
968 }
969 
970 /*
971  * Update the release-reg node set.
972  * When a node in this set is scheduled, register pressure can be reduced.
973  */
974 void AArch64Schedule::UpdateReleaseRegInfo(AArch64ScheduleProcessInfo &scheduleInfo)
975 {
976     auto &availableReadyList = scheduleInfo.GetAvailableReadyList();
977     scheduleInfo.ClearALLFreeRegNodeSet();
978     /* Traverse availableReadyList and add those can reduce register pressure to release reg node set */
979     for (auto node : availableReadyList) {
980         std::set<regno_t> freeRegNO = CanFreeRegister(*node);
981         if (!freeRegNO.empty()) {
982             scheduleInfo.VaryFreeRegSet(cgFunc, freeRegNO, *node);
983         }
984     }
985 }
986 
987 /*
988  * Return the registers that an instruction can release after being scheduled.
989  */
990 std::set<regno_t> AArch64Schedule::CanFreeRegister(const DepNode &node) const
991 {
992     std::set<regno_t> freeRegSet;
993     for (auto reg : node.GetUseRegnos()) {
994         if (RegPressureSchedule::IsLastUse(node, reg)) {
995             freeRegSet.emplace(reg);
996         }
997     }
998     return freeRegSet;
999 }
1000 
1001 /*
1002  * After an instruction is scheduled, update live reg set
1003  */
1004 void AArch64Schedule::UpdateLiveRegSet(AArch64ScheduleProcessInfo &scheduleInfo, const DepNode &node)
1005 {
1006     /* dealing with def reg, add def reg into the live reg set */
1007     size_t i = 1;
1008     for (auto &defReg : node.GetDefRegnos()) {
1009         if (scheduleInfo.FindIntLiveReg(defReg) == 0 && scheduleInfo.FindFpLiveReg(defReg) == 0) {
1010             scheduleInfo.VaryLiveRegSet(cgFunc, defReg, true);
1011         }
1012         /* delete dead def reg from live reg set because its live range is only 1 cycle */
1013         if (node.GetRegDefs(i) == nullptr && liveOutRegNo.find(defReg) == liveOutRegNo.end()) {
1014             scheduleInfo.VaryLiveRegSet(cgFunc, defReg, false);
1015         }
1016         ++i;
1017     }
1018     /* dealing with use reg, delete use reg from live reg set if this instruction is last use of it */
1019     for (auto &useReg : node.GetUseRegnos()) {
1020         if (RegPressureSchedule::IsLastUse(node, useReg)) {
1021             if ((scheduleInfo.FindIntLiveReg(useReg) != 0 || scheduleInfo.FindFpLiveReg(useReg) != 0) &&
1022                 liveOutRegNo.find(useReg) == liveOutRegNo.end()) {
1023                 scheduleInfo.VaryLiveRegSet(cgFunc, useReg, false);
1024             }
1025         }
1026     }
1027 }
1028 
1029 /*
1030  * Initialize the live reg set based on the live in reg information
1031  */
1032 void AArch64Schedule::InitLiveRegSet(AArch64ScheduleProcessInfo &scheduleInfo)
1033 {
1034     if (GetConsiderRegPressure()) {
1035         for (auto reg : liveInRegNo) {
1036             scheduleInfo.VaryLiveRegSet(cgFunc, reg, true);
1037         }
1038     }
1039 }
1040 
1041 /*
1042  * A simulated schedule:
1043  * schedule instructions in the original order to calculate the original execution cycle count.
1044  */
1045 uint32 AArch64Schedule::SimulateOnly()
1046 {
1047     uint32 currCycle = 0;
1048     uint32 advanceCycle = 0;
1049     Init();
1050 
1051     for (uint32 i = 0; i < nodes.size();) {
1052         while (advanceCycle > 0) {
1053             ++currCycle;
1054             mad->AdvanceCycle();
1055             --advanceCycle;
1056         }
1057 
1058         DepNode *targetNode = nodes[i];
1059         if ((currCycle >= targetNode->GetEStart()) && targetNode->CanBeScheduled()) {
1060             targetNode->SetSimulateCycle(currCycle);
1061             targetNode->OccupyUnits();
1062 
1063             /* Update estart. */
1064             for (auto succLink : targetNode->GetSuccs()) {
1065                 DepNode &succNode = succLink->GetTo();
1066                 uint32 eStart = currCycle + succLink->GetLatency();
1067                 if (succNode.GetEStart() < eStart) {
1068                     succNode.SetEStart(eStart);
1069                 }
1070             }
1071 
1072             if (CGOptions::IsDebugSched()) {
1073                 LogInfo::MapleLogger() << "[Simulate] TargetNode : ";
1074                 targetNode->GetInsn()->Dump();
1075                 LogInfo::MapleLogger() << "\n";
1076             }
1077 
1078             switch (targetNode->GetInsn()->GetLatencyType()) {
1079                 case kLtClinit:
1080                     advanceCycle = kClinitAdvanceCycle;
1081                     break;
1082                 case kLtAdrpLdr:
1083                     advanceCycle = kAdrpLdrAdvanceCycle;
1084                     break;
1085                 case kLtClinitTail:
1086                     advanceCycle = kClinitTailAdvanceCycle;
1087                     break;
1088                 default:
1089                     break;
1090             }
1091 
1092             ++i;
1093         } else {
1094             advanceCycle = 1;
1095         }
1096     }
1097     /* the second to last node is the true last node, because the last is a kNodeTypeSeparator node */
1098     DEBUG_ASSERT(nodes.size() - kSecondToLastNode >= 0, "size of nodes should be greater than or equal 2");
1099     return (nodes[nodes.size() - kSecondToLastNode]->GetSimulateCycle());
1100 }
1101 
1102 /* Restore dependence graph to normal CGIR. */
1103 void AArch64Schedule::FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis)
1104 {
1105     bb.ClearInsns();
1106 
1107     const Insn *prevLocInsn = (bb.GetPrev() != nullptr ? bb.GetPrev()->GetLastLoc() : nullptr);
1108     for (auto node : nodes) {
1109         /* Append comments first. */
1110         for (auto comment : node->GetComments()) {
1111             if (comment->GetPrev() != nullptr && comment->GetPrev()->IsDbgInsn()) {
1112                 bb.AppendInsn(*comment->GetPrev());
1113             }
1114             bb.AppendInsn(*comment);
1115         }
1116         /* Append insn. */
1117         if (!node->GetClinitInsns().empty()) {
1118             for (auto clinit : node->GetClinitInsns()) {
1119                 bb.AppendInsn(*clinit);
1120             }
1121         } else if (node->GetType() == kNodeTypeNormal) {
1122             if (node->GetInsn()->GetPrev() != nullptr && node->GetInsn()->GetPrev()->IsDbgInsn()) {
1123                 bb.AppendInsn(*node->GetInsn()->GetPrev());
1124             }
1125             bb.AppendInsn(*node->GetInsn());
1126         }
1127 
1128         /* Append cfi instructions. */
1129         for (auto cfi : node->GetCfiInsns()) {
1130             bb.AppendInsn(*cfi);
1131         }
1132     }
1133     bb.SetLastLoc(prevLocInsn);
1134 
1135     for (auto lastComment : depAnalysis.GetLastComments()) {
1136         bb.AppendInsn(*lastComment);
1137     }
1138 }
1139 
1140 /* For every node of nodes, update its bruteForceSchedCycle. */
1141 void AArch64Schedule::UpdateBruteForceSchedCycle()
1142 {
1143     for (auto node : nodes) {
1144         node->SetBruteForceSchedCycle(node->GetSchedCycle());
1145     }
1146 }
1147 
1148 /* Recursively schedule all of the possible nodes. */
1149 void AArch64Schedule::IterateBruteForce(DepNode &targetNode, MapleVector<DepNode *> &readyList, uint32 currCycle,
1150                                         MapleVector<DepNode *> &scheduledNodes, uint32 &maxCycleCount,
1151                                         MapleVector<DepNode *> &optimizedScheduledNodes)
1152 {
1153     /* Save states. */
1154     constexpr int32 unitSize = 31;
1155     DEBUG_ASSERT(unitSize == mad->GetAllUnitsSize(), "CG internal error.");
1156     std::vector<uint32> occupyTable;
1157     occupyTable.resize(unitSize, 0);
1158     mad->SaveStates(occupyTable, unitSize);
1159 
1160     /* Schedule targetNode first. */
1161     targetNode.SetState(kScheduled);
1162     targetNode.SetSchedCycle(currCycle);
1163     scheduledNodes.emplace_back(&targetNode);
1164 
1165     MapleVector<DepNode *> tempList = readyList;
1166     EraseNodeFromNodeList(targetNode, tempList);
1167     targetNode.OccupyUnits();
1168 
1169     /* Update readyList. */
1170     UpdateReadyList(targetNode, tempList, true);
1171 
1172     if (targetNode.GetType() == kNodeTypeSeparator) {
1173         /* If target node is separator node, update lastSeparatorIndex. */
1174         lastSeparatorIndex += kMaxDependenceNum;
1175     }
1176 
1177     if (tempList.empty()) {
1178         DEBUG_ASSERT(scheduledNodes.size() == nodes.size(), "CG internal error, Not all nodes scheduled.");
1179         if (currCycle < maxCycleCount) {
1180             maxCycleCount = currCycle;
1181             UpdateBruteForceSchedCycle();
1182             optimizedScheduledNodes = scheduledNodes;
1183         }
1184     } else {
1185         uint32 advanceCycle = 0;
1186         switch (targetNode.GetInsn()->GetLatencyType()) {
1187             case kLtClinit:
1188                 advanceCycle = kClinitAdvanceCycle;
1189                 break;
1190             case kLtAdrpLdr:
1191                 advanceCycle = kAdrpLdrAdvanceCycle;
1192                 break;
1193             case kLtClinitTail:
1194                 advanceCycle = kClinitTailAdvanceCycle;
1195                 break;
1196             default:
1197                 break;
1198         }
1199 
1200         do {
1201             std::vector<DepNode *> availableReadyList;
1202             std::vector<DepNode *> tempAvailableList;
1203             while (advanceCycle > 0) {
1204                 ++currCycle;
1205                 mad->AdvanceCycle();
1206                 --advanceCycle;
1207             }
1208             /* Check EStart. */
1209             for (auto node : tempList) {
1210                 if (node->GetEStart() <= currCycle) {
1211                     tempAvailableList.emplace_back(node);
1212                 }
1213             }
1214 
1215             if (tempAvailableList.empty()) {
1216                 /* Advance cycle. */
1217                 advanceCycle = 1;
1218                 continue;
1219             }
1220 
1221             /* Check if schedulable */
1222             for (auto node : tempAvailableList) {
1223                 if (node->CanBeScheduled()) {
1224                     availableReadyList.emplace_back(node);
1225                 }
1226             }
1227 
1228             if (availableReadyList.empty()) {
1229                 /* Advance cycle. */
1230                 advanceCycle = 1;
1231                 continue;
1232             }
1233 
1234             for (auto node : availableReadyList) {
1235                 IterateBruteForce(*node, tempList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes);
1236             }
1237 
1238             break;
1239         } while (true);
1240     }
1241 
1242     /*
1243      * Recover states.
1244      * Restore targetNode first.
1245      */
1246     targetNode.SetState(kReady);
1247     targetNode.SetSchedCycle(0);
1248     scheduledNodes.pop_back();
1249     mad->RestoreStates(occupyTable, unitSize);
1250 
1251     /* Update readyList. */
1252     for (auto succLink : targetNode.GetSuccs()) {
1253         DepNode &succNode = succLink->GetTo();
1254         succNode.IncreaseValidPredsSize();
1255         succNode.SetState(kNormal);
1256     }
1257 
1258     if (targetNode.GetType() == kNodeTypeSeparator) {
1259         /* If target node is separator node, update lastSeparatorIndex. */
1260         lastSeparatorIndex -= kMaxDependenceNum;
1261     }
1262 }
1263 
1264 /*
1265  * Brute force schedule:
1266  * Find all possible schedule lists of the current bb and calculate each list's execution cycles;
1267  * return the optimal schedule list and its cycle count.
1268  */
1269 uint32 AArch64Schedule::DoBruteForceSchedule()
1270 {
1271     MapleVector<DepNode *> scheduledNodes(alloc.Adapter());
1272     MapleVector<DepNode *> optimizedScheduledNodes(alloc.Adapter());
1273 
1274     uint32 currCycle = 0;
1275     uint32 maxCycleCount = 0xFFFFFFFF;
1276     Init();
1277 
1278     /* Schedule First separator. */
1279     DepNode *targetNode = readyList.front();
1280     targetNode->SetState(kScheduled);
1281     targetNode->SetSchedCycle(currCycle);
1282     scheduledNodes.emplace_back(targetNode);
1283     readyList.clear();
1284 
1285     /* Update readyList. */
1286     UpdateReadyList(*targetNode, readyList, false);
1287 
1288     DEBUG_ASSERT(targetNode->GetType() == kNodeTypeSeparator, "The first node should be separator node.");
1289     DEBUG_ASSERT(!readyList.empty(), "readyList should not be empty.");
1290 
1291     for (auto targetNodeTemp : readyList) {
1292         IterateBruteForce(*targetNodeTemp, readyList, currCycle, scheduledNodes, maxCycleCount,
1293                           optimizedScheduledNodes);
1294     }
1295 
1296     nodes = optimizedScheduledNodes;
1297     return maxCycleCount;
1298 }
1299 
1300 /*
1301  * Update ready list after the targetNode has been scheduled.
1302  * For every successor of targetNode, if all of its predecessors have been scheduled,
1303  * add it to the ready list and update its information (like state and estart).
1304  */
1305 void AArch64Schedule::UpdateReadyList(DepNode &targetNode, MapleVector<DepNode *> &readyList, bool updateEStart)
1306 {
1307     for (auto succLink : targetNode.GetSuccs()) {
1308         DepNode &succNode = succLink->GetTo();
1309         succNode.DescreaseValidPredsSize();
1310         if (succNode.GetValidPredsSize() == 0) {
1311             readyList.emplace_back(&succNode);
1312             succNode.SetState(kReady);
1313 
1314             /* Set eStart. */
1315             if (updateEStart) {
1316                 uint32 maxEstart = 0;
1317                 for (auto predLink : succNode.GetPreds()) {
1318                     DepNode &predNode = predLink->GetFrom();
1319                     uint32 eStart = predNode.GetSchedCycle() + predLink->GetLatency();
1320                     maxEstart = (maxEstart < eStart ? eStart : maxEstart);
1321                 }
1322                 succNode.SetEStart(maxEstart);
1323             }
1324         }
1325     }
1326 }
1327 
1328 /* For every node of nodes, dump its dependence information. */
1329 void AArch64Schedule::DumpDepGraph(const MapleVector<DepNode *> &nodes) const
1330 {
1331     for (auto node : nodes) {
1332         depAnalysis->DumpDepNode(*node);
1333         LogInfo::MapleLogger() << "---------- preds ----------"
1334                                << "\n";
1335         for (auto pred : node->GetPreds()) {
1336             depAnalysis->DumpDepLink(*pred, &(pred->GetFrom()));
1337         }
1338         LogInfo::MapleLogger() << "---------- succs ----------"
1339                                << "\n";
1340         for (auto succ : node->GetSuccs()) {
1341             depAnalysis->DumpDepLink(*succ, &(succ->GetTo()));
1342         }
1343         LogInfo::MapleLogger() << "---------------------------"
1344                                << "\n";
1345     }
1346 }
1347 
1348 /* For every node of nodes, dump its schedule time according to the simulate type, along with its instruction. */
1349 void AArch64Schedule::DumpScheduleResult(const MapleVector<DepNode *> &nodes, SimulateType type) const
1350 {
1351     for (auto node : nodes) {
1352         LogInfo::MapleLogger() << "cycle[ ";
1353         switch (type) {
1354             case kListSchedule:
1355                 LogInfo::MapleLogger() << node->GetSchedCycle();
1356                 break;
1357             case kBruteForce:
1358                 LogInfo::MapleLogger() << node->GetBruteForceSchedCycle();
1359                 break;
1360             case kSimulateOnly:
1361                 LogInfo::MapleLogger() << node->GetSimulateCycle();
1362                 break;
1363         }
1364         LogInfo::MapleLogger() << " ]  ";
1365         node->GetInsn()->Dump();
1366         LogInfo::MapleLogger() << "\n";
1367     }
1368 }
1369 
1370 /* Print bb's dependence dot graph information to a file. */
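/*
 * Sketch of the emitted dot text (pointer values, insn id and mnemonic are illustrative):
 *   insn0xaaab1234 -> insn0xaaab5678 [color=red,label= "2"];
 *   insn0xaaab1234[shape=box,label= " 7:
 *   { add
 *   }"];
 * True dependences are colored red; the edge label is the latency.
 */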
void AArch64Schedule::GenerateDot(const BB &bb, const MapleVector<DepNode *> &nodes) const
{
    std::streambuf *coutBuf = std::cout.rdbuf(); /* keep original cout buffer */
    std::ofstream dgFile;
    std::streambuf *buf = dgFile.rdbuf();
    std::cout.rdbuf(buf);

    /* construct the file name */
    std::string fileName;
    fileName.append(phaseName);
    fileName.append("_");
    fileName.append(cgFunc.GetName());
    fileName.append("_BB");
    auto str = std::to_string(bb.GetId());
    fileName.append(str);
    fileName.append("_dep_graph.dot");

    dgFile.open(fileName.c_str(), std::ios::trunc);
    if (!dgFile.is_open()) {
        LogInfo::MapleLogger(kLlWarn) << "fileName:" << fileName << " open failure.\n";
        return;
    }
    dgFile << "digraph {\n";
    for (auto node : nodes) {
        for (auto succ : node->GetSuccs()) {
            dgFile << "insn" << node->GetInsn() << " -> "
                   << "insn" << succ->GetTo().GetInsn();
            dgFile << " [";
            if (succ->GetDepType() == kDependenceTypeTrue) {
                dgFile << "color=red,";
            }
            dgFile << "label= \"" << succ->GetLatency() << "\"";
            dgFile << "];\n";
        }
    }

    for (auto node : nodes) {
        MOperator mOp = node->GetInsn()->GetMachineOpcode();
        const InsnDesc *md = &AArch64CG::kMd[mOp];
        dgFile << "insn" << node->GetInsn() << "[";
        dgFile << "shape=box,label= \" " << node->GetInsn()->GetId() << ":\n";
        dgFile << "{ ";
        dgFile << md->name << "\n";
        dgFile << "}\"];\n";
    }
    dgFile << "}\n";
    dgFile.flush();
    dgFile.close();
    std::cout.rdbuf(coutBuf);
}

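/*
 * Classify regNO: physical registers are mapped to int/float directly; virtual registers
 * are looked up through the CGFunc's virtual register operand table.
 */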
RegType AArch64ScheduleProcessInfo::GetRegisterType(CGFunc &f, regno_t regNO)
{
    if (AArch64isa::IsPhysicalRegister(regNO)) {
        if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
            return kRegTyInt;
        } else if (AArch64isa::IsFPSIMDRegister(static_cast<AArch64reg>(regNO))) {
            return kRegTyFloat;
        } else {
            CHECK_FATAL(false, "unknown physical reg");
        }
    } else {
        RegOperand *curRegOpnd = f.GetVirtualRegisterOperand(regNO);
        DEBUG_ASSERT(curRegOpnd != nullptr, "register is neither physical nor virtual");
        return curRegOpnd->GetRegisterType();
    }
}

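/* Add regNO to or remove it from the live register set used for pressure tracking, keyed by its register type. */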
void AArch64ScheduleProcessInfo::VaryLiveRegSet(CGFunc &f, regno_t regNO, bool isInc)
{
    RegType registerTy = GetRegisterType(f, regNO);
    if (registerTy == kRegTyInt || registerTy == kRegTyVary) {
        isInc ? IncIntLiveRegSet(regNO) : DecIntLiveRegSet(regNO);
    } else if (registerTy == kRegTyFloat) {
        isInc ? IncFpLiveRegSet(regNO) : DecFpLiveRegSet(regNO);
    }
    /* consider other register types */
}

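/* For every register in regNOs freed by this node, record a free-register event on the node, classified by register type; CC registers are not counted. */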
void AArch64ScheduleProcessInfo::VaryFreeRegSet(CGFunc &f, std::set<regno_t> regNOs, DepNode &node)
{
    for (auto regNO : regNOs) {
        RegType registerTy = GetRegisterType(f, regNO);
        if (registerTy == kRegTyInt || registerTy == kRegTyVary /* memory base register must be int */) {
            IncFreeIntRegNode(node);
        } else if (registerTy == kRegTyFloat) {
            IncFreeFpRegNode(node);
        } else if (registerTy == kRegTyCc) {
            /* do not count CC reg */
            return;
        } else {
            /* consider other register types */
            CHECK_FATAL(false, "do not support this type of register");
        }
    }
}

/* Do brute force scheduling and dump scheduling information */
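/* Brute force is only attempted when the block has fewer than maxBruteForceNum nodes; larger blocks fall back to plain list scheduling. */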
void AArch64Schedule::BruteForceScheduling(const BB &bb)
{
    LogInfo::MapleLogger() << "\n\n$$ Function: " << cgFunc.GetName();
    LogInfo::MapleLogger() << "\n    BB id = " << bb.GetId() << "; nodes.size = " << nodes.size() << "\n";

    constexpr uint32 maxBruteForceNum = 50;
    if (nodes.size() < maxBruteForceNum) {
        GenerateDot(bb, nodes);
        uint32 maxBruteForceCycle = DoBruteForceSchedule();
        MapleVector<DepNode *> bruteNodes = nodes;
        uint32 maxSchedCycle = DoSchedule();
        if (maxBruteForceCycle < maxSchedCycle) {
            LogInfo::MapleLogger() << "maxBruteForceCycle = " << maxBruteForceCycle << "; maxSchedCycle = ";
            LogInfo::MapleLogger() << maxSchedCycle << "\n";
            LogInfo::MapleLogger() << "\n    ## Dump dependence graph ##    "
                                   << "\n";
            DumpDepGraph(nodes);
            LogInfo::MapleLogger() << "\n    ** Dump bruteForce scheduling result."
                                   << "\n";
            DumpScheduleResult(bruteNodes, kBruteForce);
            LogInfo::MapleLogger() << "\n    ^^ Dump list scheduling result."
                                   << "\n";
            DumpScheduleResult(nodes, kListSchedule);
        }
    } else {
        LogInfo::MapleLogger() << "Skip BruteForce scheduling."
                               << "\n";
        DoSchedule();
    }
}

/* Do simulate scheduling and dump scheduling information */
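/* Compares the cycle count of the original instruction order (SimulateOnly) with the list-scheduled order (DoSchedule) and logs whether scheduling improved, worsened, or matched it. */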
void AArch64Schedule::SimulateScheduling(const BB &bb)
{
    uint32 originCycle = SimulateOnly();
    MapleVector<DepNode *> oldNodes = nodes;
    uint32 schedCycle = DoSchedule();
    if (originCycle < schedCycle) {
        LogInfo::MapleLogger() << "Worse cycle [ " << (schedCycle - originCycle) << " ]; ";
        LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = ";
        LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size();
        LogInfo::MapleLogger() << ";    $$ Function: " << cgFunc.GetName();
        LogInfo::MapleLogger() << ";    BB id = " << bb.GetId() << "\n";
        LogInfo::MapleLogger() << "\n    ** Dump original result."
                               << "\n";
        DumpScheduleResult(oldNodes, kSimulateOnly);
        LogInfo::MapleLogger() << "\n    ^^ Dump list scheduling result."
                               << "\n";
        DumpScheduleResult(nodes, kListSchedule);
    } else if (originCycle > schedCycle) {
        LogInfo::MapleLogger() << "Advance cycle [ " << (originCycle - schedCycle) << " ]; ";
        LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = ";
        LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size();
        LogInfo::MapleLogger() << ";    $$ Function: " << cgFunc.GetName();
        LogInfo::MapleLogger() << ";    BB id = " << bb.GetId() << "\n";
    } else {
        LogInfo::MapleLogger() << "Equal cycle [ 0 ]; originCycle = " << originCycle;
        LogInfo::MapleLogger() << " ], ignore. nodes.size = " << nodes.size() << "\n";
    }
}

/*
 * Local list scheduling.
 * Schedules insns within each basic block.
 */
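/*
 * Before RA the scheduler tracks register pressure (and uses RegPressureScheduling for return blocks);
 * after RA it first combines clinit and str/ldr pairs, then runs list, brute-force, or simulate
 * scheduling according to the CG options.
 */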
void AArch64Schedule::ListScheduling(bool beforeRA)
{
    InitIDAndLoc();

    mad = Globals::GetInstance()->GetMAD();
    if (beforeRA) {
        RegPressure::SetMaxRegClassNum(kRegisterLast);
    }
    depAnalysis = memPool.New<AArch64DepAnalysis>(cgFunc, memPool, *mad, beforeRA);

    FOR_ALL_BB(bb, &cgFunc) {
        depAnalysis->Run(*bb, nodes);

        if (LIST_SCHED_DUMP_REF) {
            GenerateDot(*bb, nodes);
            DumpDepGraph(nodes);
        }
        if (beforeRA) {
            liveInRegNo = bb->GetLiveInRegNO();
            liveOutRegNo = bb->GetLiveOutRegNO();
            if (bb->GetKind() != BB::kBBReturn) {
                SetConsiderRegPressure();
                DoSchedule();
            } else {
                RegPressureScheduling(*bb, nodes);
            }
        } else {
            ClinitPairOpt();
            MemoryAccessPairOpt();
            if (CGOptions::IsDruteForceSched()) {
                BruteForceScheduling(*bb);
            } else if (CGOptions::IsSimulateSched()) {
                SimulateScheduling(*bb);
            } else {
                DoSchedule();
            }
        }

        FinalizeScheduling(*bb, *depAnalysis);
    }
}
} /* namespace maplebe */