/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "aarch64_schedule.h"
#include <ctime>
#include "aarch64_cg.h"
#include "aarch64_operand.h"
#include "aarch64_dependence.h"
#include "pressure.h"

/*
 * This phase is instruction scheduling.
 * It performs local list scheduling, i.e. scheduling within a basic block.
 * The entry is AArch64Schedule::ListScheduling, which traverses all basic blocks.
 * For each basic block it:
 * 1. builds a dependence graph;
 * 2. combines clinit pairs and str&ldr pairs;
 * 3. reorders instructions.
 */
namespace maplebe {
namespace {
constexpr uint32 kClinitAdvanceCycle = 10;
constexpr uint32 kAdrpLdrAdvanceCycle = 2;
constexpr uint32 kClinitTailAdvanceCycle = 4;
constexpr uint32 kSecondToLastNode = 2;
} // namespace

uint32 AArch64Schedule::maxUnitIndex = 0;
/* reserve two registers for special purposes */
int AArch64Schedule::intRegPressureThreshold = static_cast<int>(R27 - R0);
int AArch64Schedule::fpRegPressureThreshold = static_cast<int>(V30 - V0);
int AArch64Schedule::intCalleeSaveThresholdBase = static_cast<int>(R29 - R19);
int AArch64Schedule::intCalleeSaveThresholdEnhance = static_cast<int>(R30 - R19);
int AArch64Schedule::fpCalleeSaveThreshold = static_cast<int>(R16 - R8);
/* Init schedule's data structures. */
void AArch64Schedule::Init()
{
    readyList.clear();
    nodeSize = nodes.size();
    lastSeparatorIndex = 0;
    mad->ReleaseAllUnits();
    DepNode *node = nodes[0];

    DEBUG_ASSERT(node->GetType() == kNodeTypeSeparator,
                 "CG internal error, the first node should be a separator node.");

    if (CGOptions::IsDruteForceSched() || CGOptions::IsSimulateSched()) {
        for (auto nodeTemp : nodes) {
            nodeTemp->SetVisit(0);
            nodeTemp->SetState(kNormal);
            nodeTemp->SetSchedCycle(0);
            nodeTemp->SetEStart(0);
            nodeTemp->SetLStart(0);
        }
    }

    readyList.emplace_back(node);
    node->SetState(kReady);

    /* Init validPredsSize and validSuccsSize. */
    for (auto nodeTemp : nodes) {
        nodeTemp->SetValidPredsSize(nodeTemp->GetPreds().size());
        nodeTemp->SetValidSuccsSize(nodeTemp->GetSuccs().size());
    }
}

/*
 * An insn that can be combined should meet these conditions:
 * 1. it is an str/ldr insn;
 * 2. its address mode is kAddrModeBOi, [baseReg, offset];
 * 3. the register operand size equals the memory operand size;
 * 4. if USE_32BIT_REF is defined, the register operand size should be 4 bytes;
 * 5. for stp/ldp, the imm should be within -512 and 504 (64-bit), or -256 and 252 (32-bit);
 * 6. a pair instr for 8/4-byte registers must have an imm that is a multiple of 8/4.
 * If the insn can be combined, return true.
 */
bool AArch64Schedule::CanCombine(const Insn &insn) const
{
    MOperator opCode = insn.GetMachineOpcode();
    if ((opCode != MOP_xldr) && (opCode != MOP_wldr) && (opCode != MOP_dldr) && (opCode != MOP_sldr) &&
        (opCode != MOP_xstr) && (opCode != MOP_wstr) && (opCode != MOP_dstr) && (opCode != MOP_sstr)) {
        return false;
    }

    DEBUG_ASSERT(insn.GetOperand(1).IsMemoryAccessOperand(), "expects mem operands");
    auto &memOpnd = static_cast<MemOperand &>(insn.GetOperand(1));
    MemOperand::AArch64AddressingMode addrMode = memOpnd.GetAddrMode();
    if ((addrMode != MemOperand::kAddrModeBOi) || !memOpnd.IsIntactIndexed()) {
        return false;
    }

    auto &regOpnd = static_cast<RegOperand &>(insn.GetOperand(0));
    if (regOpnd.GetSize() != memOpnd.GetSize()) {
        return false;
    }

    uint32 size = regOpnd.GetSize() >> kLog2BitsPerByte;
#ifdef USE_32BIT_REF
    if (insn.IsAccessRefField() && (size > (kIntregBytelen >> 1))) {
        return false;
    }
#endif /* USE_32BIT_REF */

    OfstOperand *offset = memOpnd.GetOffsetImmediate();
    if (offset == nullptr) {
        return false;
    }
    int32 offsetValue = static_cast<int32>(offset->GetOffsetValue());
    if (size == kIntregBytelen) { /* 64 bit */
        if ((offsetValue <= kStpLdpImm64LowerBound) || (offsetValue >= kStpLdpImm64UpperBound)) {
            return false;
        }
    } else if (size == (kIntregBytelen >> 1)) { /* 32 bit */
        if ((offsetValue <= kStpLdpImm32LowerBound) || (offsetValue >= kStpLdpImm32UpperBound)) {
            return false;
        }
    }

    /* a pair instr for 8/4-byte registers must have an imm that is a multiple of 8/4 */
    if ((static_cast<uint32>(offsetValue) % size) != 0) {
        return false;
    }
    return true;
}
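
/*
 * Illustrative example (hypothetical operands, not taken from the source): str x1, [x0, #16]
 * meets all of the conditions above (kAddrModeBOi addressing, a 64-bit register matching a
 * 64-bit memory access, offset 16 inside (-512, 504) and a multiple of 8), so CanCombine
 * returns true, while str x1, [x0, #20] is rejected because 20 is not a multiple of 8.
 */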

/* After building the dependence graph, combine str&ldr pairs. */
void AArch64Schedule::MemoryAccessPairOpt()
{
    Init();
    std::vector<DepNode *> memList;

    while ((!readyList.empty()) || !memList.empty()) {
        DepNode *readNode = nullptr;
        if (!readyList.empty()) {
            readNode = readyList[0];
            readyList.erase(readyList.begin());
        } else {
            if (memList[0]->GetType() != kNodeTypeEmpty) {
                FindAndCombineMemoryAccessPair(memList);
            }
            readNode = memList[0];
            memList.erase(memList.begin());
        }

        /* schedule readNode */
        CHECK_FATAL(readNode != nullptr, "readNode is null in MemoryAccessPairOpt");
        readNode->SetState(kScheduled);

        /* add readNode's succs to readyList or memList. */
        for (auto succLink : readNode->GetSuccs()) {
            DepNode &succNode = succLink->GetTo();
            succNode.DescreaseValidPredsSize();
            if (succNode.GetValidPredsSize() == 0) {
                DEBUG_ASSERT(succNode.GetState() == kNormal, "schedule state should be kNormal");
                succNode.SetState(kReady);
                DEBUG_ASSERT(succNode.GetInsn() != nullptr, "insn can't be nullptr!");
                if (CanCombine(*succNode.GetInsn())) {
                    memList.emplace_back(&succNode);
                } else {
                    readyList.emplace_back(&succNode);
                }
            }
        }
    }

    for (auto node : nodes) {
        node->SetVisit(0);
        node->SetState(kNormal);
    }
}

/* Find and combine a correct MemoryAccessPair for memList[0]. */
void AArch64Schedule::FindAndCombineMemoryAccessPair(const std::vector<DepNode *> &memList)
{
    DEBUG_ASSERT(!memList.empty(), "memList should not be empty");
    CHECK_FATAL(memList[0]->GetInsn() != nullptr, "memList[0]'s insn should not be nullptr");
    MemOperand *currMemOpnd = static_cast<MemOperand *>(memList[0]->GetInsn()->GetMemOpnd());
    DEBUG_ASSERT(currMemOpnd != nullptr, "opnd should not be nullptr");
    DEBUG_ASSERT(currMemOpnd->IsMemoryAccessOperand(), "opnd should be memOpnd");
    int32 currOffsetVal = static_cast<int32>(currMemOpnd->GetOffsetImmediate()->GetOffsetValue());
    MOperator currMop = memList[0]->GetInsn()->GetMachineOpcode();
    /* find a depNode to combine with memList[0], and break */
    for (auto it = std::next(memList.begin(), 1); it != memList.end(); ++it) {
        DEBUG_ASSERT((*it)->GetInsn() != nullptr, "null ptr check");

        if (currMop == (*it)->GetInsn()->GetMachineOpcode()) {
            MemOperand *nextMemOpnd = static_cast<MemOperand *>((*it)->GetInsn()->GetMemOpnd());
            CHECK_FATAL(nextMemOpnd != nullptr, "opnd should not be nullptr");
            CHECK_FATAL(nextMemOpnd->IsMemoryAccessOperand(), "opnd should be MemOperand");
            int32 nextOffsetVal = static_cast<int32>(nextMemOpnd->GetOffsetImmediate()->GetOffsetValue());
            uint32 size = currMemOpnd->GetSize() >> kLog2BitsPerByte;
            if ((nextMemOpnd->GetBaseRegister() == currMemOpnd->GetBaseRegister()) &&
                (nextMemOpnd->GetSize() == currMemOpnd->GetSize()) &&
                (static_cast<uint32>(abs(nextOffsetVal - currOffsetVal)) == size)) {
                /*
                 * The ARM Architecture Reference Manual for the ARMv8-A architecture profile,
                 * LDP on page K1-6125, declares that ldp can't use the same reg twice.
                 */
                if (((currMop == MOP_xldr) || (currMop == MOP_sldr) || (currMop == MOP_dldr) ||
                     (currMop == MOP_wldr)) &&
                    &(memList[0]->GetInsn()->GetOperand(0)) == &((*it)->GetInsn()->GetOperand(0))) {
                    continue;
                }
                if (static_cast<RegOperand &>((*it)->GetInsn()->GetOperand(0)).GetRegisterType() !=
                    static_cast<RegOperand &>(memList[0]->GetInsn()->GetOperand(0)).GetRegisterType()) {
                    continue;
                }

                if (LIST_SCHED_DUMP_REF) {
                    LogInfo::MapleLogger() << "Combine insn: "
                                           << "\n";
                    memList[0]->GetInsn()->Dump();
                    (*it)->GetInsn()->Dump();
                }
                depAnalysis->CombineMemoryAccessPair(*memList[0], **it, nextOffsetVal > currOffsetVal);
                if (LIST_SCHED_DUMP_REF) {
                    LogInfo::MapleLogger() << "To: "
                                           << "\n";
                    memList[0]->GetInsn()->Dump();
                }
                break;
            }
        }
    }
}
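
/*
 * Illustrative example (hypothetical operands): with memList[0] = ldr x1, [x2, #8] and a later
 * candidate ldr x3, [x2, #16], both insns share the opcode MOP_xldr, the base x2 and the 64-bit
 * size, and their offsets differ by exactly the access size (8), so they are handed to
 * CombineMemoryAccessPair; the boolean argument records that the candidate sits at the higher
 * offset. A candidate ldr x1, [x2, #16] would be skipped because ldp cannot name the same
 * destination register twice.
 */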

/* Combine clinit pairs. */
void AArch64Schedule::ClinitPairOpt()
{
    for (auto it = nodes.begin(); it != nodes.end(); ++it) {
        auto nextIt = std::next(it, 1);
        if (nextIt == nodes.end()) {
            return;
        }

        if ((*it)->GetInsn()->GetMachineOpcode() == MOP_adrp_ldr) {
            if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) {
                depAnalysis->CombineClinit(**it, **(nextIt), false);
            } else if ((*nextIt)->GetType() == kNodeTypeSeparator) {
                nextIt = std::next(nextIt, 1);
                if (nextIt == nodes.end()) {
                    return;
                }
                if ((*nextIt)->GetInsn()->GetMachineOpcode() == MOP_clinit_tail) {
                    depAnalysis->CombineClinit(**it, **(nextIt), true);
                }
            }
        }
    }
}

/* Return the index of the next node whose type is kNodeTypeSeparator. */
uint32 AArch64Schedule::GetNextSepIndex() const
{
    return ((lastSeparatorIndex + kMaxDependenceNum) < nodeSize) ? (lastSeparatorIndex + kMaxDependenceNum)
                                                                 : (nodes.size() - 1);
}

/* Do register pressure scheduling. */
void AArch64Schedule::RegPressureScheduling(BB &bb, MapleVector<DepNode *> &nodes)
{
    RegPressureSchedule *regSchedule = memPool.New<RegPressureSchedule>(cgFunc, alloc);
    /*
     * Get the current physical register amounts:
     * undef, Int Reg, Float Reg, Flag Reg
     */
    const std::vector<int32> kRegNumVec = {0, V0, (kMaxRegNum - V0) + 1, 1};
    regSchedule->InitBBInfo(bb, memPool, nodes);
    regSchedule->BuildPhyRegInfo(kRegNumVec);
    regSchedule->DoScheduling(nodes);
}

/*
 * Compute the earliest start of each node.
 * Return value: the maximum estart.
 */
uint32 AArch64Schedule::ComputeEstart(uint32 cycle)
{
    std::vector<DepNode *> readyNodes;
    uint32 maxIndex = GetNextSepIndex();

    if (CGOptions::IsDebugSched()) {
        /* Check validPredsSize. */
        for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
            DepNode *node = nodes[i];
            [[maybe_unused]] int32 schedNum = 0;
            for (const auto *predLink : node->GetPreds()) {
                if (predLink->GetFrom().GetState() == kScheduled) {
                    ++schedNum;
                }
            }
            DEBUG_ASSERT((node->GetPreds().size() - schedNum) == node->GetValidPredsSize(), "validPredsSize error.");
        }
    }

    DEBUG_ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator,
                 "CG internal error, nodes[maxIndex] should be a separator node.");

    (void)readyNodes.insert(readyNodes.begin(), readyList.begin(), readyList.end());

    uint32 maxEstart = cycle;
    for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
        DepNode *node = nodes[i];
        node->SetVisit(0);
    }

    for (auto *node : readyNodes) {
        DEBUG_ASSERT(node->GetState() == kReady, "CG internal error, all nodes in ready list should be ready.");
        if (node->GetEStart() < cycle) {
            node->SetEStart(cycle);
        }
    }

    while (!readyNodes.empty()) {
        DepNode *node = readyNodes.front();
        readyNodes.erase(readyNodes.begin());

        for (const auto *succLink : node->GetSuccs()) {
            DepNode &succNode = succLink->GetTo();
            if (succNode.GetType() == kNodeTypeSeparator) {
                continue;
            }

            if (succNode.GetEStart() < (node->GetEStart() + succLink->GetLatency())) {
                succNode.SetEStart(node->GetEStart() + succLink->GetLatency());
            }
            maxEstart = (maxEstart < succNode.GetEStart() ? succNode.GetEStart() : maxEstart);
            succNode.IncreaseVisit();
            if ((succNode.GetVisit() >= succNode.GetValidPredsSize()) && (succNode.GetType() != kNodeTypeSeparator)) {
                readyNodes.emplace_back(&succNode);
            }
            DEBUG_ASSERT(succNode.GetVisit() <= succNode.GetValidPredsSize(), "CG internal error.");
        }
    }

    return maxEstart;
}

/* Compute the latest start of each node. */
void AArch64Schedule::ComputeLstart(uint32 maxEstart)
{
    /* std::vector is better than std::queue in run time */
    std::vector<DepNode *> readyNodes;
    uint32 maxIndex = GetNextSepIndex();

    DEBUG_ASSERT(nodes[maxIndex]->GetType() == kNodeTypeSeparator,
                 "CG internal error, nodes[maxIndex] should be a separator node.");

    for (uint32 i = lastSeparatorIndex; i <= maxIndex; ++i) {
        DepNode *node = nodes[i];
        node->SetLStart(maxEstart);
        node->SetVisit(0);
    }

    readyNodes.emplace_back(nodes[maxIndex]);
    while (!readyNodes.empty()) {
        DepNode *node = readyNodes.front();
        readyNodes.erase(readyNodes.begin());
        for (const auto *predLink : node->GetPreds()) {
            DepNode &predNode = predLink->GetFrom();
            if (predNode.GetState() == kScheduled) {
                continue;
            }

            if (predNode.GetLStart() > (node->GetLStart() - predLink->GetLatency())) {
                predNode.SetLStart(node->GetLStart() - predLink->GetLatency());
            }
            predNode.IncreaseVisit();
            if ((predNode.GetVisit() >= predNode.GetValidSuccsSize()) && (predNode.GetType() != kNodeTypeSeparator)) {
                readyNodes.emplace_back(&predNode);
            }

            DEBUG_ASSERT(predNode.GetVisit() <= predNode.GetValidSuccsSize(), "CG internal error.");
        }
    }
}
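
/*
 * Illustrative example (hypothetical latencies, assuming the edges into the trailing separator
 * have latency 0): for a ready chain A -> B -> C with latency 2 on each edge and cycle = 0,
 * ComputeEstart yields EStart(A) = 0, EStart(B) = 2, EStart(C) = 4 and returns maxEstart = 4;
 * ComputeLstart then walks backwards from the separator and leaves LStart(C) = 4, LStart(B) = 2,
 * LStart(A) = 0, so every node on this critical path has EStart equal to LStart.
 */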

/* Compute the earliest start and latest start of every node that is in readyList and not yet scheduled. */
void AArch64Schedule::UpdateELStartsOnCycle(uint32 cycle)
{
    ComputeLstart(ComputeEstart(cycle));
}

/*
 * If all units that this node needs when it is scheduled are free, this node can be scheduled;
 * return true.
 */
bool DepNode::CanBeScheduled() const
{
    for (uint32 i = 0; i < unitNum; ++i) {
        Unit *unit = units[i];
        if (unit != nullptr) {
            if (!unit->IsFree(i)) {
                return false;
            }
        }
    }
    return true;
}

/* Occupy the units that this node needs when it is being scheduled. */
void DepNode::OccupyUnits()
{
    for (uint32 i = 0; i < unitNum; ++i) {
        Unit *unit = units[i];
        if (unit != nullptr) {
            unit->Occupy(*insn, i);
        }
    }
}

/* Get the unit kind of this node's units[0]. */
uint32 DepNode::GetUnitKind() const
{
    uint32 retValue = 0;
    if ((units == nullptr) || (units[0] == nullptr)) {
        return retValue;
    }

    switch (units[0]->GetUnitId()) {
        case kUnitIdSlotD:
            retValue |= kUnitKindSlot0;
            break;
        case kUnitIdAgen:
        case kUnitIdSlotSAgen:
            retValue |= kUnitKindAgen;
            break;
        case kUnitIdSlotDAgen:
            retValue |= kUnitKindAgen;
            retValue |= kUnitKindSlot0;
            break;
        case kUnitIdHazard:
        case kUnitIdSlotSHazard:
            retValue |= kUnitKindHazard;
            break;
        case kUnitIdCrypto:
            retValue |= kUnitKindCrypto;
            break;
        case kUnitIdMul:
        case kUnitIdSlotSMul:
            retValue |= kUnitKindMul;
            break;
        case kUnitIdDiv:
            retValue |= kUnitKindDiv;
            break;
        case kUnitIdBranch:
        case kUnitIdSlotSBranch:
            retValue |= kUnitKindBranch;
            break;
        case kUnitIdStAgu:
            retValue |= kUnitKindStAgu;
            break;
        case kUnitIdLdAgu:
            retValue |= kUnitKindLdAgu;
            break;
        case kUnitIdFpAluS:
        case kUnitIdFpAluD:
            retValue |= kUnitKindFpAlu;
            break;
        case kUnitIdFpMulS:
        case kUnitIdFpMulD:
            retValue |= kUnitKindFpMul;
            break;
        case kUnitIdFpDivS:
        case kUnitIdFpDivD:
            retValue |= kUnitKindFpDiv;
            break;
        case kUnitIdSlot0LdAgu:
            retValue |= kUnitKindSlot0;
            retValue |= kUnitKindLdAgu;
            break;
        case kUnitIdSlot0StAgu:
            retValue |= kUnitKindSlot0;
            retValue |= kUnitKindStAgu;
            break;
        default:
            break;
    }

    return retValue;
}

/* Count unit kinds into an array. Each element of the array counts how many nodes of a node set use that unit kind. */
void AArch64Schedule::CountUnitKind(const DepNode &depNode, uint32 array[], const uint32 arraySize) const
{
    (void)arraySize;
    DEBUG_ASSERT(arraySize >= kUnitKindLast, "CG internal error. unit kind number is not correct.");
    uint32 unitKind = depNode.GetUnitKind();
    int32 index = static_cast<int32>(__builtin_ffs(unitKind));
    while (index) {
        DEBUG_ASSERT(static_cast<uint32>(index) < kUnitKindLast, "CG internal error. index error.");
        ++array[index];
        unitKind &= ~(1u << (index - 1u));
        index = __builtin_ffs(unitKind);
    }
}
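
/*
 * Illustrative example (hypothetical bit encoding): if depNode.GetUnitKind() returns 0b0101,
 * the first __builtin_ffs call yields 1, so ++array[1] runs and bit 0 is cleared, leaving 0b0100;
 * the next call yields 3, ++array[3] runs, and the loop stops once unitKind becomes 0.
 */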

/* Check whether a node uses a specific unit kind. */
bool AArch64Schedule::IfUseUnitKind(const DepNode &depNode, uint32 index)
{
    uint32 unitKind = depNode.GetUnitKind();
    int32 idx = static_cast<int32>(__builtin_ffs(unitKind));
    while (idx) {
        DEBUG_ASSERT(index < kUnitKindLast, "CG internal error. index error.");
        if (idx == static_cast<int32>(index)) {
            return true;
        }
        unitKind &= ~(1u << (idx - 1u));
        idx = __builtin_ffs(unitKind);
    }

    return false;
}

/* A sample schedule according to the dependence graph only, to verify the correctness of the dependence graph. */
void AArch64Schedule::RandomTest()
{
    Init();
    nodes.clear();

    while (!readyList.empty()) {
        DepNode *currNode = readyList.back();
        currNode->SetState(kScheduled);
        readyList.pop_back();
        nodes.emplace_back(currNode);

        for (auto succLink : currNode->GetSuccs()) {
            DepNode &succNode = succLink->GetTo();
            bool ready = true;
            for (auto predLink : succNode.GetPreds()) {
                DepNode &predNode = predLink->GetFrom();
                if (predNode.GetState() != kScheduled) {
                    ready = false;
                    break;
                }
            }

            if (ready) {
                DEBUG_ASSERT(succNode.GetState() == kNormal, "succNode must be kNormal");
                readyList.emplace_back(&succNode);
                succNode.SetState(kReady);
            }
        }
    }
}

/* Remove target from readyList. */
void AArch64Schedule::EraseNodeFromReadyList(const DepNode &target)
{
    EraseNodeFromNodeList(target, readyList);
}

/* Remove target from nodeList. */
void AArch64Schedule::EraseNodeFromNodeList(const DepNode &target, MapleVector<DepNode *> &nodeList)
{
    for (auto it = nodeList.begin(); it != nodeList.end(); ++it) {
        if ((*it) == &target) {
            nodeList.erase(it);
            return;
        }
    }

    DEBUG_ASSERT(false, "CG internal error, erase node fail.");
}

/* Dump the schedule information of every node in availableReadyList in the current cycle. */
void AArch64Schedule::DumpDebugInfo(const ScheduleProcessInfo &scheduleInfo)
{
    LogInfo::MapleLogger() << "Current cycle[ " << scheduleInfo.GetCurrCycle() << " ], Available in readyList is : \n";
    for (auto node : scheduleInfo.GetAvailableReadyList()) {
        LogInfo::MapleLogger() << "NodeIndex[ " << node->GetIndex() << " ], Estart[ " << node->GetEStart()
                               << " ], Lstart[ ";
        LogInfo::MapleLogger() << node->GetLStart() << " ], slot[ ";
        LogInfo::MapleLogger() << (node->GetReservation() == nullptr ? "SlotNone"
                                                                     : node->GetReservation()->GetSlotName())
                               << " ], ";
        LogInfo::MapleLogger() << "succNodeNum[ " << node->GetSuccs().size() << " ], ";
        node->GetInsn()->Dump();
        LogInfo::MapleLogger() << '\n';
    }
}

/*
 * Select a node from availableReadyList according to some heuristic rules, then:
 * 1. change targetNode's schedule information;
 * 2. try to add successors of targetNode to readyList;
 * 3. update the unscheduled node set when targetNode is the last kNodeTypeSeparator;
 * 4. update AdvanceCycle.
 */
void AArch64Schedule::SelectNode(AArch64ScheduleProcessInfo &scheduleInfo)
{
    auto &availableReadyList = scheduleInfo.GetAvailableReadyList();
    auto it = availableReadyList.begin();
    DepNode *targetNode = *it;
    if (availableReadyList.size() > 1) {
        CalculateMaxUnitKindCount(scheduleInfo);
        if (GetConsiderRegPressure()) {
            UpdateReleaseRegInfo(scheduleInfo);
        }
        ++it;
        for (; it != availableReadyList.end(); ++it) {
            if (CompareDepNode(**it, *targetNode, scheduleInfo)) {
                targetNode = *it;
            }
        }
    }
    /* The priority of a free-reg node is higher than the pipeline */
    while (!targetNode->CanBeScheduled()) {
        scheduleInfo.IncCurrCycle();
        mad->AdvanceCycle();
    }
    if (GetConsiderRegPressure() && !scheduleInfo.IsFirstSeparator()) {
        UpdateLiveRegSet(scheduleInfo, *targetNode);
    }
    /* push the target node into scheduled nodes and turn it into the kScheduled state */
    scheduleInfo.PushElemIntoScheduledNodes(targetNode);

    EraseNodeFromReadyList(*targetNode);

    if (CGOptions::IsDebugSched()) {
        LogInfo::MapleLogger() << "TargetNode : ";
        targetNode->GetInsn()->Dump();
        LogInfo::MapleLogger() << "\n";
    }

    /* Update readyList. */
    UpdateReadyList(*targetNode, readyList, true);

    if (targetNode->GetType() == kNodeTypeSeparator) {
        /* If the target node is a separator node, update lastSeparatorIndex and calculate the estart and lstart
         * of the depNodes between the current separator node and the new separator node.
         */
        if (!scheduleInfo.IsFirstSeparator()) {
            lastSeparatorIndex += kMaxDependenceNum;
            UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle());
        } else {
            scheduleInfo.ResetIsFirstSeparator();
        }
    }

    UpdateAdvanceCycle(scheduleInfo, *targetNode);
}

void AArch64Schedule::UpdateAdvanceCycle(AArch64ScheduleProcessInfo &scheduleInfo, const DepNode &targetNode)
{
    switch (targetNode.GetInsn()->GetLatencyType()) {
        case kLtClinit:
            scheduleInfo.SetAdvanceCycle(kClinitAdvanceCycle);
            break;
        case kLtAdrpLdr:
            scheduleInfo.SetAdvanceCycle(kAdrpLdrAdvanceCycle);
            break;
        case kLtClinitTail:
            scheduleInfo.SetAdvanceCycle(kClinitTailAdvanceCycle);
            break;
        default:
            break;
    }

    if ((scheduleInfo.GetAdvanceCycle() == 0) && mad->IsFullIssued()) {
        if (targetNode.GetEStart() > scheduleInfo.GetCurrCycle()) {
            scheduleInfo.SetAdvanceCycle(1 + targetNode.GetEStart() - scheduleInfo.GetCurrCycle());
        } else {
            scheduleInfo.SetAdvanceCycle(1);
        }
    }
}
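
/*
 * For instance, scheduling a node whose latency type is kLtAdrpLdr sets the advance cycle to
 * kAdrpLdrAdvanceCycle (2), so the machine model ticks two cycles before the next pick; for an
 * ordinary node on a fully issued cycle the advance becomes EStart - currCycle + 1 when EStart
 * is still in the future, and 1 otherwise.
 */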

/*
 * Advance mad's cycle until info's advanceCycle equals zero,
 * and then clear info's availableReadyList.
 */
void AArch64Schedule::UpdateScheduleProcessInfo(AArch64ScheduleProcessInfo &info)
{
    while (info.GetAdvanceCycle() > 0) {
        info.IncCurrCycle();
        mad->AdvanceCycle();
        info.DecAdvanceCycle();
    }
    info.ClearAvailableReadyList();
}

/*
 * Traverse readyList forward; if a node in readyList can be scheduled, add it to availableReadyList.
 * Return true if availableReadyList is not empty.
 */
bool AArch64Schedule::CheckSchedulable(AArch64ScheduleProcessInfo &info) const
{
    for (auto node : readyList) {
        if (GetConsiderRegPressure()) {
            info.PushElemIntoAvailableReadyList(node);
        } else {
            if (node->CanBeScheduled() && node->GetEStart() <= info.GetCurrCycle()) {
                info.PushElemIntoAvailableReadyList(node);
            }
        }
    }
    return info.AvailableReadyListIsEmpty() ? false : true;
}

/*
 * Calculate the estimated machine cycle count for an input node series.
 */
int AArch64Schedule::CalSeriesCycles(const MapleVector<DepNode *> &nodes)
{
    int currentCycle = 0;
    /* after an instruction is issued, the minimum cycle count for the next instruction is 1 */
    int instructionBaseCycleCount = 1;
    std::map<DepNode *, int> scheduledCycleMap;
    for (auto node : nodes) {
        int latencyCycle = 0;
        /* calculate the latest begin time of this node based on its predecessors' issue times and latencies */
        for (auto pred : node->GetPreds()) {
            DepNode &from = pred->GetFrom();
            int latency = static_cast<int>(pred->GetLatency());
            int fromCycle = scheduledCycleMap[&from];
            if (fromCycle + latency > latencyCycle) {
                latencyCycle = fromCycle + latency;
            }
        }
        /* the issue time of this node is the max of the next cycle and the latest begin time */
        if (currentCycle + instructionBaseCycleCount >= latencyCycle) {
            currentCycle = currentCycle + instructionBaseCycleCount;
        } else {
            currentCycle = latencyCycle;
        }
        /* record this node's issue cycle */
        scheduledCycleMap[node] = currentCycle;
    }
    return currentCycle;
}
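
/*
 * Illustrative example (hypothetical latencies): for a series I1, I2, I3 where I2 depends on I1
 * with latency 3 and I3 has no predecessor constraint, I1 issues at cycle 1, I2 waits until
 * cycle 1 + 3 = 4 because that is later than the next free slot at cycle 2, and I3 issues one
 * cycle later at cycle 5, so CalSeriesCycles returns 5.
 */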

/* After building the dependence graph, schedule insns. */
uint32 AArch64Schedule::DoSchedule()
{
    AArch64ScheduleProcessInfo scheduleInfo(nodeSize);
    Init();
    UpdateELStartsOnCycle(scheduleInfo.GetCurrCycle());
    InitLiveRegSet(scheduleInfo);
    while (!readyList.empty()) {
        UpdateScheduleProcessInfo(scheduleInfo);
        /* Check if schedulable */
        if (!CheckSchedulable(scheduleInfo)) {
            /* Advance cycle. */
            scheduleInfo.SetAdvanceCycle(1);
            continue;
        }

        if (scheduleInfo.GetLastUpdateCycle() < scheduleInfo.GetCurrCycle()) {
            scheduleInfo.SetLastUpdateCycle(scheduleInfo.GetCurrCycle());
        }

        if (CGOptions::IsDebugSched()) {
            DumpDebugInfo(scheduleInfo);
        }

        /* Select a node to schedule */
        SelectNode(scheduleInfo);
    }

    DEBUG_ASSERT(scheduleInfo.SizeOfScheduledNodes() == nodes.size(), "CG internal error, Not all nodes scheduled.");

    nodes.clear();
    (void)nodes.insert(nodes.begin(), scheduleInfo.GetScheduledNodes().begin(), scheduleInfo.GetScheduledNodes().end());
    /* the second-to-last node is the true last node, because the last one is a kNodeTypeSeparator node */
    DEBUG_ASSERT(nodes.size() - 2 >= 0, "size of nodes should be greater than or equal to 2"); // size not less than 2
    return (nodes[nodes.size() - 2]->GetSchedCycle()); // size - 2 gets the second-to-last node
}

struct RegisterInfoUnit {
    RegisterInfoUnit() : intRegNum(0), fpRegNum(0), ccRegNum(0) {}
    uint32 intRegNum = 0;
    uint32 fpRegNum = 0;
    uint32 ccRegNum = 0;
};

RegisterInfoUnit GetDepNodeDefType(const DepNode &depNode, CGFunc &f)
{
    RegisterInfoUnit rIU;
    for (auto defRegNO : depNode.GetDefRegnos()) {
        RegType defRegTy = AArch64ScheduleProcessInfo::GetRegisterType(f, defRegNO);
        if (defRegTy == kRegTyInt) {
            rIU.intRegNum++;
        } else if (defRegTy == kRegTyFloat) {
            rIU.fpRegNum++;
        } else if (defRegTy == kRegTyCc) {
            rIU.ccRegNum++;
            DEBUG_ASSERT(rIU.ccRegNum <= 1, "spill cc reg?");
        } else {
            CHECK_FATAL(false, "NIY aarch64 register type");
        }
    }
    /* a call node will not increase reg def pressure */
    if (depNode.GetInsn() != nullptr && depNode.GetInsn()->IsCall()) {
        rIU.intRegNum = 0;
        rIU.fpRegNum = 0;
    }
    return rIU;
}

AArch64Schedule::CSRResult AArch64Schedule::DoCSR(DepNode &node1, DepNode &node2,
                                                  AArch64ScheduleProcessInfo &scheduleInfo) const
{
    RegisterInfoUnit defRIU1 = GetDepNodeDefType(node1, cgFunc);
    RegisterInfoUnit defRIU2 = GetDepNodeDefType(node2, cgFunc);
    /* do not increase callee save pressure before call */
    if (static_cast<int>(scheduleInfo.SizeOfCalleeSaveLiveRegister(true)) >= intCalleeSaveThreshold) {
        if (defRIU1.intRegNum > 0 && defRIU2.intRegNum > 0) {
            CSRResult csrInfo = ScheduleCrossCall(node1, node2);
            if ((csrInfo == kNode1 && defRIU1.intRegNum >= scheduleInfo.GetFreeIntRegs(node1)) ||
                (csrInfo == kNode2 && defRIU2.intRegNum >= scheduleInfo.GetFreeIntRegs(node2))) {
                return csrInfo;
            }
        }
    }
    if (static_cast<int>(scheduleInfo.SizeOfCalleeSaveLiveRegister(false)) >= fpCalleeSaveThreshold) {
        if (defRIU1.fpRegNum > 0 && defRIU2.fpRegNum > 0) {
            CSRResult csrInfo = ScheduleCrossCall(node1, node2);
            if ((csrInfo == kNode1 && defRIU1.fpRegNum >= scheduleInfo.GetFreeFpRegs(node1)) ||
                (csrInfo == kNode2 && defRIU2.fpRegNum >= scheduleInfo.GetFreeFpRegs(node2))) {
                return csrInfo;
            }
        }
    }
    auto FindFreeRegNode = [&](bool isInt) -> CSRResult {
        auto freeRegNodes = isInt ? scheduleInfo.GetFreeIntRegNodeSet() : scheduleInfo.GetFreeFpRegNodeSet();
        if (freeRegNodes.find(&node1) != freeRegNodes.end() && freeRegNodes.find(&node2) == freeRegNodes.end()) {
            return kNode1;
        }
        if (freeRegNodes.find(&node1) == freeRegNodes.end() && freeRegNodes.find(&node2) != freeRegNodes.end()) {
            return kNode2;
        }
        return kDoCSP;
    };
    if (static_cast<int>(scheduleInfo.SizeOfIntLiveRegSet()) >= intRegPressureThreshold) {
        if (FindFreeRegNode(true) != kDoCSP) {
            return FindFreeRegNode(true);
        }
    }
    if (static_cast<int>(scheduleInfo.SizeOfFpLiveRegSet()) >= fpRegPressureThreshold) {
        if (FindFreeRegNode(false) != kDoCSP) {
            return FindFreeRegNode(false);
        }
    }

    bool canDoCSPFurther = false;
    if (static_cast<int>(scheduleInfo.SizeOfIntLiveRegSet()) >= intRegPressureThreshold) {
        if (defRIU1.intRegNum != defRIU2.intRegNum) {
            return defRIU1.intRegNum < defRIU2.intRegNum ? kNode1 : kNode2;
        } else {
            canDoCSPFurther = defRIU1.intRegNum == 0;
        }
    }
    if (static_cast<int>(scheduleInfo.SizeOfFpLiveRegSet()) >= fpRegPressureThreshold) {
        if (defRIU1.fpRegNum != defRIU2.fpRegNum) {
            return defRIU1.fpRegNum < defRIU2.fpRegNum ? kNode1 : kNode2;
        } else {
            canDoCSPFurther = (defRIU1.fpRegNum == 0 && canDoCSPFurther);
        }
    }
    /* if both nodes are going to increase reg pressure, do not do CSP further */
    return canDoCSPFurther ? kDoCSP : (node1.GetInsn()->GetId() < node2.GetInsn()->GetId() ? kNode1 : kNode2);
}

AArch64Schedule::CSRResult AArch64Schedule::ScheduleCrossCall(const DepNode &node1, const DepNode &node2) const
{
    uint32 node1ID = node1.GetInsn()->GetId();
    uint32 node2ID = node2.GetInsn()->GetId();
    bool order = node1ID < node2ID; /* true -- node1 before node2; false -- node1 after node2 */
    Insn *beginInsn = order ? node1.GetInsn() : node2.GetInsn();
    uint32 finialId = order ? node2ID : node1ID;
    for (Insn *checkInsn = beginInsn; (checkInsn != nullptr && checkInsn->GetId() <= finialId);
         checkInsn = checkInsn->GetNextMachineInsn()) {
        if (checkInsn->IsCall()) {
            return order ? kNode1 : kNode2;
        }
    }
    return kDoCSP;
};

/*
 * Compare the priorities of node1 and node2 according to some heuristic rules.
 * Return true if node1's priority is higher.
 * crp -- consider reg pressure
 */
bool AArch64Schedule::CompareDepNode(DepNode &node1, DepNode &node2, AArch64ScheduleProcessInfo &scheduleInfo) const
{
    /*
     * strategy CSR -- code schedule for register pressure
     * if pressure is above the threshold, select the node which can reduce register pressure
     */
    if (GetConsiderRegPressure()) {
        switch (DoCSR(node1, node2, scheduleInfo)) {
            case kNode1:
                return true;
            case kNode2:
                return false;
            default:
                break;
        }
    }
    /* strategy CSP -- code schedule for CPU pipeline */
    /* less LStart first */
    if (node1.GetLStart() != node2.GetLStart()) {
        return node1.GetLStart() < node2.GetLStart();
    }

    /* max unit kind used first */
    bool use1 = IfUseUnitKind(node1, maxUnitIndex);
    bool use2 = IfUseUnitKind(node2, maxUnitIndex);
    if (use1 != use2) {
        return use1;
    }

    /* slot0 first */
    SlotType slotType1 = node1.GetReservation()->GetSlot();
    SlotType slotType2 = node2.GetReservation()->GetSlot();
    if (slotType1 == kSlots) {
        slotType1 = kSlot0;
    }
    if (slotType2 == kSlots) {
        slotType2 = kSlot0;
    }
    if (slotType1 != slotType2) {
        return slotType1 < slotType2;
    }

    /* more succNodes first */
    if (node1.GetSuccs().size() != node2.GetSuccs().size()) {
        return node1.GetSuccs().size() > node2.GetSuccs().size();
    }

    /* default order */
    return node1.GetInsn()->GetId() < node2.GetInsn()->GetId();
}
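
/*
 * Illustrative tie-break (hypothetical nodes): with register pressure below every threshold, a
 * candidate with LStart 3 wins over one with LStart 5; if LStart ties, the node using the
 * most-demanded unit kind (maxUnitIndex) is preferred, then the slot0 candidate, then the one
 * with more successors, and finally the smaller insn id keeps the original program order.
 */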

/*
 * Calculate the number of every unit kind used by availableReadyList's nodes and save the index of
 * the most used one in maxUnitIndex.
 */
void AArch64Schedule::CalculateMaxUnitKindCount(ScheduleProcessInfo &scheduleInfo)
{
    uint32 unitKindCount[kUnitKindLast] = {0};
    for (auto node : scheduleInfo.GetAvailableReadyList()) {
        CountUnitKind(*node, unitKindCount, kUnitKindLast);
    }

    uint32 maxCount = 0;
    maxUnitIndex = 0;
    for (size_t i = 1; i < kUnitKindLast; ++i) {
        if (maxCount < unitKindCount[i]) {
            maxCount = unitKindCount[i];
            maxUnitIndex = i;
        }
    }
}

/*
 * Update the release reg node set.
 * When a node in this set is scheduled, register pressure can be reduced.
 */
void AArch64Schedule::UpdateReleaseRegInfo(AArch64ScheduleProcessInfo &scheduleInfo)
{
    auto &availableReadyList = scheduleInfo.GetAvailableReadyList();
    scheduleInfo.ClearALLFreeRegNodeSet();
    /* Traverse availableReadyList and add those nodes that can reduce register pressure to the release reg node set */
    for (auto node : availableReadyList) {
        std::set<regno_t> freeRegNO = CanFreeRegister(*node);
        if (!freeRegNO.empty()) {
            scheduleInfo.VaryFreeRegSet(cgFunc, freeRegNO, *node);
        }
    }
}

/*
 * Return the registers which an instruction can release after being scheduled.
 */
std::set<regno_t> AArch64Schedule::CanFreeRegister(const DepNode &node) const
{
    std::set<regno_t> freeRegSet;
    for (auto reg : node.GetUseRegnos()) {
        if (RegPressureSchedule::IsLastUse(node, reg)) {
            freeRegSet.emplace(reg);
        }
    }
    return freeRegSet;
}

/*
 * After an instruction is scheduled, update the live reg set.
 */
void AArch64Schedule::UpdateLiveRegSet(AArch64ScheduleProcessInfo &scheduleInfo, const DepNode &node)
{
    /* dealing with def regs: add def regs into the live reg set */
    size_t i = 1;
    for (auto &defReg : node.GetDefRegnos()) {
        if (scheduleInfo.FindIntLiveReg(defReg) == 0 && scheduleInfo.FindFpLiveReg(defReg) == 0) {
            scheduleInfo.VaryLiveRegSet(cgFunc, defReg, true);
        }
        /* delete a dead def reg from the live reg set because its live range is only 1 cycle */
        if (node.GetRegDefs(i) == nullptr && liveOutRegNo.find(defReg) == liveOutRegNo.end()) {
            scheduleInfo.VaryLiveRegSet(cgFunc, defReg, false);
        }
        ++i;
    }
    /* dealing with use regs: delete a use reg from the live reg set if this instruction is its last use */
    for (auto &useReg : node.GetUseRegnos()) {
        if (RegPressureSchedule::IsLastUse(node, useReg)) {
            if ((scheduleInfo.FindIntLiveReg(useReg) != 0 || scheduleInfo.FindFpLiveReg(useReg) != 0) &&
                liveOutRegNo.find(useReg) == liveOutRegNo.end()) {
                scheduleInfo.VaryLiveRegSet(cgFunc, useReg, false);
            }
        }
    }
}
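
/*
 * Illustrative example (hypothetical registers): after scheduling "add w100, w101, w102" where
 * w101 has its last use here and is not live-out, w101 is removed from the int live set and w100
 * is added; if w100 had no recorded later use and were not live-out either, it would be dropped
 * again immediately, since such a dead definition only lives for one cycle.
 */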

/*
 * Initialize the live reg set based on the live-in reg information.
 */
void AArch64Schedule::InitLiveRegSet(AArch64ScheduleProcessInfo &scheduleInfo)
{
    if (GetConsiderRegPressure()) {
        for (auto reg : liveInRegNo) {
            scheduleInfo.VaryLiveRegSet(cgFunc, reg, true);
        }
    }
}

/*
 * A simulated schedule:
 * schedule instructions in the original order to calculate the original execution cycles.
 */
uint32 AArch64Schedule::SimulateOnly()
{
    uint32 currCycle = 0;
    uint32 advanceCycle = 0;
    Init();

    for (uint32 i = 0; i < nodes.size();) {
        while (advanceCycle > 0) {
            ++currCycle;
            mad->AdvanceCycle();
            --advanceCycle;
        }

        DepNode *targetNode = nodes[i];
        if ((currCycle >= targetNode->GetEStart()) && targetNode->CanBeScheduled()) {
            targetNode->SetSimulateCycle(currCycle);
            targetNode->OccupyUnits();

            /* Update estart. */
            for (auto succLink : targetNode->GetSuccs()) {
                DepNode &succNode = succLink->GetTo();
                uint32 eStart = currCycle + succLink->GetLatency();
                if (succNode.GetEStart() < eStart) {
                    succNode.SetEStart(eStart);
                }
            }

            if (CGOptions::IsDebugSched()) {
                LogInfo::MapleLogger() << "[Simulate] TargetNode : ";
                targetNode->GetInsn()->Dump();
                LogInfo::MapleLogger() << "\n";
            }

            switch (targetNode->GetInsn()->GetLatencyType()) {
                case kLtClinit:
                    advanceCycle = kClinitAdvanceCycle;
                    break;
                case kLtAdrpLdr:
                    advanceCycle = kAdrpLdrAdvanceCycle;
                    break;
                case kLtClinitTail:
                    advanceCycle = kClinitTailAdvanceCycle;
                    break;
                default:
                    break;
            }

            ++i;
        } else {
            advanceCycle = 1;
        }
    }
    /* the second-to-last node is the true last node, because the last one is a kNodeTypeSeparator node */
    DEBUG_ASSERT(nodes.size() - kSecondToLastNode >= 0, "size of nodes should be greater than or equal to 2");
    return (nodes[nodes.size() - kSecondToLastNode]->GetSimulateCycle());
}

/* Restore the dependence graph to normal CGIR. */
void AArch64Schedule::FinalizeScheduling(BB &bb, const DepAnalysis &depAnalysis)
{
    bb.ClearInsns();

    const Insn *prevLocInsn = (bb.GetPrev() != nullptr ? bb.GetPrev()->GetLastLoc() : nullptr);
    for (auto node : nodes) {
        /* Append comments first. */
        for (auto comment : node->GetComments()) {
            if (comment->GetPrev() != nullptr && comment->GetPrev()->IsDbgInsn()) {
                bb.AppendInsn(*comment->GetPrev());
            }
            bb.AppendInsn(*comment);
        }
        /* Append insn. */
        if (!node->GetClinitInsns().empty()) {
            for (auto clinit : node->GetClinitInsns()) {
                bb.AppendInsn(*clinit);
            }
        } else if (node->GetType() == kNodeTypeNormal) {
            if (node->GetInsn()->GetPrev() != nullptr && node->GetInsn()->GetPrev()->IsDbgInsn()) {
                bb.AppendInsn(*node->GetInsn()->GetPrev());
            }
            bb.AppendInsn(*node->GetInsn());
        }

        /* Append cfi instructions. */
        for (auto cfi : node->GetCfiInsns()) {
            bb.AppendInsn(*cfi);
        }
    }
    bb.SetLastLoc(prevLocInsn);

    for (auto lastComment : depAnalysis.GetLastComments()) {
        bb.AppendInsn(*lastComment);
    }
}

/* For every node of nodes, update its bruteForceSchedCycle. */
void AArch64Schedule::UpdateBruteForceSchedCycle()
{
    for (auto node : nodes) {
        node->SetBruteForceSchedCycle(node->GetSchedCycle());
    }
}

/* Recursively schedule all of the possible nodes. */
void AArch64Schedule::IterateBruteForce(DepNode &targetNode, MapleVector<DepNode *> &readyList, uint32 currCycle,
                                        MapleVector<DepNode *> &scheduledNodes, uint32 &maxCycleCount,
                                        MapleVector<DepNode *> &optimizedScheduledNodes)
{
    /* Save states. */
    constexpr int32 unitSize = 31;
    DEBUG_ASSERT(unitSize == mad->GetAllUnitsSize(), "CG internal error.");
    std::vector<uint32> occupyTable;
    occupyTable.resize(unitSize, 0);
    mad->SaveStates(occupyTable, unitSize);

    /* Schedule targetNode first. */
    targetNode.SetState(kScheduled);
    targetNode.SetSchedCycle(currCycle);
    scheduledNodes.emplace_back(&targetNode);

    MapleVector<DepNode *> tempList = readyList;
    EraseNodeFromNodeList(targetNode, tempList);
    targetNode.OccupyUnits();

    /* Update readyList. */
    UpdateReadyList(targetNode, tempList, true);

    if (targetNode.GetType() == kNodeTypeSeparator) {
        /* If the target node is a separator node, update lastSeparatorIndex. */
        lastSeparatorIndex += kMaxDependenceNum;
    }

    if (tempList.empty()) {
        DEBUG_ASSERT(scheduledNodes.size() == nodes.size(), "CG internal error, Not all nodes scheduled.");
        if (currCycle < maxCycleCount) {
            maxCycleCount = currCycle;
            UpdateBruteForceSchedCycle();
            optimizedScheduledNodes = scheduledNodes;
        }
    } else {
        uint32 advanceCycle = 0;
        switch (targetNode.GetInsn()->GetLatencyType()) {
            case kLtClinit:
                advanceCycle = kClinitAdvanceCycle;
                break;
            case kLtAdrpLdr:
                advanceCycle = kAdrpLdrAdvanceCycle;
                break;
            case kLtClinitTail:
                advanceCycle = kClinitTailAdvanceCycle;
                break;
            default:
                break;
        }

        do {
            std::vector<DepNode *> availableReadyList;
            std::vector<DepNode *> tempAvailableList;
            while (advanceCycle > 0) {
                ++currCycle;
                mad->AdvanceCycle();
                --advanceCycle;
            }
            /* Check EStart. */
            for (auto node : tempList) {
                if (node->GetEStart() <= currCycle) {
                    tempAvailableList.emplace_back(node);
                }
            }

            if (tempAvailableList.empty()) {
                /* Advance cycle. */
                advanceCycle = 1;
                continue;
            }

            /* Check if schedulable */
            for (auto node : tempAvailableList) {
                if (node->CanBeScheduled()) {
                    availableReadyList.emplace_back(node);
                }
            }

            if (availableReadyList.empty()) {
                /* Advance cycle. */
                advanceCycle = 1;
                continue;
            }

            for (auto node : availableReadyList) {
                IterateBruteForce(*node, tempList, currCycle, scheduledNodes, maxCycleCount, optimizedScheduledNodes);
            }

            break;
        } while (true);
    }

    /*
     * Recover states.
     * Restore targetNode first.
     */
    targetNode.SetState(kReady);
    targetNode.SetSchedCycle(0);
    scheduledNodes.pop_back();
    mad->RestoreStates(occupyTable, unitSize);

    /* Update readyList. */
    for (auto succLink : targetNode.GetSuccs()) {
        DepNode &succNode = succLink->GetTo();
        succNode.IncreaseValidPredsSize();
        succNode.SetState(kNormal);
    }

    if (targetNode.GetType() == kNodeTypeSeparator) {
        /* If the target node is a separator node, update lastSeparatorIndex. */
        lastSeparatorIndex -= kMaxDependenceNum;
    }
}

/*
 * Brute force schedule:
 * Find all possible schedule lists of the current bb, calculate every list's execution cycles,
 * and return the optimal schedule list and its cycle count.
 */
uint32 AArch64Schedule::DoBruteForceSchedule()
{
    MapleVector<DepNode *> scheduledNodes(alloc.Adapter());
    MapleVector<DepNode *> optimizedScheduledNodes(alloc.Adapter());

    uint32 currCycle = 0;
    uint32 maxCycleCount = 0xFFFFFFFF;
    Init();

    /* Schedule the first separator. */
    DepNode *targetNode = readyList.front();
    targetNode->SetState(kScheduled);
    targetNode->SetSchedCycle(currCycle);
    scheduledNodes.emplace_back(targetNode);
    readyList.clear();

    /* Update readyList. */
    UpdateReadyList(*targetNode, readyList, false);

    DEBUG_ASSERT(targetNode->GetType() == kNodeTypeSeparator, "The first node should be a separator node.");
    DEBUG_ASSERT(!readyList.empty(), "readyList should not be empty.");

    for (auto targetNodeTemp : readyList) {
        IterateBruteForce(*targetNodeTemp, readyList, currCycle, scheduledNodes, maxCycleCount,
                          optimizedScheduledNodes);
    }

    nodes = optimizedScheduledNodes;
    return maxCycleCount;
}

/*
 * Update the ready list after the targetNode has been scheduled.
 * For every successor of targetNode, if all of its predecessors have been scheduled,
 * add it to the ready list and update its information (like state, estart).
 */
void AArch64Schedule::UpdateReadyList(DepNode &targetNode, MapleVector<DepNode *> &readyList, bool updateEStart)
{
    for (auto succLink : targetNode.GetSuccs()) {
        DepNode &succNode = succLink->GetTo();
        succNode.DescreaseValidPredsSize();
        if (succNode.GetValidPredsSize() == 0) {
            readyList.emplace_back(&succNode);
            succNode.SetState(kReady);

            /* Set eStart. */
            if (updateEStart) {
                uint32 maxEstart = 0;
                for (auto predLink : succNode.GetPreds()) {
                    DepNode &predNode = predLink->GetFrom();
                    uint32 eStart = predNode.GetSchedCycle() + predLink->GetLatency();
                    maxEstart = (maxEstart < eStart ? eStart : maxEstart);
                }
                succNode.SetEStart(maxEstart);
            }
        }
    }
}

/* For every node of nodes, dump its dependence information. */
void AArch64Schedule::DumpDepGraph(const MapleVector<DepNode *> &nodes) const
{
    for (auto node : nodes) {
        depAnalysis->DumpDepNode(*node);
        LogInfo::MapleLogger() << "---------- preds ----------"
                               << "\n";
        for (auto pred : node->GetPreds()) {
            depAnalysis->DumpDepLink(*pred, &(pred->GetFrom()));
        }
        LogInfo::MapleLogger() << "---------- succs ----------"
                               << "\n";
        for (auto succ : node->GetSuccs()) {
            depAnalysis->DumpDepLink(*succ, &(succ->GetTo()));
        }
        LogInfo::MapleLogger() << "---------------------------"
                               << "\n";
    }
}

/* For every node of nodes, dump its schedule time according to the simulate type and its instruction information. */
void AArch64Schedule::DumpScheduleResult(const MapleVector<DepNode *> &nodes, SimulateType type) const
{
    for (auto node : nodes) {
        LogInfo::MapleLogger() << "cycle[ ";
        switch (type) {
            case kListSchedule:
                LogInfo::MapleLogger() << node->GetSchedCycle();
                break;
            case kBruteForce:
                LogInfo::MapleLogger() << node->GetBruteForceSchedCycle();
                break;
            case kSimulateOnly:
                LogInfo::MapleLogger() << node->GetSimulateCycle();
                break;
        }
        LogInfo::MapleLogger() << " ] ";
        node->GetInsn()->Dump();
        LogInfo::MapleLogger() << "\n";
    }
}

/* Print bb's dependence dot graph information to a file. */
void AArch64Schedule::GenerateDot(const BB &bb, const MapleVector<DepNode *> &nodes) const
{
    std::streambuf *coutBuf = std::cout.rdbuf(); /* keep the original cout buffer */
    std::ofstream dgFile;
    std::streambuf *buf = dgFile.rdbuf();
    std::cout.rdbuf(buf);

    /* construct the file name */
    std::string fileName;
    fileName.append(phaseName);
    fileName.append("_");
    fileName.append(cgFunc.GetName());
    fileName.append("_BB");
    auto str = std::to_string(bb.GetId());
    fileName.append(str);
    fileName.append("_dep_graph.dot");

    dgFile.open(fileName.c_str(), std::ios::trunc);
    if (!dgFile.is_open()) {
        LogInfo::MapleLogger(kLlWarn) << "fileName:" << fileName << " open failure.\n";
        return;
    }
    dgFile << "digraph {\n";
    for (auto node : nodes) {
        for (auto succ : node->GetSuccs()) {
            dgFile << "insn" << node->GetInsn() << " -> "
                   << "insn" << succ->GetTo().GetInsn();
            dgFile << " [";
            if (succ->GetDepType() == kDependenceTypeTrue) {
                dgFile << "color=red,";
            }
            dgFile << "label= \"" << succ->GetLatency() << "\"";
            dgFile << "];\n";
        }
    }

    for (auto node : nodes) {
        MOperator mOp = node->GetInsn()->GetMachineOpcode();
        const InsnDesc *md = &AArch64CG::kMd[mOp];
        dgFile << "insn" << node->GetInsn() << "[";
        dgFile << "shape=box,label= \" " << node->GetInsn()->GetId() << ":\n";
        dgFile << "{ ";
        dgFile << md->name << "\n";
        dgFile << "}\"];\n";
    }
    dgFile << "}\n";
    dgFile.flush();
    dgFile.close();
    std::cout.rdbuf(coutBuf);
}

RegType AArch64ScheduleProcessInfo::GetRegisterType(CGFunc &f, regno_t regNO)
{
    if (AArch64isa::IsPhysicalRegister(regNO)) {
        if (AArch64isa::IsGPRegister(static_cast<AArch64reg>(regNO))) {
            return kRegTyInt;
        } else if (AArch64isa::IsFPSIMDRegister(static_cast<AArch64reg>(regNO))) {
            return kRegTyFloat;
        } else {
            CHECK_FATAL(false, "unknown physical reg");
        }
    } else {
        RegOperand *curRegOpnd = f.GetVirtualRegisterOperand(regNO);
        DEBUG_ASSERT(curRegOpnd != nullptr, "register which is not physical and virtual");
        return curRegOpnd->GetRegisterType();
    }
}

void AArch64ScheduleProcessInfo::VaryLiveRegSet(CGFunc &f, regno_t regNO, bool isInc)
{
    RegType registerTy = GetRegisterType(f, regNO);
    if (registerTy == kRegTyInt || registerTy == kRegTyVary) {
        isInc ? IncIntLiveRegSet(regNO) : DecIntLiveRegSet(regNO);
    } else if (registerTy == kRegTyFloat) {
        isInc ? IncFpLiveRegSet(regNO) : DecFpLiveRegSet(regNO);
    }
    /* consider other type register */
}

void AArch64ScheduleProcessInfo::VaryFreeRegSet(CGFunc &f, std::set<regno_t> regNOs, DepNode &node)
{
    for (auto regNO : regNOs) {
        RegType registerTy = GetRegisterType(f, regNO);
        if (registerTy == kRegTyInt || registerTy == kRegTyVary /* memory base register must be int */) {
            IncFreeIntRegNode(node);
        } else if (registerTy == kRegTyFloat) {
            IncFreeFpRegNode(node);
        } else if (registerTy == kRegTyCc) {
            /* do not count the CC reg */
            return;
        } else {
            /* consider other type register */
            CHECK_FATAL(false, "do not support this type of register");
        }
    }
}

/* Do brute force scheduling and dump scheduling information */
void AArch64Schedule::BruteForceScheduling(const BB &bb)
{
    LogInfo::MapleLogger() << "\n\n$$ Function: " << cgFunc.GetName();
    LogInfo::MapleLogger() << "\n BB id = " << bb.GetId() << "; nodes.size = " << nodes.size() << "\n";

    constexpr uint32 maxBruteForceNum = 50;
    if (nodes.size() < maxBruteForceNum) {
        GenerateDot(bb, nodes);
        uint32 maxBruteForceCycle = DoBruteForceSchedule();
        MapleVector<DepNode *> bruteNodes = nodes;
        uint32 maxSchedCycle = DoSchedule();
        if (maxBruteForceCycle < maxSchedCycle) {
            LogInfo::MapleLogger() << "maxBruteForceCycle = " << maxBruteForceCycle << "; maxSchedCycle = ";
            LogInfo::MapleLogger() << maxSchedCycle << "\n";
            LogInfo::MapleLogger() << "\n ## Dump dependence graph ## "
                                   << "\n";
            DumpDepGraph(nodes);
            LogInfo::MapleLogger() << "\n ** Dump bruteForce scheduling result."
                                   << "\n";
            DumpScheduleResult(bruteNodes, kBruteForce);
            LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result."
                                   << "\n";
            DumpScheduleResult(nodes, kListSchedule);
        }
    } else {
        LogInfo::MapleLogger() << "Skip BruteForce scheduling."
                               << "\n";
        DoSchedule();
    }
}

/* Do simulated scheduling and dump scheduling information */
void AArch64Schedule::SimulateScheduling(const BB &bb)
{
    uint32 originCycle = SimulateOnly();
    MapleVector<DepNode *> oldNodes = nodes;
    uint32 schedCycle = DoSchedule();
    if (originCycle < schedCycle) {
        LogInfo::MapleLogger() << "Worse cycle [ " << (schedCycle - originCycle) << " ]; ";
        LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = ";
        LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size();
        LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName();
        LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n";
        LogInfo::MapleLogger() << "\n ** Dump original result."
                               << "\n";
        DumpScheduleResult(oldNodes, kSimulateOnly);
        LogInfo::MapleLogger() << "\n ^^ Dump list scheduling result."
                               << "\n";
        DumpScheduleResult(nodes, kListSchedule);
    } else if (originCycle > schedCycle) {
        LogInfo::MapleLogger() << "Advance cycle [ " << (originCycle - schedCycle) << " ]; ";
        LogInfo::MapleLogger() << "originCycle = " << originCycle << "; schedCycle = ";
        LogInfo::MapleLogger() << schedCycle << "; nodes.size = " << nodes.size();
        LogInfo::MapleLogger() << "; $$ Function: " << cgFunc.GetName();
        LogInfo::MapleLogger() << "; BB id = " << bb.GetId() << "\n";
    } else {
        LogInfo::MapleLogger() << "Equal cycle [ 0 ]; originCycle = " << originCycle;
        LogInfo::MapleLogger() << " ], ignore. nodes.size = " << nodes.size() << "\n";
    }
}

/*
 * A local list scheduling.
 * Schedule insns in basic blocks.
 */
void AArch64Schedule::ListScheduling(bool beforeRA)
{
    InitIDAndLoc();

    mad = Globals::GetInstance()->GetMAD();
    if (beforeRA) {
        RegPressure::SetMaxRegClassNum(kRegisterLast);
    }
    depAnalysis = memPool.New<AArch64DepAnalysis>(cgFunc, memPool, *mad, beforeRA);

    FOR_ALL_BB(bb, &cgFunc) {
        depAnalysis->Run(*bb, nodes);

        if (LIST_SCHED_DUMP_REF) {
            GenerateDot(*bb, nodes);
            DumpDepGraph(nodes);
        }
        if (beforeRA) {
            liveInRegNo = bb->GetLiveInRegNO();
            liveOutRegNo = bb->GetLiveOutRegNO();
            if (bb->GetKind() != BB::kBBReturn) {
                SetConsiderRegPressure();
                DoSchedule();
            } else {
                RegPressureScheduling(*bb, nodes);
            }
        } else {
            ClinitPairOpt();
            MemoryAccessPairOpt();
            if (CGOptions::IsDruteForceSched()) {
                BruteForceScheduling(*bb);
            } else if (CGOptions::IsSimulateSched()) {
                SimulateScheduling(*bb);
            } else {
                DoSchedule();
            }
        }

        FinalizeScheduling(*bb, *depAnalysis);
    }
}
} /* namespace maplebe */