/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler.h"

#include "optimizer/analysis/alias_analysis.h"
#include "optimizer/ir/analysis.h"
#include "optimizer/ir/basicblock.h"
#include "compiler_logger.h"

namespace ark::compiler {
/* Instruction scheduling.
 * Current decisions/limitations:
 *
 * 1. The scheduler pass is placed immediately before the register allocator.
 * 2. It rearranges instructions only within a basic block, never across blocks.
 * 3. No liveness analysis; dependencies are computed only from barrier/user/alias information.
 * 4. No CPU pipeline/resource modeling; only dependency costs are used.
 * 5. Forward list scheduling algorithm with the standard critical-path-based priority.
 */
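/* Illustrative example (hypothetical latencies): for a dependency chain
 * a -> b -> c, where a->b has latency 3 and b->c has latency 1, the
 * critical-path priorities computed bottom-up are prio(c) = 0,
 * prio(b) = 1 and prio(a) = 3 + 1 = 4; among ready instructions the
 * scheduler always picks the one with the highest priority first.
 */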
bool Scheduler::RunImpl()
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "Run " << GetPassName();
    SaveStateBridgesBuilder ssb;
    bool result = false;
    for (auto bb : GetGraph()->GetBlocksRPO()) {
        if (!bb->IsEmpty() && !bb->IsStartBlock()) {
            if (ScheduleBasicBlock(bb)) {
                ssb.FixSaveStatesInBB(bb);
                result = true;
            }
        }
    }
    COMPILER_LOG(DEBUG, SCHEDULER) << GetPassName() << " completed";
    return result;
}

// Dependency helper function
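// AddDep(prio, from, to, latency, barrier) records that 'from' must start
// at least 'latency' cycles before 'to' ('to' is always the later
// instruction in program order, because the block is walked in reverse).
// E.g., with ocycle_[to] = 5 and latency = 2, the estimated original
// cycle of 'from' becomes at least 7.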
void Scheduler::AddDep(uint32_t *prio, Inst *from, Inst *to, uint32_t latency, Inst *barrier)
{
    if (from == to) {
        return;
    }
    COMPILER_LOG(DEBUG, SCHEDULER) << "Found dependency " << from->GetId() << " -> " << to->GetId() << " latency "
                                   << latency;
    // Estimate old cycle (without scheduling)
    ocycle_[from] = std::max(ocycle_[from], latency + ocycle_[to]);

    // Update instruction priority - "how high the instruction is in the dependency tree"
    *prio = std::max(*prio, latency + prio_[to]);

    // Do not add cross-barrier dependencies into deps_
    if (barrier == nullptr || old_[to] > old_[barrier]) {
        if (deps_.at(from).count(to) == 1) {
            uint32_t oldLatency = deps_.at(from).at(to);
            if (oldLatency >= latency) {
                return;
            }
        } else {
            numDeps_[to]++;
        }
        deps_.at(from)[to] = latency;
    }
}

// Calculate priority and dependencies
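// Note: the block is walked in reverse, so the helper containers
// (stores_, loads_, special_) always hold instructions that come later
// in program order than the one currently being processed; every
// dependency edge added through AddDep points forward in program order.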
bool Scheduler::BuildAllDeps(BasicBlock *bb)
{
    auto markerHolder = MarkerHolder(GetGraph());
    mrk_ = markerHolder.GetMarker();

    oprev_ = 0;
    numBarriers_ = 0;
    maxPrio_ = 0;

    static constexpr uint32_t TOO_LONG_BB = 64;
    uint32_t numInst = 0;
    uint32_t numBetween = 0;
    uint32_t numSpecial = 0;
    Inst *lastBarrier = nullptr;
    for (auto inst : bb->InstsSafeReverse()) {
        ProcessInst(inst, &numInst, &numBetween, &numSpecial, &lastBarrier);

        if (numSpecial > TOO_LONG_BB || numBetween > TOO_LONG_BB) {
            COMPILER_LOG(DEBUG, SCHEDULER) << "Block " << bb->GetId() << " seems too big for scheduling, skipping";
            Cleanup();
            return false;
        }
    }
    return true;
}

// Build dependencies for a single instruction
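// For every instruction the pass records: old_ (position counted from the
// block end, so original program order means values descending by one),
// ocycle_ (estimated cycle in the original order), numDeps_ (number of
// not-yet-scheduled predecessors) and deps_ (successors with latencies).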
void Scheduler::ProcessInst(Inst *inst, uint32_t *numInst, uint32_t *numBetween, uint32_t *numSpecial,
                            Inst **lastBarrier)
{
    uint32_t prio = 0;
    uint32_t instLatency = inst->Latency();
    bool barrier = inst->IsBarrier();

    (*numBetween)++;
    old_.insert({inst, (*numInst)++});
    ocycle_.insert({inst, ++oprev_});
    numDeps_.insert({inst, 0U});
    deps_.emplace(inst, GetGraph()->GetLocalAllocator()->Adapter());

    // Dependency to the barrier
    if (*lastBarrier != nullptr) {
        AddDep(&prio, inst, *lastBarrier, 1U, *lastBarrier);
    }

    // Dependency from barrier
    if (barrier) {
        Inst *oldLastBarrier = *lastBarrier;
        *lastBarrier = inst;
        numBarriers_++;
        *numBetween = 0;
        for (auto user = inst->GetNext(); user != oldLastBarrier; user = user->GetNext()) {
            AddDep(&prio, inst, user, 1U, *lastBarrier);
        }
    }

    // Users
    for (auto &userItem : inst->GetUsers()) {
        auto user = userItem.GetInst();
        if (user->IsMarked(mrk_)) {
            AddDep(&prio, inst, user, instLatency, *lastBarrier);
        }
    }

    if (inst->IsMovableObject()) {
        // Take every SaveState that has RuntimeCall users and follows this reference instruction, and create a
        // dependency between the SaveState and this instruction.
        // See also GraphChecker::CheckSaveStatesWithRuntimeCallUsers.
        for (auto &ss : ssWithRuntimeCall_) {
            AddDep(&prio, inst, ss, 1U, *lastBarrier);
        }
    }

    if (inst->IsMemory() || inst->IsRefSpecial()) {
        ProcessMemory(inst, &prio, *lastBarrier);
        (*numSpecial)++;
    }

    if (inst->CanThrow() || inst->IsRuntimeCall() || inst->IsSaveState()) {
        ProcessSpecial(inst, &prio, *lastBarrier);
        (*numSpecial)++;
    }

    inst->SetMarker(mrk_);
    prio_.insert({inst, prio});
    maxPrio_ = std::max(maxPrio_, prio);
    oprev_ = ocycle_[inst];
}

// Memory
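// Example: in the sequence 'st A; ld B; st C' (processed here in reverse),
// handling 'st A' adds a dependency to 'st C' when A may alias C and to
// 'ld B' when A may alias B; loads are never ordered against other loads.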
void Scheduler::ProcessMemory(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    if (inst->IsRefSpecial()) {
        loads_.push_back(inst);
        return;
    }
    for (auto mem : stores_) {
        if (GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
            AddDep(prio, inst, mem, 1U, lastBarrier);
        }
    }
    if (inst->IsStore()) {
        for (auto mem : loads_) {
            if (mem->IsLoad() && GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
                AddDep(prio, inst, mem, 1U, lastBarrier);
            }
        }
        for (auto ct : special_) {
            AddDep(prio, inst, ct, 1U, lastBarrier);
        }
        stores_.push_back(inst);
    } else {  // means inst->IsLoad()
        loads_.push_back(inst);
    }
}

void Scheduler::ProcessSpecialBoundsCheckI(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    auto value = inst->CastToBoundsCheckI()->GetImm();
    // Order the check before loads with the same immediate; there is no easy way to check that the arrays are the same.
    for (auto load : loads_) {
        if (load->GetOpcode() == Opcode::LoadArrayPairI) {
            auto imm = load->CastToLoadArrayPairI()->GetImm();
            if (imm == value || imm + 1 == value) {
                AddDep(prio, inst, load, 1U, lastBarrier);
            }
        } else if (load->GetOpcode() == Opcode::LoadArrayI && load->CastToLoadArrayI()->GetImm() == value) {
            AddDep(prio, inst, load, 1U, lastBarrier);
        }
    }
}

// CanThrow and SaveState instructions can't be rearranged, and stores can't be moved over them
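// A potentially throwing instruction or a state snapshot must observe
// memory exactly as program order left it, hence the dependencies to
// every store that follows it.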
void Scheduler::ProcessSpecial(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    for (auto mem : stores_) {
        AddDep(prio, inst, mem, 1U, lastBarrier);
    }
    for (auto ct : special_) {
        AddDep(prio, inst, ct, 1U, lastBarrier);
    }

    if (inst->IsSaveState() && inst != inst->GetBasicBlock()->GetFirstInst()) {
        // SaveStates that have RuntimeCall users are added to a vector, so that dependencies from preceding
        // reference instructions can be created
        for (auto &user : inst->GetUsers()) {
            auto userInst = user.GetInst();
            if (userInst->IsRuntimeCall()) {
                ssWithRuntimeCall_.push_back(inst);
                break;
            }
        }
    }
    // 1. SafePoint also has this flag
    // 2. A GC triggered inside can poison loaded reference values
    if (inst->IsRuntimeCall()) {
        for (auto mem : loads_) {
            if (mem->GetType() == DataType::REFERENCE || mem->GetType() == DataType::ANY) {
                AddDep(prio, inst, mem, 1U, lastBarrier);
            }
        }
    }
    // We have to "restore" the BoundsCheckI -> LoadArrayI dependency
    if (inst->GetOpcode() == Opcode::BoundsCheckI) {
        ProcessSpecialBoundsCheckI(inst, prio, lastBarrier);
    }
    special_.push_back(inst);
}

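// Appends a barrier instruction (and its glued LoadPairPart users, if any)
// to the schedule, and releases the instructions that depend on the glued
// parts by decrementing their numDeps_ counters.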
void Scheduler::ScheduleBarrierInst(Inst **inst)
{
    sched_.push_back((*inst));
    if ((*inst)->WithGluedInsts()) {
        auto next = (*inst)->GetNext();
        auto nnext = next->GetNext();
        ASSERT(next->GetOpcode() == Opcode::LoadPairPart && nnext->GetOpcode() == Opcode::LoadPairPart);
        sched_.push_back(next);
        sched_.push_back(nnext);
        *inst = nnext;
        for (auto pair : deps_.at(next)) {
            uint32_t numDeps = numDeps_[pair.first];
            ASSERT(numDeps > 0);
            numDeps--;
            numDeps_[pair.first] = numDeps;
        }
        for (auto pair : deps_.at(nnext)) {
            uint32_t numDeps = numDeps_[pair.first];
            ASSERT(numDeps > 0);
            numDeps--;
            numDeps_[pair.first] = numDeps;
        }
    }
}

// Rearranges instructions in the basic block using the list scheduling algorithm.
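// Barriers never move: the block is cut into intervals around every
// barrier, each interval [first..last] is scheduled independently, and
// the barrier itself is emitted between the intervals.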
bool Scheduler::ScheduleBasicBlock(BasicBlock *bb)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "Schedule BB " << bb->GetId();
    if (!BuildAllDeps(bb)) {
        return false;
    }

    // Schedule intervals between barriers
    uint32_t cycle = 0;
    uint32_t numInst = 0;
    Inst *first = nullptr;
    for (auto inst = bb->GetFirstInst(); inst != nullptr; inst = inst->GetNext()) {
        bool barrier = inst->IsBarrier();
        numInst++;
        inst->ClearMarkers();
        if (first == nullptr) {
            first = inst;
        }
        if (barrier || inst == bb->GetLastInst()) {
            Inst *last = nullptr;
            if (barrier) {
                last = inst->GetPrev();
                numInst--;
            } else {
                last = inst;
            }
            if (numInst > 1) {
                cycle += ScheduleInstsBetweenBarriers(first, last);
            } else if (numInst == 1) {
                ASSERT(first->GetOpcode() != Opcode::LoadPairPart);
                sched_.push_back(first);
                cycle++;
            }
            if (barrier) {
                ASSERT(inst->GetOpcode() != Opcode::LoadPairPart);
                ScheduleBarrierInst(&inst);
                cycle++;
            }
            numInst = 0;
            first = nullptr;
        }
    }
    return FinalizeBB(bb, cycle);
}

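// Since old_ numbers instructions from the block end, the original program
// order corresponds to old_ values descending by exactly one; any other
// pattern in sched_ means the order has actually changed.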
bool Scheduler::FinalizeBB(BasicBlock *bb, uint32_t cycle)
{
    // Rearrange instructions in the basic block according to the schedule
    bool result = false;
    bool hasPrev = false;
    uint32_t prev;
    for (auto inst : sched_) {
        auto cur = old_[inst];
        if (hasPrev && prev - 1 != cur) {
            result = true;
        }
        prev = cur;
        hasPrev = true;

        bb->EraseInst(inst);
        bb->AppendInst(inst);
    }

    if (result) {
        COMPILER_LOG(DEBUG, SCHEDULER) << "Stats for block " << bb->GetId() << ": old cycles = " << oprev_
                                       << ", num barriers = " << numBarriers_ << ", critical path = " << maxPrio_
                                       << ", scheduled = " << cycle;
        GetGraph()->GetEventWriter().EventScheduler(bb->GetId(), bb->GetGuestPc(), oprev_, numBarriers_, maxPrio_,
                                                    cycle);
    }

    Cleanup();
    return result;
}

void Scheduler::Cleanup()
{
    sched_.clear();
    loads_.clear();
    stores_.clear();
    special_.clear();
    old_.clear();
    ocycle_.clear();
    numDeps_.clear();
    prio_.clear();
    deps_.clear();
    ssWithRuntimeCall_.clear();
}

// Rearranges the instructions in [first..last] inclusive; none of them is a barrier.
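// Two priority queues drive the algorithm: 'waiting' is ordered by the
// earliest cycle an instruction may start (asap_), 'ready' by critical-path
// priority; in both, ties are broken in favor of the original program order.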
uint32_t Scheduler::ScheduleInstsBetweenBarriers(Inst *first, Inst *last)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "SchedBetween " << first->GetId() << " and " << last->GetId();

    // Compare function for the 'waiting' queue
    auto cmpAsap = [this](Inst *left, Inst *right) {
        return asap_[left] > asap_[right] || (asap_[left] == asap_[right] && old_[left] < old_[right]);
    };
    // Queue of instructions whose dependencies are already scheduled but whose results are not ready yet
    SchedulerPriorityQueue waiting(cmpAsap, GetGraph()->GetLocalAllocator()->Adapter());

    // Compare function for the 'ready' queue
    auto cmpPrio = [this](Inst *left, Inst *right) {
        return prio_[left] < prio_[right] || (prio_[left] == prio_[right] && old_[left] < old_[right]);
    };
    // Queue of ready instructions
    SchedulerPriorityQueue ready(cmpPrio, GetGraph()->GetLocalAllocator()->Adapter());

    // Initialization: add leaves into the 'waiting' queue
    uint32_t numInst = 0;
    for (auto inst = first; inst != last->GetNext(); inst = inst->GetNext()) {
        asap_.insert({inst, 1U});
        if (numDeps_[inst] == 0) {
            COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << inst->GetId();
            waiting.push(inst);
        }
        numInst++;
    }

    // Scheduling
    uint32_t cycle = 1;
    while (numInst > 0) {
        if (ready.empty()) {
            ASSERT(!waiting.empty());
            uint32_t nearest = asap_[waiting.top()];
            // Skip cycles where we can't schedule any instruction
            if (nearest > cycle) {
                cycle = nearest;
            }
        }

        // Move from 'waiting' to 'ready'
        while (!waiting.empty()) {
            Inst *soonest = waiting.top();
            if (asap_[soonest] <= cycle) {
                waiting.pop();
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue ready moving " << soonest->GetId();
                ready.push(soonest);
            } else {
                break;
            }
        }
        ASSERT(!ready.empty());

        // Schedule the top 'ready' instruction (together with glued ones, when necessary)
        numInst -= SchedWithGlued(ready.top(), &waiting, cycle++);
        ready.pop();
    }

    // Cleanup
    asap_.clear();
    return cycle;
}

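// Emits 'inst' together with its glued LoadPairPart users (which must land
// on the same cycle), updates asap_ and numDeps_ of every dependent
// instruction, and returns the number of instructions emitted.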
uint32_t Scheduler::SchedWithGlued(Inst *inst, SchedulerPriorityQueue *waiting, uint32_t cycle)
{
    uint32_t amount = 0;
    // Compare function for the 'now' queue
    auto cmpOld = [this](Inst *left, Inst *right) { return old_[left] < old_[right]; };
    // Queue of instructions to schedule at the current cycle
    SchedulerPriorityQueue now(cmpOld, GetGraph()->GetLocalAllocator()->Adapter());
    // Add inst into 'now'
    ASSERT(now.empty());
    now.push(inst);

    // Add glued instructions into 'now'
    if (inst->WithGluedInsts()) {
        for (auto &userItem : inst->GetUsers()) {
            auto user = userItem.GetInst();
            ASSERT(user->GetOpcode() == Opcode::LoadPairPart);
            now.push(user);
        }
    }

    [[maybe_unused]] constexpr auto MAX_NOW_SIZE = 3;
    ASSERT(now.size() <= MAX_NOW_SIZE);

    // Schedule them
    while (!now.empty()) {
        auto cur = now.top();
        now.pop();
        COMPILER_LOG(DEBUG, SCHEDULER) << "Scheduling " << cur->GetId() << " at cycle " << cycle;

        // Adjust all dependent instructions
        for (auto pair : deps_.at(cur)) {
            // Adjust asap
            uint32_t asap = asap_[pair.first];
            asap = std::max(asap, cycle + pair.second);
            asap_[pair.first] = asap;

            // Adjust numDeps
            uint32_t numDeps = numDeps_[pair.first];
            ASSERT(numDeps > 0);
            numDeps--;
            numDeps_[pair.first] = numDeps;

            // Glued instructions are scheduled immediately, so they should not go into the queue
            if (numDeps == 0 && pair.first->GetOpcode() != Opcode::LoadPairPart) {
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << pair.first->GetId();
                waiting->push(pair.first);
            }
        }

        // Add into the schedule
        sched_.push_back(cur);
        amount++;
    }

    return amount;
}
}  // namespace ark::compiler