/*
 * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler.h"

#include "optimizer/analysis/alias_analysis.h"
#include "optimizer/ir/analysis.h"
#include "optimizer/ir/basicblock.h"
#include "compiler_logger.h"

namespace panda::compiler {
/* Instruction scheduling.
 * Current decisions/limitations
 *
 * 1. The scheduler pass is placed immediately before the register allocator.
 * 2. It rearranges instructions only inside a basic block, not between blocks.
 * 3. No liveness analysis, only calculating dependencies using barrier/users/alias information.
 * 4. No CPU pipeline/resource modeling, only having dependency costs.
 * 5. Forward list scheduling algorithm with standard critical-path-based priority.
 */
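/* Overall flow (mirrors the code below):
 *   for each basic block (in RPO):
 *     BuildAllDeps()                 - reverse walk collecting dependencies and priorities
 *     ScheduleInstsBetweenBarriers() - list scheduling of each barrier-free interval
 *     FinalizeBB()                   - rewrite the block in the scheduled order
 */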
bool Scheduler::RunImpl()
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "Run " << GetPassName();
    SaveStateBridgesBuilder ssb;
    bool result = false;
    for (auto bb : GetGraph()->GetBlocksRPO()) {
        if (!bb->IsEmpty() && !bb->IsStartBlock()) {
            if (ScheduleBasicBlock(bb)) {
                ssb.FixSaveStatesInBB(bb);
                result = true;
            }
        }
    }
    COMPILER_LOG(DEBUG, SCHEDULER) << GetPassName() << " complete";
    return result;
}

// Dependency helper function
void Scheduler::AddDep(uint32_t *prio, Inst *from, Inst *to, uint32_t latency, Inst *barrier)
{
    if (from == to) {
        return;
    }
    COMPILER_LOG(DEBUG, SCHEDULER) << "Found dependency " << from->GetId() << " -> " << to->GetId() << " latency "
                                   << latency;
    // Estimate old cycle (without scheduling)
    ocycle_[from] = std::max(ocycle_[from], latency + ocycle_[to]);

    // Update instruction priority - "how high the instruction is in the dependency tree"
    *prio = std::max(*prio, latency + prio_[to]);

    // Do not add cross-barrier dependencies into deps_
    if (barrier == nullptr || old_[to] > old_[barrier]) {
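        // The same edge may be discovered more than once (e.g. 'to' is both a user and
        // aliased memory); keep the largest latency and count 'to' in numDeps_ only once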
        if (deps_.at(from).count(to) == 1) {
            uint32_t oldLatency = deps_.at(from).at(to);
            if (oldLatency >= latency) {
                return;
            }
        } else {
            numDeps_[to]++;
        }
        deps_.at(from)[to] = latency;
    }
}

// Calculate priority and dependencies
bool Scheduler::BuildAllDeps(BasicBlock *bb)
{
    auto markerHolder = MarkerHolder(GetGraph());
    auto mrk = markerHolder.GetMarker();

    oprev_ = 0;
    numBarriers_ = 0;
    maxPrio_ = 0;

    static constexpr uint32_t TOO_LONG_BB = 64;
    uint32_t numInst = 0;
    uint32_t numBetween = 0;
    uint32_t numSpecial = 0;
    Inst *lastBarrier = nullptr;
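    // Walk the block backwards: every instruction already visited lies below the current
    // one, so marked users and collected memory accesses are all forward dependencies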
    for (auto inst : bb->InstsSafeReverse()) {
        ProcessInst(inst, mrk, &numInst, &numBetween, &numSpecial, &lastBarrier);

        if (numSpecial > TOO_LONG_BB || numBetween > TOO_LONG_BB) {
            COMPILER_LOG(DEBUG, SCHEDULER) << "Block " << bb->GetId() << " seems too big for scheduling, skipping";
            Cleanup();
            return false;
        }
    }
    return true;
}

// One instruction deps
void Scheduler::ProcessInst(Inst *inst, Marker mrk, uint32_t *numInst, uint32_t *numBetween, uint32_t *numSpecial,
                            Inst **lastBarrier)
{
    uint32_t prio = 0;
    uint32_t instLatency = inst->Latency();
    bool barrier = inst->IsBarrier();

    (*numBetween)++;
    old_.insert({inst, (*numInst)++});
    ocycle_.insert({inst, ++oprev_});
    numDeps_.insert({inst, 0U});
    deps_.emplace(inst, GetGraph()->GetLocalAllocator()->Adapter());

    // Dependency to the barrier
    if (*lastBarrier != nullptr) {
        AddDep(&prio, inst, *lastBarrier, 1U, *lastBarrier);
    }

    // Dependency from barrier
    if (barrier) {
        Inst *oldLastBarrier = *lastBarrier;
        *lastBarrier = inst;
        numBarriers_++;
        *numBetween = 0;
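        // A barrier must stay before every instruction between it and the next barrier below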
        for (auto user = inst->GetNext(); user != oldLastBarrier; user = user->GetNext()) {
            AddDep(&prio, inst, user, 1U, *lastBarrier);
        }
    }

    // Users
    for (auto &userItem : inst->GetUsers()) {
        auto user = userItem.GetInst();
        if (user->IsMarked(mrk)) {
            AddDep(&prio, inst, user, instLatency, *lastBarrier);
        }
    }

    if (inst->IsMovableObject()) {
        // Take every SaveState below this reference instruction that has RuntimeCall users, and create a
        // dependency between this instruction and the SaveState.
        // See also GraphChecker::CheckSaveStatesWithRuntimeCallUsers.
        for (auto &ss : ssWithRuntimeCall_) {
            AddDep(&prio, inst, ss, 1U, *lastBarrier);
        }
    }

    if (inst->IsMemory() || inst->IsRefSpecial()) {
        ProcessMemory(inst, &prio, *lastBarrier);
        (*numSpecial)++;
    }

    if (inst->CanThrow() || inst->IsRuntimeCall() || inst->IsSaveState()) {
        ProcessSpecial(inst, &prio, *lastBarrier);
        (*numSpecial)++;
    }

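    // Mark as processed, so instructions visited later in the reverse walk (i.e. located
    // above in the block) can recognize this one as their user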
    inst->SetMarker(mrk);
    prio_.insert({inst, prio});
    maxPrio_ = std::max(maxPrio_, prio);
    oprev_ = ocycle_[inst];
}

// Memory
void Scheduler::ProcessMemory(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    if (inst->IsRefSpecial()) {
        loads_.push_back(inst);
        return;
    }
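    // stores_ holds the stores already seen in the reverse walk, i.e. the ones located
    // below this access in the block; keep this access ordered before any it may alias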
    for (auto mem : stores_) {
        if (GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
            AddDep(prio, inst, mem, 1U, lastBarrier);
        }
    }
    if (inst->IsStore()) {
        for (auto mem : loads_) {
            if (mem->IsLoad() && GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
                AddDep(prio, inst, mem, 1U, lastBarrier);
            }
        }
        for (auto ct : special_) {
            AddDep(prio, inst, ct, 1U, lastBarrier);
        }
        stores_.push_back(inst);
    } else {  // means inst->IsLoad()
        loads_.push_back(inst);
    }
}

void Scheduler::ProcessSpecialBoundsCheckI(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    auto value = inst->CastToBoundsCheckI()->GetImm();
    // Keep loads with the same immediate ordered after the bounds check; there is no easy
    // way to prove the arrays are the same.
    for (auto load : loads_) {
        if (load->GetOpcode() == Opcode::LoadArrayPairI) {
            auto imm = load->CastToLoadArrayPairI()->GetImm();
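            // A pair load reads elements imm and imm + 1, so both indices are guarded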
            if (imm == value || imm + 1 == value) {
                AddDep(prio, inst, load, 1U, lastBarrier);
            }
        } else if (load->GetOpcode() == Opcode::LoadArrayI && load->CastToLoadArrayI()->GetImm() == value) {
            AddDep(prio, inst, load, 1U, lastBarrier);
        }
    }
}

// CanThrow and SaveState instructions can't be rearranged, and stores can't be moved over them
void Scheduler::ProcessSpecial(Inst *inst, uint32_t *prio, Inst *lastBarrier)
{
    for (auto mem : stores_) {
        AddDep(prio, inst, mem, 1U, lastBarrier);
    }
    for (auto ct : special_) {
        AddDep(prio, inst, ct, 1U, lastBarrier);
    }

    if (inst->IsSaveState() && inst != inst->GetBasicBlock()->GetFirstInst()) {
        // SaveStates that have RuntimeCall users are added to a vector to create dependencies from preceding
        // reference instructions
        for (auto &user : inst->GetUsers()) {
            auto userInst = user.GetInst();
            if (userInst->IsRuntimeCall()) {
                ssWithRuntimeCall_.push_back(inst);
                break;
            }
        }
    }
    // 1. SafePoint also has this flag
    // 2. A GC triggered inside can poison a loaded reference value
    if (inst->IsRuntimeCall()) {
        for (auto mem : loads_) {
            if (mem->GetType() == DataType::REFERENCE || mem->GetType() == DataType::ANY) {
                AddDep(prio, inst, mem, 1U, lastBarrier);
            }
        }
    }
    // We have to "restore" the BoundsCheckI -> LoadArrayI dependency
    if (inst->GetOpcode() == Opcode::BoundsCheckI) {
        ProcessSpecialBoundsCheckI(inst, prio, lastBarrier);
    }
    special_.push_back(inst);
}

// Rearranges instructions in the basic block using the list scheduling algorithm.
bool Scheduler::ScheduleBasicBlock(BasicBlock *bb)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "Schedule BB " << bb->GetId();

    if (!BuildAllDeps(bb)) {
        return false;
    }

    // Schedule intervals between barriers
    uint32_t cycle = 0;
    uint32_t numInst = 0;
    Inst *first = nullptr;
    for (auto inst = bb->GetFirstInst(); inst != nullptr; inst = inst->GetNext()) {
        bool barrier = inst->IsBarrier();
        numInst++;
        inst->ClearMarkers();
        if (first == nullptr) {
            first = inst;
        }
        if (barrier || inst == bb->GetLastInst()) {
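            // Close the current barrier-free interval: schedule everything between the
            // previous barrier (or block start) and here, then emit the barrier itself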
            Inst *last = nullptr;
            if (barrier) {
                last = inst->GetPrev();
                numInst--;
            } else {
                last = inst;
            }
            if (numInst > 1) {
                cycle += ScheduleInstsBetweenBarriers(first, last);
            } else if (numInst == 1) {
                ASSERT(first->GetOpcode() != Opcode::LoadPairPart);
                sched_.push_back(first);
                cycle++;
            }
            if (barrier) {
                ASSERT(inst->GetOpcode() != Opcode::LoadPairPart);
                sched_.push_back(inst);
                cycle++;
            }
            numInst = 0;
            first = nullptr;
        }
    }
    return FinalizeBB(bb, cycle);
}

bool Scheduler::FinalizeBB(BasicBlock *bb, uint32_t cycle)
{
    // Rearrange instructions in basic block according to schedule
    bool result = false;
    bool hasPrev = false;
    uint32_t prev;
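    // old_ indices were assigned during the reverse walk, so the original order corresponds
    // to old_ decreasing by exactly one per instruction; any other step means the order changed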
    for (auto inst : sched_) {
        auto cur = old_[inst];
        if (hasPrev && prev - 1 != cur) {
            result = true;
        }
        prev = cur;
        hasPrev = true;

        bb->EraseInst(inst);
        bb->AppendInst(inst);
    }

    if (result) {
        COMPILER_LOG(DEBUG, SCHEDULER) << "Stats for block " << bb->GetId() << ": old cycles = " << oprev_
                                       << ", num barriers = " << numBarriers_ << ", critical path = " << maxPrio_
                                       << ", scheduled = " << cycle;
        GetGraph()->GetEventWriter().EventScheduler(bb->GetId(), bb->GetGuestPc(), oprev_, numBarriers_, maxPrio_,
                                                    cycle);
    }

    Cleanup();
    return result;
}

void Scheduler::Cleanup()
{
    sched_.clear();
    loads_.clear();
    stores_.clear();
    special_.clear();
    old_.clear();
    ocycle_.clear();
    numDeps_.clear();
    prio_.clear();
    deps_.clear();
    ssWithRuntimeCall_.clear();
}

// Rearranges instructions between [first..last] inclusive, none of them are barriers.
uint32_t Scheduler::ScheduleInstsBetweenBarriers(Inst *first, Inst *last)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "SchedBetween " << first->GetId() << " and " << last->GetId();

    // Compare function for 'waiting' queue
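    // Earliest start cycle first; ties prefer the instruction that came first in the
    // original block (reverse numbering gives earlier instructions a larger old_ index)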
    auto cmpAsap = [this](Inst *left, Inst *right) {
        return asap_[left] > asap_[right] || (asap_[left] == asap_[right] && old_[left] < old_[right]);
    };
    // Queue of instructions whose dependencies are all scheduled, but whose earliest start cycle has not come yet
    SchedulerPriorityQueue waiting(cmpAsap, GetGraph()->GetLocalAllocator()->Adapter());

    // Compare function for 'ready' queue
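    // Highest critical-path priority first; ties again prefer the original program order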
    auto cmpPrio = [this](Inst *left, Inst *right) {
        return prio_[left] < prio_[right] || (prio_[left] == prio_[right] && old_[left] < old_[right]);
    };
    // Queue of ready instructions
    SchedulerPriorityQueue ready(cmpPrio, GetGraph()->GetLocalAllocator()->Adapter());

    // Initialization, add leaves into the 'waiting' queue
    uint32_t numInst = 0;
    for (auto inst = first; inst != last->GetNext(); inst = inst->GetNext()) {
        asap_.insert({inst, 1U});
        if (numDeps_[inst] == 0) {
            COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << inst->GetId();
            waiting.push(inst);
        }
        numInst++;
    }
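    // asap_[inst] is the earliest cycle at which inst may start; it begins at 1 and is
    // raised in SchedWithGlued as each of its predecessors gets placed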

    // Scheduling
    uint32_t cycle = 1;
    while (numInst > 0) {
        if (ready.empty()) {
            ASSERT(!waiting.empty());
            uint32_t nearest = asap_[waiting.top()];
            // Skip cycles where we can't schedule any instruction
            if (nearest > cycle) {
                cycle = nearest;
            }
        }

        // Move from 'waiting' to 'ready'
        while (!waiting.empty()) {
            Inst *soonest = waiting.top();
            if (asap_[soonest] <= cycle) {
                waiting.pop();
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue ready moving " << soonest->GetId();
                ready.push(soonest);
            } else {
                break;
            }
        }
        ASSERT(!ready.empty());

        // Schedule the top 'ready' instruction (together with glued instructions, when necessary)
        numInst -= SchedWithGlued(ready.top(), &waiting, cycle++);
        ready.pop();
    }

    // Cleanup
    asap_.clear();
    return cycle;
}

uint32_t Scheduler::SchedWithGlued(Inst *inst, SchedulerPriorityQueue *waiting, uint32_t cycle)
{
    uint32_t amount = 0;
    // Compare function for 'now' queue
    auto cmpOld = [this](Inst *left, Inst *right) { return old_[left] < old_[right]; };
    // Queue of instructions to schedule at current cycle
    SchedulerPriorityQueue now(cmpOld, GetGraph()->GetLocalAllocator()->Adapter());
    // Add inst into 'now'
    ASSERT(now.empty());
    now.push(inst);

    // Add glued instructions into 'now'
    if (inst->GetOpcode() == Opcode::LoadArrayPair || inst->GetOpcode() == Opcode::LoadArrayPairI) {
        for (auto &userItem : inst->GetUsers()) {
            auto user = userItem.GetInst();
            ASSERT(user->GetOpcode() == Opcode::LoadPairPart);
            now.push(user);
        }
    }

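    // A LoadArrayPair(I) is expected to have at most two LoadPairPart users,
    // so 'now' holds at most three instructions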
    [[maybe_unused]] constexpr auto MAX_NOW_SIZE = 3;
    ASSERT(now.size() <= MAX_NOW_SIZE);

    // Schedule them
    while (!now.empty()) {
        auto cur = now.top();
        now.pop();
        COMPILER_LOG(DEBUG, SCHEDULER) << "Scheduling " << cur->GetId() << " at cycle " << cycle;

        // Adjust all dependent instructions
        for (auto pair : deps_.at(cur)) {
            // Adjust asap
            uint32_t asap = asap_[pair.first];
            asap = std::max(asap, cycle + pair.second);
            asap_[pair.first] = asap;

            // Adjust numDeps_
            uint32_t numDeps = numDeps_[pair.first];
            ASSERT(numDeps > 0);
            numDeps--;
            numDeps_[pair.first] = numDeps;

            // Glued instructions are scheduled immediately, so they should not go into the queue
            if (numDeps == 0 && pair.first->GetOpcode() != Opcode::LoadPairPart) {
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << pair.first->GetId();
                waiting->push(pair.first);
            }
        }

        // Add into schedule
        sched_.push_back(cur);
        amount++;
    }

    return amount;
}
}  // namespace panda::compiler