• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "scheduler.h"
17 
18 #include "optimizer/analysis/alias_analysis.h"
19 #include "optimizer/ir/basicblock.h"
20 #include "compiler_logger.h"
21 
22 namespace panda::compiler {
23 /* Instruction scheduling.
24  * Current decisions/limitations
25  *
26  * 1. Scheduler pass is placed immediately before register allocator.
27  * 2. It rearranges instructions only inside the basic block, but not between them.
28  * 3. No liveness analysis, only calculating dependencies using barrier/users/alias information.
29  * 4. No CPU pipeline/resource modeling, only having dependency costs.
30  * 5. Forward list scheduling algorithm with standart critical-path-based priority.
31  */
RunImpl()32 bool Scheduler::RunImpl()
33 {
34     COMPILER_LOG(DEBUG, SCHEDULER) << "Run " << GetPassName();
35     bool result = false;
36     for (auto bb : GetGraph()->GetBlocksRPO()) {
37         if (!bb->IsEmpty() && !bb->IsStartBlock()) {
38             result |= ScheduleBasicBlock(bb);
39         }
40     }
41     COMPILER_LOG(DEBUG, SCHEDULER) << GetPassName() << " complete";
42 #ifndef NDEBUG
43     GetGraph()->SetSchedulerComplete();
44 #endif  // NDEBUG
45     return result;
46 }
47 
48 // Dependency helper function
AddDep(uint32_t * prio,Inst * from,Inst * to,uint32_t latency,Inst * barrier)49 void Scheduler::AddDep(uint32_t *prio, Inst *from, Inst *to, uint32_t latency, Inst *barrier)
50 {
51     if (from == to) {
52         return;
53     }
54     COMPILER_LOG(DEBUG, SCHEDULER) << "Found dependency " << from->GetId() << " -> " << to->GetId() << " latency "
55                                    << latency;
56     // Estimate old cycle (without scheduling)
57     ocycle_[from] = std::max(ocycle_[from], latency + ocycle_[to]);
58 
59     // Update instruction priority - "how high instruction is in dependency tree"
60     *prio = std::max(*prio, latency + prio_[to]);
61 
62     // Do not add cross-barrier depenedencies into deps_
63     if (barrier == nullptr || old_[to] > old_[barrier]) {
64         if (deps_.at(from).count(to) == 1) {
65             uint32_t old_latency = deps_.at(from).at(to);
66             if (old_latency >= latency) {
67                 return;
68             }
69         } else {
70             num_deps_[to]++;
71         }
72         deps_.at(from)[to] = latency;
73     }
74 }
75 
76 // Calculate priority and dependencies
BuildAllDeps(BasicBlock * bb)77 bool Scheduler::BuildAllDeps(BasicBlock *bb)
78 {
79     auto marker_holder = MarkerHolder(GetGraph());
80     auto mrk = marker_holder.GetMarker();
81 
82     oprev_ = 0;
83     num_barriers_ = 0;
84     max_prio_ = 0;
85 
86     static constexpr uint32_t TOO_LONG_BB = 256;
87     uint32_t num_inst = 0;
88     uint32_t num_between = 0;
89     uint32_t num_special = 0;
90     Inst *last_barrier = nullptr;
91     for (auto inst : bb->InstsSafeReverse()) {
92         ProcessInst(inst, mrk, &num_inst, &num_between, &num_special, &last_barrier);
93 
94         if (num_special > TOO_LONG_BB || num_between > TOO_LONG_BB) {
95             COMPILER_LOG(DEBUG, SCHEDULER) << "Block " << bb->GetId() << " seems too big for scheduling, skipping";
96             Cleanup();
97             return false;
98         }
99     }
100     return true;
101 }
102 
103 // One instruction deps
// Calculates dependencies and priority for one instruction.
// Invoked while the block is being walked in reverse, so every user of 'inst'
// inside this block has already been processed and carries the marker 'mrk'.
// Out-parameters: num_inst - running counter used as the "old position" index;
// num_between - instructions seen since the last barrier; num_special -
// memory/special instructions seen; last_barrier - the nearest barrier below 'inst'.
void Scheduler::ProcessInst(Inst *inst, Marker mrk, uint32_t *num_inst, uint32_t *num_between, uint32_t *num_special,
                            Inst **last_barrier)
{
    uint32_t prio = 0;
    uint32_t inst_latency = inst->Latency();
    bool barrier = inst->IsBarrier();

    (*num_between)++;
    // Record original position, original-cycle estimate and empty dep info
    old_.insert({inst, (*num_inst)++});
    ocycle_.insert({inst, ++oprev_});
    num_deps_.insert({inst, 0U});
    deps_.emplace(inst, GetGraph()->GetLocalAllocator()->Adapter());

    // Dependency to the barrier: nothing may be moved below the next barrier
    if (*last_barrier != nullptr) {
        AddDep(&prio, inst, *last_barrier, 1U, *last_barrier);
    }

    // Dependency from barrier: every instruction between this barrier and the
    // previous one must stay after it
    if (barrier) {
        Inst *old_last_barrier = *last_barrier;
        *last_barrier = inst;
        num_barriers_++;
        *num_between = 0;
        for (auto user = inst->GetNext(); user != old_last_barrier; user = user->GetNext()) {
            AddDep(&prio, inst, user, 1U, *last_barrier);
        }
    }

    // Users: data dependencies to users located in this block (i.e. marked)
    for (auto &user_item : inst->GetUsers()) {
        auto user = user_item.GetInst();
        if (user->IsMarked(mrk)) {
            AddDep(&prio, inst, user, inst_latency, *last_barrier);
        }
    }

    // Memory-ordering dependencies
    if (inst->IsMemory() || inst->IsRefSpecial()) {
        ProcessMemory(inst, &prio, *last_barrier);
        (*num_special)++;
    }

    // Ordering for can-throw / runtime-call / SaveState instructions
    if (inst->CanThrow() || inst->IsRuntimeCall() || inst->IsSaveState()) {
        ProcessSpecial(inst, &prio, *last_barrier);
        (*num_special)++;
    }

    inst->SetMarker(mrk);
    prio_.insert({inst, prio});
    max_prio_ = std::max(max_prio_, prio);
    oprev_ = ocycle_[inst];
}
156 
157 // Memory
ProcessMemory(Inst * inst,uint32_t * prio,Inst * last_barrier)158 void Scheduler::ProcessMemory(Inst *inst, uint32_t *prio, Inst *last_barrier)
159 {
160     if (inst->IsRefSpecial()) {
161         loads_.push_back(inst);
162         return;
163     }
164     for (auto mem : stores_) {
165         if (GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
166             AddDep(prio, inst, mem, 1U, last_barrier);
167         }
168     }
169     if (inst->IsStore()) {
170         for (auto mem : loads_) {
171             if (mem->IsLoad() && GetGraph()->CheckInstAlias(inst, mem) != AliasType::NO_ALIAS) {
172                 AddDep(prio, inst, mem, 1U, last_barrier);
173             }
174         }
175         for (auto ct : special_) {
176             AddDep(prio, inst, ct, 1U, last_barrier);
177         }
178         stores_.push_back(inst);
179     } else {  // means inst->IsLoad()
180         loads_.push_back(inst);
181     }
182 }
183 
184 // CanThrow or SaveState can't be rearranged, and stores can't be moved over them
// CanThrow or SaveState can't be rearranged, and stores can't be moved over them
void Scheduler::ProcessSpecial(Inst *inst, uint32_t *prio, Inst *last_barrier)
{
    // Keep all stores below this instruction
    for (auto mem : stores_) {
        AddDep(prio, inst, mem, 1U, last_barrier);
    }
    // Preserve mutual order of special instructions
    for (auto ct : special_) {
        AddDep(prio, inst, ct, 1U, last_barrier);
    }
    // 1. SafePoint also has this flag
    // 2. GC triggered inside can poison loaded reference value
    if (inst->IsRuntimeCall()) {
        for (auto mem : loads_) {
            if (mem->GetType() == DataType::REFERENCE) {
                AddDep(prio, inst, mem, 1U, last_barrier);
            }
        }
    }
    // We have to "restore" BoundsCheckI -> LoadArrayI dependency
    if (inst->GetOpcode() == Opcode::BoundsCheckI) {
        auto value = inst->CastToBoundsCheckI()->GetImm();
        // Add a dependency to loads with the same immediate. No easy way to check
        // arrays are same, so this is conservative.
        for (auto load : loads_) {
            if (load->GetOpcode() == Opcode::LoadArrayPairI) {
                auto imm = load->CastToLoadArrayPairI()->GetImm();
                // A pair load touches indices imm and imm + 1
                if (imm == value || imm + 1 == value) {
                    AddDep(prio, inst, load, 1U, last_barrier);
                }
            } else if (load->GetOpcode() == Opcode::LoadArrayI && load->CastToLoadArrayI()->GetImm() == value) {
                AddDep(prio, inst, load, 1U, last_barrier);
            }
        }
    }
    special_.push_back(inst);
}
219 
220 // Rearranges instructions in the basic block using list scheduling algorithm.
// Rearranges instructions in the basic block using list scheduling algorithm.
// The block is split into intervals delimited by barriers; each interval is
// scheduled independently, barriers themselves keep their relative positions.
// Returns true when the resulting order differs from the original one.
bool Scheduler::ScheduleBasicBlock(BasicBlock *bb)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "Schedule BB " << bb->GetId();

    if (!BuildAllDeps(bb)) {
        return false;
    }

    // Schedule intervals between barriers
    uint32_t cycle = 0;
    uint32_t num_inst = 0;
    Inst *first = nullptr;
    for (auto inst = bb->GetFirstInst(); inst != nullptr; inst = inst->GetNext()) {
        bool barrier = inst->IsBarrier();
        num_inst++;
        inst->ClearMarkers();
        if (first == nullptr) {
            // First instruction of the current barrier-free interval
            first = inst;
        }
        if (barrier || inst == bb->GetLastInst()) {
            // The interval ends right before a barrier or at the last instruction
            Inst *last = nullptr;
            if (barrier) {
                last = inst->GetPrev();
                num_inst--;  // the barrier itself does not belong to the interval
            } else {
                last = inst;
            }
            if (num_inst > 1) {
                cycle += ScheduleInstsBetweenBarriers(first, last);
            } else if (num_inst == 1) {
                // Single-instruction interval: nothing to reorder
                ASSERT(first->GetOpcode() != Opcode::LoadPairPart);
                sched_.push_back(first);
                cycle++;
            }
            if (barrier) {
                // Barrier is appended right after its interval
                ASSERT(inst->GetOpcode() != Opcode::LoadPairPart);
                sched_.push_back(inst);
                cycle++;
            }
            num_inst = 0;
            first = nullptr;
        }
    }
    return FinalizeBB(bb, cycle);
}
266 
FinalizeBB(BasicBlock * bb,uint32_t cycle)267 bool Scheduler::FinalizeBB(BasicBlock *bb, uint32_t cycle)
268 {
269     // Rearrange instructions in basic block according to schedule
270     bool result = false;
271     bool has_prev = false;
272     uint32_t prev;
273     for (auto inst : sched_) {
274         auto cur = old_[inst];
275         if (has_prev && prev - 1 != cur) {
276             result = true;
277         }
278         prev = cur;
279         has_prev = true;
280 
281         bb->EraseInst(inst);
282         bb->AppendInst(inst);
283     }
284 
285     if (result) {
286         COMPILER_LOG(DEBUG, SCHEDULER) << "Stats for block " << bb->GetId() << ": old cycles = " << oprev_
287                                        << ", num barriers = " << num_barriers_ << ", critical path = " << max_prio_
288                                        << ", scheduled = " << cycle;
289         GetGraph()->GetEventWriter().EventScheduler(bb->GetId(), bb->GetGuestPc(), oprev_, num_barriers_, max_prio_,
290                                                     cycle);
291     }
292 
293     Cleanup();
294     return result;
295 }
296 
// Resets all per-block scheduling state so the next block starts from scratch.
void Scheduler::Cleanup()
{
    sched_.clear();
    loads_.clear();
    stores_.clear();
    special_.clear();
    old_.clear();
    ocycle_.clear();
    num_deps_.clear();
    prio_.clear();
    deps_.clear();
}
309 
310 // Rearranges instructions between [first..last] inclusive, none of them are barriers.
// Rearranges instructions between [first..last] inclusive, none of them are barriers.
// Classic forward list scheduling: 'waiting' holds instructions whose
// dependencies are all scheduled but whose earliest start cycle (asap) has not
// been reached yet; 'ready' holds instructions that can be issued this cycle,
// ordered by critical-path priority. Returns the number of cycles consumed.
uint32_t Scheduler::ScheduleInstsBetweenBarriers(Inst *first, Inst *last)
{
    COMPILER_LOG(DEBUG, SCHEDULER) << "SchedBetween " << first->GetId() << " and " << last->GetId();

    // Compare function for 'waiting' queue: earliest asap first,
    // ties broken by original order
    auto cmp_asap = [this](Inst *left, Inst *right) {
        return asap_[left] > asap_[right] || (asap_[left] == asap_[right] && old_[left] < old_[right]);
    };
    // Queue of instructions, which dependencies are scheduled already, but they are still not finished yet
    SchedulerPriorityQueue waiting(cmp_asap, GetGraph()->GetLocalAllocator()->Adapter());

    // Compare function for 'ready' queue: highest priority (critical path) first,
    // ties broken by original order
    auto cmp_prio = [this](Inst *left, Inst *right) {
        return prio_[left] < prio_[right] || (prio_[left] == prio_[right] && old_[left] < old_[right]);
    };
    // Queue of ready instructions
    SchedulerPriorityQueue ready(cmp_prio, GetGraph()->GetLocalAllocator()->Adapter());

    // Initialization, add leafs into 'waiting' queue
    uint32_t num_inst = 0;
    for (auto inst = first; inst != last->GetNext(); inst = inst->GetNext()) {
        asap_.insert({inst, 1U});
        if (num_deps_[inst] == 0) {
            COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << inst->GetId();
            waiting.push(inst);
        }
        num_inst++;
    }

    // Scheduling
    uint32_t cycle = 1;
    while (num_inst > 0) {
        if (ready.empty()) {
            ASSERT(!waiting.empty());
            uint32_t nearest = asap_[waiting.top()];
            // Skipping cycles where we can't schedule any instruction
            if (nearest > cycle) {
                cycle = nearest;
            }
        }

        // Move from 'waiting' to 'ready' everything whose asap cycle has come
        while (!waiting.empty()) {
            Inst *soonest = waiting.top();
            if (asap_[soonest] <= cycle) {
                waiting.pop();
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue ready moving " << soonest->GetId();
                ready.push(soonest);
            } else {
                break;
            }
        }
        ASSERT(!ready.empty());

        // Schedule top 'ready' instruction (together with glued, when necessary)
        num_inst -= SchedWithGlued(ready.top(), &waiting, cycle++);
        ready.pop();
    }

    // Cleanup: asap_ is only meaningful within one interval
    asap_.clear();
    return cycle;
}
374 
// Emits 'inst' (and any instructions glued to it, e.g. LoadPairPart users of a
// pair load) at the given cycle, updating asap/num_deps of dependent
// instructions and pushing newly-unblocked ones into 'waiting'.
// Returns how many instructions were emitted.
uint32_t Scheduler::SchedWithGlued(Inst *inst, SchedulerPriorityQueue *waiting, uint32_t cycle)
{
    uint32_t amount = 0;
    // Compare function for 'now' queue: keep original order among glued insts
    auto cmp_old = [this](Inst *left, Inst *right) { return old_[left] < old_[right]; };
    // Queue of instructions to schedule at current cycle
    SchedulerPriorityQueue now(cmp_old, GetGraph()->GetLocalAllocator()->Adapter());
    // Add inst into 'now'
    ASSERT(now.empty());
    now.push(inst);

    // Add glued instructions into 'now'
    if (inst->GetOpcode() == Opcode::LoadArrayPair || inst->GetOpcode() == Opcode::LoadArrayPairI) {
        for (auto &user_item : inst->GetUsers()) {
            auto user = user_item.GetInst();
            ASSERT(user->GetOpcode() == Opcode::LoadPairPart);
            now.push(user);
        }
    }

    // At most the pair load itself plus its two LoadPairPart users
    [[maybe_unused]] constexpr auto MAX_NOW_SIZE = 3;
    ASSERT(now.size() <= MAX_NOW_SIZE);

    // Schedule them
    while (!now.empty()) {
        auto cur = now.top();
        now.pop();
        COMPILER_LOG(DEBUG, SCHEDULER) << "Scheduling " << cur->GetId() << " at cycle " << cycle;

        // Adjust all dependent instructions
        for (auto pair : deps_.at(cur)) {
            // Adjust asap: dependent may start no earlier than cycle + latency
            uint32_t asap = asap_[pair.first];
            asap = std::max(asap, cycle + pair.second);
            asap_[pair.first] = asap;

            // Adjust num_deps: one predecessor fewer remains unscheduled
            uint32_t num_deps = num_deps_[pair.first];
            ASSERT(num_deps > 0);
            num_deps--;
            num_deps_[pair.first] = num_deps;

            // Glued instructions scheduled immediately, so they should not go into queue
            if (num_deps == 0 && pair.first->GetOpcode() != Opcode::LoadPairPart) {
                COMPILER_LOG(DEBUG, SCHEDULER) << "Queue wait add " << pair.first->GetId();
                waiting->push(pair.first);
            }
        }

        // Add into schedule
        sched_.push_back(cur);
        amount++;
    }

    return amount;
}
431 }  // namespace panda::compiler
432