/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler.h"

#include <string>

#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type-inl.h"
#include "optimizing/load_store_analysis.h"
#include "prepare_for_register_allocation.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

#ifdef ART_ENABLE_CODEGEN_arm
#include "scheduler_arm.h"
#endif

namespace art HIDDEN {

void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

  if (is_data_dependency) {
    node->AddDataPredecessor(dependency);
  } else {
    node->AddOtherPredecessor(dependency);
  }
}

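// Check whether the side effects of `instr1` and `instr2` forbid reordering the two
// instructions: read-after-write, write-after-read, or memory write-after-write.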
bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
                                                           const HInstruction* instr2) {
  SideEffects instr1_side_effects = instr1->GetSideEffects();
  SideEffects instr2_side_effects = instr2->GetSideEffects();

  // Read after write.
  if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
    return true;
  }

  // Write after read.
  if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
    return true;
  }

  // Memory write after write.
  if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
    return true;
  }

  return false;
}

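// Return the heap location index that the load-store analysis assigned to this array access.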
size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
    HInstruction* instruction) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
  // This array access should have been analyzed and added to the HeapLocationCollector before.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
  return heap_loc;
}

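// Two array accesses may alias if they map to the same heap location or if the
// load-store analysis cannot prove that the locations are distinct.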
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
    HInstruction* instr1, HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
  size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);

  // For example: arr[0] and arr[0]
  if (instr1_heap_loc == instr2_heap_loc) {
    return true;
  }

  // For example: arr[0] and arr[i]
  if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
    return true;
  }

  return false;
}

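// Helpers to classify the heap access instructions understood by the dependency analysis.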
static bool IsArrayAccess(const HInstruction* instruction) {
  return instruction->IsArrayGet() || instruction->IsArraySet();
}

static bool IsInstanceFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() || instruction->IsInstanceFieldSet();
}

static bool IsStaticFieldAccess(const HInstruction* instruction) {
  return instruction->IsStaticFieldGet() || instruction->IsStaticFieldSet();
}

static bool IsFieldAccess(const HInstruction* instruction) {
  return IsInstanceFieldAccess(instruction) || IsStaticFieldAccess(instruction);
}

static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
  return &instruction->GetFieldInfo();
}

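// Return the heap location index that the load-store analysis assigned to this field access.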
size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
    const HInstruction* instr) const {
  DCHECK(instr != nullptr);
  DCHECK(GetFieldInfo(instr) != nullptr);
  DCHECK(heap_location_collector_ != nullptr);

  HInstruction* ref = instr->InputAt(0);
  size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr));
  // This field access should have been analyzed and added to the HeapLocationCollector before.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);

  return heap_loc;
}

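// Two field accesses may alias if they map to the same heap location or if the
// load-store analysis cannot prove that the locations are distinct. Static and
// instance field accesses never alias with each other.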
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
    const HInstruction* instr1, const HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);

  // Static and instance field accesses should not alias.
  if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
      (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
    return false;
  }

  // If both field accesses are resolved.
  size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
  size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);

  if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
    return true;
  }

  if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
                                          instr2_field_access_heap_loc)) {
    return false;
  }

  return true;
}

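// Determine whether `instr1` and `instr2` have a memory dependency, using the heap
// location information from the load-store analysis (when available) to disambiguate
// array and field accesses.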
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
    HInstruction* instr1, HInstruction* instr2) const {
  if (!HasReorderingDependency(instr1, instr2)) {
    return false;
  }

  if (heap_location_collector_ == nullptr ||
      heap_location_collector_->GetNumberOfHeapLocations() == 0) {
    // Without HeapLocation information from load-store analysis,
    // we cannot do further disambiguation analysis on these two instructions.
    // Simply assume that the two instructions have a memory dependency.
    return true;
  }

  // Note: Unresolved field access instructions are currently marked as not schedulable.
  // If we change that, we should still keep in mind that these instructions can throw and
  // read or write volatile fields and, if static, cause class initialization and write to
  // arbitrary heap locations, and therefore cannot be reordered with any other field or
  // array access to preserve the observable behavior. The only exception is access to
  // singleton members that could actually be reordered across these instructions but we
  // currently do not analyze singletons here anyway.

  if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
    return ArrayAccessMayAlias(instr1, instr2);
  }
  if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
    return FieldAccessMayAlias(instr1, instr2);
  }

  // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
  if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
    return true;
  }
  if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
    return true;
  }
  if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return true;
  }

  // Heap accesses of different kinds should not alias.
  if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
    return false;
  }
  if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return false;
  }

  // We conservatively treat all other cases as having a dependency,
  // for example, Invoke and ArrayGet.
  return true;
}

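// Two instructions must keep their relative order if one of them may throw and the other
// writes to memory, or if both of them may throw.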
bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
                                                          const HInstruction* instr2) {
  if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
    return true;
  }
  if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
    return true;
  }
  if (instr2->CanThrow() && instr1->CanThrow()) {
    return true;
  }

  // The above checks should cover all cases where we cannot reorder two
  // instructions which may throw exceptions.
  return false;
}

// Check if the specified instruction is a better candidate, i.e. more likely to
// have other instructions depending on it.
static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
                                                        HInstruction* old_candidate) {
  if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
    // Weaker side effects.
    return false;
  }
  if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
    // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
    return new_candidate->CanThrow() && !old_candidate->CanThrow();
  } else {
    // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
    return new_candidate->CanThrow() || !old_candidate->CanThrow();
  }
}

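// Add non-data ('other') dependencies for values that flow across loop iterations
// through loop-header phis, so that these implicit cross-iteration dependencies are
// respected when reordering instructions.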
void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
  for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
    // Having a phi-function from a loop header as an input means the current node of the
    // scheduling graph has a cross-iteration dependency because such phi-functions bring values
    // from the previous iteration to the current iteration.
    if (!instruction->IsLoopHeaderPhi()) {
      continue;
    }
    for (HInstruction* phi_input : instruction->GetInputs()) {
      // As the scheduling graph of the current basic block is built by
      // processing instructions bottom-up, nullptr returned by GetNode means
      // an instruction defining a value for the phi is either before the
      // instruction represented by node or it is in a different basic block.
      SchedulingNode* def_node = GetNode(phi_input);

      // We don't create a dependency if there are uses besides the use in phi.
      // In such cases a register to hold phi_input is usually allocated and
      // a MOV instruction is generated. In cases with multiple uses and no MOV
      // instruction, reordering that creates a MOV instruction can improve
      // performance more than trying to avoid the MOV instruction.
      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
        // We have an implicit data dependency between node and def_node.
        // AddDataDependency cannot be used because it is for explicit data dependencies.
        // So AddOtherDependency is used.
        AddOtherDependency(def_node, node);
      }
    }
  }
}

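// Add all dependency edges for `instruction_node`: define-use dependencies, scheduling
// barrier dependencies, side effect dependencies, environment dependencies and
// cross-iteration dependencies.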
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                      bool is_scheduling_barrier) {
  HInstruction* instruction = instruction_node->GetInstruction();

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK_IMPLIES(is_scheduling_barrier, contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on instructions after it, and instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example, if we have
        //   instr_1
        //   barrier_2
        //   instr_3
        //   barrier_4
        //   instr_5
        // we only create the following non-data dependencies
        //   1 -> 2
        //   2 -> 3
        //   2 -> 4
        //   3 -> 4
        //   4 -> 5
        // and do not create
        //   1 -> 4
        //   2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`. But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier so we can stop further
        // processing.
        //
        // As an "other" dependency is not set up if a data dependency exists, we need to
        // check that at least one of them exists.
        DCHECK(other_node->HasOtherDependency(instruction_node)
               || other_node->HasDataDependency(instruction_node));
        break;
      }
      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate which more likely will have other instructions
        // depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }

  AddCrossIterationDependencies(instruction_node);
}

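// Build a node identifier from the instruction's type and id, e.g. "i20" for the Int32
// instruction with id 20.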
static const std::string InstructionTypeId(const HInstruction* instruction) {
  return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction typed id as the node identifier.
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
         << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiate data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[dir=back,color=blue]\n";
  }
}

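// Dump the scheduling graph in dot format, appending it to the "scheduling_graphs.dot"
// file in the current directory.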
void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
  // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph. Use an increasing index for easier differentiation.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    SchedulingNode* node = entry.second.get();
    DumpAsDotNode(output, node);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
           << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we've scheduled HSelect, we want LessThan to be scheduled
  // immediately, because it is a materialized condition, and will be emitted right before HSelect
  // in the codegen phase.
  //
  // i20 HLessThan [...]               HLessThan    HAdd      HAdd
  // i21 HAdd [...]              ===>      |          |         |
  // i22 HAdd [...]                        +----------+---------+
  // i23 HSelect [i21, i22, i20]                    HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

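// Pop the next node to schedule: prefer a materialized condition that can be emitted
// right before its use, otherwise pick a node based on critical path information.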
SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for the "materialized condition emitted right before its use" scenario.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get the highest priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

void HScheduler::Schedule(HGraph* graph) {
  // We run LSA here instead of in a separate pass to better control whether we
  // should run the analysis or not.
  const HeapLocationCollector* heap_location_collector = nullptr;
  ScopedArenaAllocator allocator(graph->GetArenaStack());
  LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator);
  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
    lsa.Run();
    heap_location_collector = &lsa.GetHeapLocationCollector();
  }

  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block, heap_location_collector);
    }
  }
}

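// Build the scheduling graph for `block` and reorder its instructions, scheduling
// bottom-up from the last instruction of the block.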
void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());

  // Build the scheduling graph.
  auto [scheduling_graph, scheduling_nodes] =
      BuildSchedulingGraph(block, &allocator, heap_location_collector);

  if (scheduling_graph.Size() <= 1) {
    return;
  }

  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its successors have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
                << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

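// Schedule `scheduling_node` and update its predecessors, adding those that become ready
// (no unscheduled successors left) to the candidate list.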
void HScheduler::Schedule(SchedulingNode* scheduling_node,
                          /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path.
    // The 'other' (so 'non-data') dependencies (usually) do not represent a
    // 'material' dependency of nodes on others. They exist for program
    // correctness. So we do not use them to compute the critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

// Move an instruction after the cursor instruction inside one basic block.
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}

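// Place `instruction` at the current scheduling position: if it is already the cursor,
// simply move the cursor up; otherwise move the instruction right after the cursor.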
void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //   HClearException
  //   HClinitCheck
  //   HDeoptimize
  //   HLoadClass
  //   HLoadException
  //   HMemoryBarrier
  //   HMonitorOperation
  //   HNop
  //   HThrow
  //   HTryBoundary
  //   All unresolved field access instructions
  //   All volatile field access instructions, e.g. HInstanceFieldGet
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
         (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may be only interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler cannot
    // see whether the catch block has assumptions about the instruction order in the
    // try block. In the following example, if we enable the scheduler for the try block,
    // MultiplyAccumulate may be scheduled before DivZeroCheck,
    // which can result in an incorrect value in the catch block.
    //   try {
    //     a = a/b;    // DivZeroCheck
    //                 // Div
    //     c = c*d+e;  // MultiplyAccumulate
    //   } catch { System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
         // Don't break calling convention.
         instr->IsParameterValue() ||
         // Code generation of goto relies on SuspendCheck's position.
         instr->IsSuspendCheck();
}

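// Run instruction scheduling on the graph for the target instruction set. Scheduling is
// currently only implemented for ARM and ARM64; for other targets this is a no-op.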
bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
  // Phase-local allocator that allocates scheduler internal data structures like
  // scheduling nodes, the internal node map, dependencies, etc.
  CriticalPathSchedulingNodeSelector critical_path_selector;
  // Do not create the `RandomSchedulingNodeSelector` if not requested.
  // The construction is expensive, including a call to `srand()`.
  std::optional<RandomSchedulingNodeSelector> random_selector;
  SchedulingNodeSelector* selector = &critical_path_selector;
  if (schedule_randomly) {
    random_selector.emplace();
    selector = &random_selector.value();
  }
#else
  // Avoid compilation error when compiling for unsupported instruction set.
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  UNUSED(codegen_);
#endif

  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case InstructionSet::kArm64: {
      arm64::HSchedulerARM64 scheduler(selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
    case InstructionSet::kThumb2:
    case InstructionSet::kArm: {
      arm::HSchedulerARM scheduler(selector, codegen_);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
  return true;
}

}  // namespace art