/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_

#include <fstream>

#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/time_utils.h"
#include "code_generator.h"
#include "load_store_analysis.h"
#include "nodes.h"
#include "optimization.h"

namespace art HIDDEN {

// General description of instruction scheduling.
//
// This pass tries to improve the quality of the generated code by reordering
// instructions in the graph to avoid execution delays caused by execution
// dependencies.
// Currently, scheduling is performed at the block level, so no `HInstruction`
// ever leaves its block in this pass.
//
// The scheduling process iterates through blocks in the graph. For blocks that
// we can and want to schedule:
// 1) Build a dependency graph for instructions.
//    It includes data dependencies (inputs/uses), but also environment
//    dependencies and side-effect dependencies.
// 2) Schedule the dependency graph.
//    This is a topological sort of the dependency graph, using heuristics to
//    decide what node to schedule first when there are multiple candidates.
//
// A few factors impacting the quality of the scheduling are:
// - The heuristics used to decide what node to schedule in the topological sort
//   when there are multiple valid candidates. There is a wide range of
//   complexity possible here, going from a simple model only considering
//   latencies, to a super detailed CPU pipeline model.
// - Fewer dependencies in the dependency graph give more freedom for the
//   scheduling heuristics. For example, de-aliasing can allow possibilities for
//   reordering of memory accesses.
// - The level of abstraction of the IR. It is easier to evaluate scheduling for
//   IRs that translate to a single assembly instruction than for IRs
//   that generate multiple assembly instructions or generate different code
//   depending on properties of the IR.
// - Scheduling is performed before register allocation, so it is not aware of
//   the impact of moving instructions on register allocation.
//
//
// The scheduling code uses the terms predecessors, successors, and dependencies.
// This can be confusing at times, so here are clarifications.
// These terms are used from the point of view of the program dependency graph. So
// the inputs of an instruction are part of its dependencies, and hence part of its
// predecessors. So the uses of an instruction are (part of) its successors.
// (Side-effect dependencies can yield predecessors or successors that are not
// inputs or uses.)
//
// Here is a trivial example. For the Java code:
//
//    int a = 1 + 2;
//
// we would have the instructions
//
//    i1 HIntConstant 1
//    i2 HIntConstant 2
//    i3 HAdd [i1,i2]
//
// `i1` and `i2` are predecessors of `i3`.
// `i3` is a successor of `i1` and a successor of `i2`.
// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
// and `n3` (respectively for instructions `i1`, `i2`, and `i3`).
// Conceptually the program dependency graph for this would contain two edges
//
//    n1 -> n3
//    n2 -> n3
//
// Since we schedule backwards (starting from the last instruction in each basic
// block), the implementation of nodes keeps a list of pointers to their
// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`.
//
// Node dependencies are also referred to from the program dependency graph
// point of view: we say that node `B` immediately depends on `A` if there is an
// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
// `B`, and `B` is a successor of `A`. In the example above `n3` depends on `n1`
// and `n2`.
// Since nodes in the scheduling graph keep a list of their predecessors, node
// `B` will have a pointer to its predecessor `A`.
// As we schedule backwards, `B` will be selected for scheduling before `A` is.
//
// So the scheduling for the example above could happen as follows:
//
//    |---------------------------+------------------------|
//    | candidates for scheduling | instructions scheduled |
//    |---------------------------+------------------------|
//
// The only node without successors is `n3`, so it is the only initial
// candidate.
//
//    | n3                        | (none)                 |
//
// We schedule `n3` as the last (and only) instruction. All its predecessors
// that do not have any unscheduled successors become candidates. That is, `n1`
// and `n2` become candidates.
//
//    | n1, n2                    | n3                     |
//
// One of the candidates is selected. In practice this is where scheduling
// heuristics kick in, to decide which of the candidates should be selected.
// In this example, let it be `n1`. It is scheduled before previously scheduled
// nodes (in program order). There are no other nodes to add to the list of
// candidates.
//
//    | n2                        | n1                     |
//    |                           | n3                     |
//
// The only candidate available for scheduling is `n2`. Schedule it before
// (in program order) the previously scheduled nodes.
//
//    | (none)                    | n2                     |
//    |                           | n1                     |
//    |                           | n3                     |
//    |---------------------------+------------------------|
//
// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
// In this trivial example, it does not matter which of `i1` and `i2` is
// scheduled first since they are constants. However, the same process would
// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).
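//
// The process described above boils down to the loop sketched below. This is a purely
// illustrative sketch written in terms of the classes declared later in this file
// (`SchedulingNodeSelector`, `SchedulingNode`); the real implementation, including latency and
// critical path bookkeeping, lives in `HScheduler::Schedule()`.
//
//    candidates = nodes whose instructions have no unscheduled successors
//    while (!candidates.empty()) {
//      SchedulingNode* node = selector->PopHighestPriorityNode(&candidates, graph);
//      insert node's instruction before the instructions already scheduled (program order);
//      for (SchedulingNode* predecessor : node's data and other predecessors) {
//        predecessor->DecrementNumberOfUnscheduledSuccessors();
//        if (!predecessor->HasUnscheduledSuccessors()) {
//          candidates.push_back(predecessor);
//        }
//      }
//    }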

// Set to true to have instruction scheduling dump scheduling graphs to the file
// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
static constexpr bool kDumpDotSchedulingGraphs = false;

// Typically used as a default instruction latency.
static constexpr uint32_t kGenericInstructionLatency = 1;

class HScheduler;

/**
 * A node representing an `HInstruction` in the `SchedulingGraph`.
 */
class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> {
 public:
  SchedulingNode(HInstruction* instr, ScopedArenaAllocator* allocator, bool is_scheduling_barrier)
      : latency_(0),
        internal_latency_(0),
        critical_path_(0),
        instruction_(instr),
        is_scheduling_barrier_(is_scheduling_barrier),
        data_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
        other_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
        num_unscheduled_successors_(0) {
    data_predecessors_.reserve(kPreallocatedPredecessors);
  }

  void AddDataPredecessor(SchedulingNode* predecessor) {
    // Check whether the predecessor has been added earlier.
    if (HasDataDependency(predecessor)) {
      return;
    }
    data_predecessors_.push_back(predecessor);
    predecessor->num_unscheduled_successors_++;
  }

  const ScopedArenaVector<SchedulingNode*>& GetDataPredecessors() const {
    return data_predecessors_;
  }

  void AddOtherPredecessor(SchedulingNode* predecessor) {
    // Check whether the predecessor has been added earlier.
    // As an optimization of the scheduling graph, we don't need to create another dependency if
    // there is a data dependency between scheduling nodes.
    if (HasOtherDependency(predecessor) || HasDataDependency(predecessor)) {
      return;
    }
    other_predecessors_.push_back(predecessor);
    predecessor->num_unscheduled_successors_++;
  }

  const ScopedArenaVector<SchedulingNode*>& GetOtherPredecessors() const {
    return other_predecessors_;
  }

  void DecrementNumberOfUnscheduledSuccessors() {
    num_unscheduled_successors_--;
  }

  void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
    critical_path_ = std::max(critical_path_, other_critical_path);
  }

  bool HasUnscheduledSuccessors() const {
    return num_unscheduled_successors_ != 0;
  }

  HInstruction* GetInstruction() const { return instruction_; }
  uint32_t GetLatency() const { return latency_; }
  void SetLatency(uint32_t latency) { latency_ = latency; }
  uint32_t GetInternalLatency() const { return internal_latency_; }
  void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
  uint32_t GetCriticalPath() const { return critical_path_; }
  bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }

  bool HasDataDependency(const SchedulingNode* node) const {
    return ContainsElement(data_predecessors_, node);
  }

  bool HasOtherDependency(const SchedulingNode* node) const {
    return ContainsElement(other_predecessors_, node);
  }

 private:
  // The latency of this node. It is the latency from the moment the last
  // instruction for this node has executed to the moment the result produced
  // by this node is available to its users.
  uint32_t latency_;
  // This represents the time spent *within* the generated code for this node.
  // It should be zero for nodes that only generate a single instruction.
  uint32_t internal_latency_;

  // The critical path from this instruction to the end of scheduling. It is
  // used by the scheduling heuristics to measure the priority of this instruction.
  // It is defined as
  //     critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
  // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
  // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
  uint32_t critical_path_;

  // The instruction that this node represents.
  HInstruction* const instruction_;

  // If a node is a scheduling barrier, other nodes cannot be scheduled before it.
  const bool is_scheduling_barrier_;

  // The lists of predecessors. They cannot be scheduled before this node. Once
  // this node is scheduled, we check whether any of its predecessors has become a
  // valid candidate for scheduling.
  // Predecessors in `data_predecessors_` are data dependencies. Those in
  // `other_predecessors_` are side-effect dependencies, environment
  // dependencies, and scheduling barrier dependencies.
  ScopedArenaVector<SchedulingNode*> data_predecessors_;
  ScopedArenaVector<SchedulingNode*> other_predecessors_;

  // The number of unscheduled successors for this node. This number is
  // decremented as successors are scheduled. When it reaches zero this node
  // becomes a valid candidate to schedule.
  uint32_t num_unscheduled_successors_;

  static constexpr size_t kPreallocatedPredecessors = 4;
};
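
// To make the `critical_path_` formula above concrete, consider again the `n1`/`n2`/`n3` example
// from the top of this file and assume made-up latencies (real values come from the
// architecture-specific `SchedulingLatencyVisitor` declared below): latency 1 for the constants,
// latency 2 for the add, and all internal latencies 0.
//
//    critical_path(n3) = 2                // no data successors
//    critical_path(n1) = 1 + (0 + 2) = 3  // its only use is n3
//    critical_path(n2) = 1 + (0 + 2) = 3
//
// A longer critical path means more unscheduled work transitively depends on the node, which is
// why `CriticalPathSchedulingNodeSelector` below uses it as a priority measure.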

/*
 * Provide analysis of instruction dependencies (side effects) which are not in the form of
 * explicit def-use data dependencies.
 */
class SideEffectDependencyAnalysis {
 public:
  explicit SideEffectDependencyAnalysis(const HeapLocationCollector* heap_location_collector)
      : memory_dependency_analysis_(heap_location_collector) {}

  bool HasSideEffectDependency(HInstruction* instr1, HInstruction* instr2) const {
    if (memory_dependency_analysis_.HasMemoryDependency(instr1, instr2)) {
      return true;
    }

    // Even if the above memory dependency check has passed, it is still necessary to
    // check dependencies between instructions that can throw and instructions
    // that write to memory.
    if (HasExceptionDependency(instr1, instr2)) {
      return true;
    }

    return false;
  }

 private:
  static bool HasExceptionDependency(const HInstruction* instr1, const HInstruction* instr2);
  static bool HasReorderingDependency(const HInstruction* instr1, const HInstruction* instr2);

  /*
   * Memory dependency analysis of instructions based on their memory side effects
   * and heap location information from the load-store analysis (LSA) pass, if it is provided.
   */
  class MemoryDependencyAnalysis {
   public:
    explicit MemoryDependencyAnalysis(const HeapLocationCollector* heap_location_collector)
        : heap_location_collector_(heap_location_collector) {}

    bool HasMemoryDependency(HInstruction* instr1, HInstruction* instr2) const;

   private:
    bool ArrayAccessMayAlias(HInstruction* instr1, HInstruction* instr2) const;
    bool FieldAccessMayAlias(const HInstruction* instr1, const HInstruction* instr2) const;
    size_t ArrayAccessHeapLocation(HInstruction* instruction) const;
    size_t FieldAccessHeapLocation(const HInstruction* instruction) const;

    const HeapLocationCollector* const heap_location_collector_;
  };

  MemoryDependencyAnalysis memory_dependency_analysis_;
};

/*
 * Directed acyclic graph for scheduling.
 */
class SchedulingGraph : public ValueObject {
 public:
  SchedulingGraph(ScopedArenaAllocator* allocator,
                  const HeapLocationCollector* heap_location_collector)
      : allocator_(allocator),
        contains_scheduling_barrier_(false),
        nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
        side_effect_dependency_analysis_(heap_location_collector) {}

  SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
    std::unique_ptr<SchedulingNode> node(
        new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier));
    SchedulingNode* result = node.get();
    nodes_map_.insert(std::make_pair(instr, std::move(node)));
    contains_scheduling_barrier_ |= is_scheduling_barrier;
    AddDependencies(result, is_scheduling_barrier);
    return result;
  }

  SchedulingNode* GetNode(const HInstruction* instr) const {
    auto it = nodes_map_.find(instr);
    if (it == nodes_map_.end()) {
      return nullptr;
    } else {
      return it->second.get();
    }
  }

  size_t Size() const {
    return nodes_map_.size();
  }

  // Dump the scheduling graph, in dot file format, appending it to the file
  // `scheduling_graphs.dot`.
  void DumpAsDotGraph(const std::string& description,
                      const ScopedArenaVector<SchedulingNode*>& initial_candidates);

 protected:
  void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
  void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
    AddDependency(node, dependency, /*is_data_dependency*/true);
  }
  void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
    AddDependency(node, dependency, /*is_data_dependency*/false);
  }
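
  // Data dependencies come from an instruction's inputs and uses; "other" dependencies cover
  // everything else that constrains reordering. For example (an illustrative scenario, not code
  // from this pass), in a block containing
  //
  //    i1 ArraySet [arr1, index1, value]
  //    i2 ArrayGet [arr2, index2]
  //
  // the node for `i2` records the node for `i1` as an "other" predecessor unless
  // `SideEffectDependencyAnalysis` (using heap location information from LSA) can establish that
  // the two accesses never alias, in which case the memory accesses are free to be reordered.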

  // Analyze whether the scheduling node has cross-iteration dependencies, which means it uses
  // values defined in the previous iteration.
  //
  // Supported cases:
  //
  //    L:
  //      v2 = loop_head_phi(v1)
  //      instr1(v2)
  //      v1 = instr2
  //      goto L
  //
  // In such cases moving instr2 before instr1 creates intersecting live ranges
  // of v1 and v2. As a result a separate register is needed to keep the value
  // defined by instr2 which is only used on the next iteration.
  // If instr2 is not moved, no additional register is needed: the register
  // used by instr1 is reused.
  // To prevent such a situation an "other" dependency between instr1 and instr2 must be set.
  void AddCrossIterationDependencies(SchedulingNode* node);

  // Add dependencies for the given `SchedulingNode`: inputs, environments, and side-effects.
  void AddDependencies(SchedulingNode* node, bool is_scheduling_barrier = false);

  ScopedArenaAllocator* const allocator_;
  bool contains_scheduling_barrier_;
  ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
  SideEffectDependencyAnalysis side_effect_dependency_analysis_;
};

/*
 * The visitors derived from this base class are used by schedulers to evaluate
 * the latencies of `HInstruction`s.
 */
class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
 public:
  // This class and its sub-classes will never be used to drive a visit of an
  // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
  // to pass a valid graph to `HGraphDelegateVisitor()`.
  SchedulingLatencyVisitor()
      : HGraphDelegateVisitor(nullptr),
        last_visited_latency_(0),
        last_visited_internal_latency_(0) {}

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
        "Architecture-specific scheduling latency visitors must handle all instructions"
        " (potentially by overriding the generic `VisitInstruction()`).";
    UNREACHABLE();
  }

  void Visit(HInstruction* instruction) {
    instruction->Accept(this);
  }

  void CalculateLatency(SchedulingNode* node) {
    // By default nodes have no internal latency.
    last_visited_internal_latency_ = 0;
    Visit(node->GetInstruction());
  }

  uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
  uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }

 protected:
  // The latency of the most recently visited SchedulingNode.
  // This is for reporting the latency value to the user of this visitor.
  uint32_t last_visited_latency_;
  // This represents the time spent *within* the generated code for the most recently visited
  // SchedulingNode. This is for reporting the internal latency value to the user of this visitor.
  uint32_t last_visited_internal_latency_;
};
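
// An architecture-specific latency visitor derives from `SchedulingLatencyVisitor` and reports
// latencies through the protected fields above. A minimal sketch, assuming a hypothetical "Foo"
// CPU (the real visitors live in the architecture-specific scheduler headers, for example
// scheduler_arm64.h referenced below):
//
//    class SchedulingLatencyVisitorFoo final : public SchedulingLatencyVisitor {
//     public:
//      // Generic fallback: assume most instructions take one cycle.
//      void VisitInstruction([[maybe_unused]] HInstruction* instruction) override {
//        last_visited_latency_ = kGenericInstructionLatency;
//      }
//      // Loads are slower, so their users get scheduled further away from them.
//      void VisitArrayGet([[maybe_unused]] HArrayGet* instruction) override {
//        last_visited_latency_ = 4;  // Made-up latency.
//      }
//    };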

class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
 public:
  virtual void Reset() {}
  virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                                 const SchedulingGraph& graph) = 0;
  virtual ~SchedulingNodeSelector() {}
 protected:
  static void DeleteNodeAtIndex(ScopedArenaVector<SchedulingNode*>* nodes, size_t index) {
    (*nodes)[index] = nodes->back();
    nodes->pop_back();
  }
};

/*
 * Select a `SchedulingNode` at random within the candidates.
 */
class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
 public:
  RandomSchedulingNodeSelector() : seed_(0) {
    seed_ = static_cast<uint32_t>(NanoTime());
    srand(seed_);
  }

  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                         const SchedulingGraph& graph) override {
    UNUSED(graph);
    DCHECK(!nodes->empty());
    size_t select = rand_r(&seed_) % nodes->size();
    SchedulingNode* select_node = (*nodes)[select];
    DeleteNodeAtIndex(nodes, select);
    return select_node;
  }

  uint32_t seed_;
};

/*
 * Select a `SchedulingNode` according to critical path information,
 * with heuristics to favor certain instruction patterns like materialized conditions.
 */
class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
 public:
  CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}

  void Reset() override { prev_select_ = nullptr; }
  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                         const SchedulingGraph& graph) override;

 protected:
  SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
                                                  SchedulingNode* check) const;

  SchedulingNode* SelectMaterializedCondition(ScopedArenaVector<SchedulingNode*>* nodes,
                                              const SchedulingGraph& graph) const;

 private:
  const SchedulingNode* prev_select_;
};
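
// Other selection strategies can be plugged into `HScheduler` by implementing
// `PopHighestPriorityNode()`: remove one node from `nodes` and return it. As an illustrative
// (hypothetical, not part of this file) example, a selector that always picks the most recently
// discovered candidate could look like:
//
//    class LifoSchedulingNodeSelector : public SchedulingNodeSelector {
//     public:
//      SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
//                                             const SchedulingGraph& graph) override {
//        UNUSED(graph);
//        DCHECK(!nodes->empty());
//        SchedulingNode* selected = nodes->back();
//        nodes->pop_back();
//        return selected;
//      }
//    };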

class HScheduler {
 public:
  explicit HScheduler(SchedulingNodeSelector* selector)
      : selector_(selector),
        only_optimize_loop_blocks_(true),
        cursor_(nullptr) {}
  virtual ~HScheduler() {}

  void Schedule(HGraph* graph);

  void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }

  // Instructions cannot be rescheduled across a scheduling barrier.
  virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;

 protected:
  virtual std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
      HBasicBlock* block,
      ScopedArenaAllocator* allocator,
      const HeapLocationCollector* heap_location_collector) = 0;

  template <typename LatencyVisitor>
  std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
      HBasicBlock* block,
      ScopedArenaAllocator* allocator,
      const HeapLocationCollector* heap_location_collector,
      LatencyVisitor* latency_visitor) ALWAYS_INLINE {
    SchedulingGraph scheduling_graph(allocator, heap_location_collector);
    ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator->Adapter(kArenaAllocScheduler));
    for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
      HInstruction* instruction = it.Current();
      CHECK_EQ(instruction->GetBlock(), block)
          << instruction->DebugName()
          << " is in block " << instruction->GetBlock()->GetBlockId()
          << ", and expected in block " << block->GetBlockId();
      SchedulingNode* node =
          scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
      latency_visitor->CalculateLatency(node);
      node->SetLatency(latency_visitor->GetLastVisitedLatency());
      node->SetInternalLatency(latency_visitor->GetLastVisitedInternalLatency());
      scheduling_nodes.push_back(node);
    }
    return {std::move(scheduling_graph), std::move(scheduling_nodes)};
  }

  void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
  void Schedule(SchedulingNode* scheduling_node,
                /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
  void Schedule(HInstruction* instruction);

  // Any instruction returning `false` via this method will prevent its
  // containing basic block from being scheduled.
  // This method is used to restrict scheduling to instructions that we know are
  // safe to handle.
  //
  // For newly introduced instructions, `HScheduler::IsSchedulable()` returns false by default.
  // `HScheduler${ARCH}::IsSchedulable()` can be overridden to return true for an instruction (see
  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
  // look at/update `HScheduler${ARCH}::IsSchedulingBarrier()` for this instruction.
  virtual bool IsSchedulable(const HInstruction* instruction) const;
  bool IsSchedulable(const HBasicBlock* block) const;

  SchedulingNodeSelector* const selector_;
  bool only_optimize_loop_blocks_;

  // A pointer indicating where the next instruction to be scheduled will be inserted.
  HInstruction* cursor_;

 private:
  DISALLOW_COPY_AND_ASSIGN(HScheduler);
};
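
// Each architecture provides its own `HScheduler` subclass (see scheduler_arm64.h). The general
// shape, shown here as an illustrative sketch for a hypothetical architecture "Foo", is to
// override the schedulability predicates and to build the graph with the architecture's latency
// visitor via the protected `BuildSchedulingGraph()` template above:
//
//    class HSchedulerFoo final : public HScheduler {
//     public:
//      explicit HSchedulerFoo(SchedulingNodeSelector* selector) : HScheduler(selector) {}
//
//      bool IsSchedulable(const HInstruction* instruction) const override;
//      bool IsSchedulingBarrier(const HInstruction* instruction) const override;
//
//     protected:
//      std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
//          HBasicBlock* block,
//          ScopedArenaAllocator* allocator,
//          const HeapLocationCollector* heap_location_collector) override {
//        SchedulingLatencyVisitorFoo latency_visitor;  // Hypothetical visitor, see sketch above.
//        return HScheduler::BuildSchedulingGraph(
//            block, allocator, heap_location_collector, &latency_visitor);
//      }
//    };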

class HInstructionScheduling : public HOptimization {
 public:
  HInstructionScheduling(HGraph* graph,
                         InstructionSet instruction_set,
                         CodeGenerator* cg = nullptr,
                         const char* name = kInstructionSchedulingPassName)
      : HOptimization(graph, name),
        codegen_(cg),
        instruction_set_(instruction_set) {}

  bool Run() override {
    return Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
  }

  bool Run(bool only_optimize_loop_blocks, bool schedule_randomly);

  static constexpr const char* kInstructionSchedulingPassName = "scheduler";

 private:
  CodeGenerator* const codegen_;
  const InstructionSet instruction_set_;
  DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_H_