// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}


InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr),
      operands_map_(zone) {}


void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
  operands_map_.clear();
}


void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization point when they depend on it.
    if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effect
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
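  // Note: `cycle` is a simple time counter. The critical-path queue only
  // returns candidates whose start cycle has been reached (computed below from
  // the latencies of already scheduled predecessors); if nothing is ready in a
  // given cycle, the counter simply advances. The stress queue ignores the
  // counter and picks a ready node at random.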
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}


bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}


void InstructionScheduler::ComputeTotalLatencies() {
  // Walk the graph in reverse order so that the successors of a node have
  // already been processed; a node's total latency is then its own latency
  // plus the largest total latency among its successors.
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8
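
// A minimal usage sketch (illustration only, not the actual V8 call site):
// the code generation pipeline is assumed to drive the scheduler one basic
// block at a time through the three entry points defined above. The names
// `zone`, `sequence`, `rpo` and `block_instructions` are placeholders for
// values a hypothetical caller would supply.
//
//   InstructionScheduler scheduler(zone, sequence);
//   scheduler.StartBlock(rpo);                       // reset per-block state
//   for (Instruction* instr : block_instructions) {  // original order
//     scheduler.AddInstruction(instr);               // build dependency graph
//   }
//   scheduler.EndBlock(rpo);                         // schedule and emit block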