// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

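// Picks the next instruction for the critical-path-first heuristic: since the
// ready list is kept sorted by decreasing total latency (see AddNode above),
// the first node whose operands are available at |cycle| is the ready node
// with the longest remaining dependency chain.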
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

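// Used when FLAG_turbo_stress_instruction_scheduling is set (see EndBlock and
// AddInstruction): instead of following the latency heuristic, pick a random
// node from the ready list so that arbitrary, but still dependency-respecting,
// schedules get exercised.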
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

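// A node starts with no successors and no unscheduled predecessors;
// total_latency_ and start_cycle_ stay at -1 until ComputeTotalLatencies() and
// Schedule() fill them in.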
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

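// Adding an edge also bumps the successor's unscheduled-predecessor count; a
// node only becomes ready once that count drops back to zero (see Schedule()).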
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

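// Scheduling happens one basic block at a time: the dependency graph built up
// since StartBlock() is flushed here through the queue selected by
// FLAG_turbo_stress_instruction_scheduling, and the block is then closed in
// the instruction sequence.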
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

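// Adds |instr| to the dependency graph of the current block. Barriers flush
// the graph immediately; every other instruction gets edges for memory
// ordering (side effects vs. loads), deopt/trap ordering, and operand def-use
// dependencies.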
void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effect instructions,
      // but independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
    }

    // Update last deoptimization or trap point.
    if (instr->IsDeoptimizeCall() || CanTrap(instr)) {
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

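// List scheduling: seed the ready list with nodes that have no predecessors,
// then repeatedly pop the queue's best candidate, emit it, and release its
// successors once their last predecessor has been scheduled. At most one
// instruction is emitted per cycle.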
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

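// Classifies an instruction for the dependency rules above: kIsBarrier forces
// a flush of the current graph, kHasSideEffect and kIsLoadOperation drive the
// memory-ordering edges, and kNoOpcodeFlags means the instruction can be
// reordered freely, subject only to operand dependencies.
// Architecture-specific opcodes are delegated to GetTargetInstructionFlags().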
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
#if V8_ENABLE_WEBASSEMBLY
    case kArchTailCallWasm:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchAbortCSADcheck:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction:
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
    case kArchAtomicStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

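// graph_ holds the nodes in program order and every dependency edge points
// from an earlier instruction to a later one, so walking the graph in reverse
// visits all successors of a node before the node itself. The total latency of
// a node is therefore its own latency plus the largest total latency among its
// successors, i.e. the length of its critical path to the end of the block.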
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8