// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/iterator.h"
#include "src/base/optional.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

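// Returns the highest-total-latency ready node whose operands are available
// at |cycle| (the ready list is kept sorted by AddNode), or nullptr if no
// node is ready yet.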
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

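// Used when FLAG_turbo_stress_instruction_scheduling is set: picks a uniformly
// random ready node instead of following the critical path.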
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

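// total_latency_ and start_cycle_ start out at -1; they are filled in later by
// ComputeTotalLatencies() and Schedule() respectively.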
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

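// Records a scheduling dependency: |node| can only be scheduled once this node
// has been scheduled.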
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

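// When FLAG_turbo_stress_instruction_scheduling is set, a random number
// generator seeded with FLAG_random_seed is created to drive the
// StressSchedulerQueue.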
InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    random_number_generator_ =
        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
  }
}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    Schedule<StressSchedulerQueue>();
  } else {
    Schedule<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

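// If |instr| is a scheduling barrier, the pending graph is scheduled and
// |instr| is emitted directly; otherwise a graph node is created and
// dependency edges are added for side effects, loads, deopt/trap points, and
// operand def-use relations.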
void InstructionScheduler::AddInstruction(Instruction* instr) {
  if (IsBarrier(instr)) {
    if (FLAG_turbo_stress_instruction_scheduling) {
      Schedule<StressSchedulerQueue>();
    } else {
      Schedule<CriticalPathFirstQueue>();
    }
    sequence()->AddInstruction(instr);
    return;
  }

  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

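// List scheduling: nodes whose predecessors have all been scheduled sit in the
// ready list; each iteration pops the queue's best candidate, emits its
// instruction, and advances the cycle count by one.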
template <typename QueueType>
void InstructionScheduler::Schedule() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }

  // Reset own state.
  graph_.clear();
  operands_map_.clear();
  pending_loads_.clear();
  last_deopt_or_trap_ = nullptr;
  last_live_in_reg_marker_ = nullptr;
  last_side_effect_instr_ = nullptr;
}

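// Maps an architecture-independent opcode to its scheduling flags (barrier,
// side effect, load operation); target-specific opcodes are delegated to
// GetTargetInstructionFlags().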
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchStackCheckOffset:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointerGreaterThan:
      // The ArchStackPointerGreaterThan instruction loads the current stack
      // pointer value and must not be reordered with instructions with side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchAbortCSAAssert:
      return kHasSideEffect;

    case kArchDebugBreak:
      return kIsBarrier;

    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
      return kIsBarrier;

    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
      // Calls can cause GC and GC may relocate objects. If a pure instruction
      // operates on a tagged pointer that was cast to a word then it may be
      // incorrect to move the instruction across the call. Hence we mark all
      // (non-tail-)calls as barriers.
      return kIsBarrier;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

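// Walks the graph in reverse, so every successor is visited before its
// predecessors: a node's total latency is its own latency plus the maximum
// total latency among its successors, i.e. the length of the critical path
// from this node to the end of the block.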
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8