// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

// Compare the two nodes and return true if node1 is a better candidate than
// node2 (i.e. node1 should be scheduled before node2).
bool InstructionScheduler::CriticalPathFirstQueue::CompareNodes(
    ScheduleGraphNode *node1, ScheduleGraphNode *node2) const {
  return node1->total_latency() > node2->total_latency();
}

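// Removes and returns the best node from the ready list: among the nodes
// whose operands are ready at |cycle|, the one with the highest total
// latency (i.e. the one on the critical path). Returns nullptr if no node
// is ready at |cycle| yet.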
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready and
    // we try to schedule the critical path first.
    if (cycle >= (*iterator)->start_cycle()) {
      if ((candidate == nodes_.end()) || CompareNodes(*iterator, *candidate)) {
        candidate = iterator;
      }
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode *result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

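// Stress-testing variant: returns a random node from the ready list,
// regardless of latency or start cycle.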
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
      static_cast<int>(nodes_.size())));
  ScheduleGraphNode *result = *candidate;
  nodes_.erase(candidate);
  return result;
}

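// A graph node wraps a single instruction. Its latency is looked up from the
// instruction; total_latency_ and start_cycle_ stay at -1 until they are
// computed during scheduling.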
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}

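// Record a scheduling dependency: |node| must not be scheduled before this
// node, so it gains one more unscheduled predecessor.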
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

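// The scheduler works one basic block at a time; the per-block state below
// (dependency graph, last side-effecting instruction, pending loads, etc.)
// is reset at the end of every block.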
InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr) {
}

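// Starts a new basic block. All per-block scheduling state must already have
// been reset by the previous EndBlock().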
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  sequence()->StartBlock(rpo);
}

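// Schedules the instructions collected for the current block, emits them into
// the instruction sequence, and resets the per-block state. The stress
// scheduler is used when FLAG_turbo_stress_instruction_scheduling is set.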
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
}

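// Adds an instruction to the dependency graph of the current block, creating
// the edges needed to keep block terminators last, to keep side-effecting
// instructions, loads and deoptimization points ordered with respect to each
// other, and to respect operand (def-use) dependencies.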
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that new instructions are not scheduled before the last
    // deoptimization point.
    if (last_deopt_ != nullptr) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to instructions that
      // have side effects, but independent loads can be reordered with
      // respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effect
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (ScheduleGraphNode* node : graph_) {
      if (HasOperandDependency(node->instruction(), instr)) {
        node->AddSuccessor(new_node);
      }
    }
  }

  graph_.push_back(new_node);
}

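// List-schedules the dependency graph of the current block: nodes without
// unscheduled predecessors are put on a ready list, and on every cycle the
// queue (QueueType selects the policy) picks the node to emit next. The start
// cycle of each successor is pushed back by the scheduled node's latency.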
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

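// Returns the scheduling flags (kHasSideEffect, kIsLoadOperation,
// kIsBlockTerminator) for the architecture-independent opcodes; anything
// target-specific is delegated to GetTargetInstructionFlags().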
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Atan:
    case kIeee754Float64Atan2:
    case kIeee754Float64Atanh:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Sin:
    case kIeee754Float64Tan:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with respect to instructions that
      // have side effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}

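// Returns true if instr2 reads a value produced by instr1, i.e. one of
// instr1's outputs and one of instr2's inputs refer to the same virtual
// register (a true data dependency).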
bool InstructionScheduler::HasOperandDependency(
    const Instruction* instr1, const Instruction* instr2) const {
  for (size_t i = 0; i < instr1->OutputCount(); ++i) {
    for (size_t j = 0; j < instr2->InputCount(); ++j) {
      const InstructionOperand* output = instr1->OutputAt(i);
      const InstructionOperand* input = instr2->InputAt(j);

      if (output->IsUnallocated() && input->IsUnallocated() &&
          (UnallocatedOperand::cast(output)->virtual_register() ==
           UnallocatedOperand::cast(input)->virtual_register())) {
        return true;
      }

      if (output->IsConstant() && input->IsUnallocated() &&
          (ConstantOperand::cast(output)->virtual_register() ==
           UnallocatedOperand::cast(input)->virtual_register())) {
        return true;
      }
    }
  }

  // TODO(bafsa): Do we need to look for anti-dependencies/output-dependencies?

  return false;
}

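// An instruction terminates its basic block if its scheduling flags say so
// or if it carries a branch continuation (kFlags_branch).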
bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}

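// Walks the graph backwards (instructions were appended in order, so the
// reverse order visits successors before their predecessors) and sets each
// node's total latency to its own latency plus the largest total latency of
// its successors, i.e. the length of the critical path starting at the node.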
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8