1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "parallel_move_resolver.h"
18
19 #include "base/stl_util.h"
20 #include "nodes.h"
21
22 namespace art {
23
BuildInitialMoveList(HParallelMove * parallel_move)24 void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
25 // Perform a linear sweep of the moves to add them to the initial list of
26 // moves to perform, ignoring any move that is redundant (the source is
27 // the same as the destination, the destination is ignored and
28 // unallocated, or the move was already eliminated).
29 for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
30 MoveOperands* move = parallel_move->MoveOperandsAt(i);
31 if (!move->IsRedundant()) {
32 moves_.push_back(move);
33 }
34 }
35 }
36
EmitNativeCode(HParallelMove * parallel_move)37 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
38 DCHECK(moves_.empty());
39 // Build up a worklist of moves.
40 BuildInitialMoveList(parallel_move);
41
42 // Move stack/stack slot to take advantage of a free register on constrained machines.
43 for (size_t i = 0; i < moves_.size(); ++i) {
44 const MoveOperands& move = *moves_[i];
45 // Ignore constants and moves already eliminated.
46 if (move.IsEliminated() || move.GetSource().IsConstant()) {
47 continue;
48 }
49
50 if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
51 (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
52 PerformMove(i);
53 }
54 }
55
56 for (size_t i = 0; i < moves_.size(); ++i) {
57 const MoveOperands& move = *moves_[i];
58 // Skip constants to perform them last. They don't block other moves
59 // and skipping such moves with register destinations keeps those
60 // registers free for the whole algorithm.
61 if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
62 PerformMove(i);
63 }
64 }
65
66 // Perform the moves with constant sources.
67 for (size_t i = 0; i < moves_.size(); ++i) {
68 MoveOperands* move = moves_[i];
69 if (!move->IsEliminated()) {
70 DCHECK(move->GetSource().IsConstant());
71 EmitMove(i);
72 // Eliminate the move, in case following moves need a scratch register.
73 move->Eliminate();
74 }
75 }
76
77 moves_.clear();
78 }
79
LowOf(Location location)80 Location LowOf(Location location) {
81 if (location.IsRegisterPair()) {
82 return Location::RegisterLocation(location.low());
83 } else if (location.IsFpuRegisterPair()) {
84 return Location::FpuRegisterLocation(location.low());
85 } else if (location.IsDoubleStackSlot()) {
86 return Location::StackSlot(location.GetStackIndex());
87 } else {
88 return Location::NoLocation();
89 }
90 }
91
HighOf(Location location)92 Location HighOf(Location location) {
93 if (location.IsRegisterPair()) {
94 return Location::RegisterLocation(location.high());
95 } else if (location.IsFpuRegisterPair()) {
96 return Location::FpuRegisterLocation(location.high());
97 } else if (location.IsDoubleStackSlot()) {
98 return Location::StackSlot(location.GetHighStackIndex(4));
99 } else {
100 return Location::NoLocation();
101 }
102 }
103
104 // Update the source of `move`, knowing that `updated_location` has been swapped
105 // with `new_source`. Note that `updated_location` can be a pair, therefore if
106 // `move` is non-pair, we need to extract which register to use.
UpdateSourceOf(MoveOperands * move,Location updated_location,Location new_source)107 static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
108 Location source = move->GetSource();
109 if (LowOf(updated_location).Equals(source)) {
110 move->SetSource(LowOf(new_source));
111 } else if (HighOf(updated_location).Equals(source)) {
112 move->SetSource(HighOf(new_source));
113 } else {
114 DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
115 move->SetSource(new_source);
116 }
117 }
118
PerformMove(size_t index)119 MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
120 // Each call to this function performs a move and deletes it from the move
121 // graph. We first recursively perform any move blocking this one. We
122 // mark a move as "pending" on entry to PerformMove in order to detect
123 // cycles in the move graph. We use operand swaps to resolve cycles,
124 // which means that a call to PerformMove could change any source operand
125 // in the move graph.
126
127 MoveOperands* move = moves_[index];
128 DCHECK(!move->IsPending());
129 if (move->IsRedundant()) {
130 // Because we swap register pairs first, following, un-pending
131 // moves may become redundant.
132 move->Eliminate();
133 return nullptr;
134 }
135
136 // Clear this move's destination to indicate a pending move. The actual
137 // destination is saved in a stack-allocated local. Recursion may allow
138 // multiple moves to be pending.
139 DCHECK(!move->GetSource().IsInvalid());
140 Location destination = move->MarkPending();
141
142 // Perform a depth-first traversal of the move graph to resolve
143 // dependencies. Any unperformed, unpending move with a source the same
144 // as this one's destination blocks this one so recursively perform all
145 // such moves.
146 MoveOperands* required_swap = nullptr;
147 for (size_t i = 0; i < moves_.size(); ++i) {
148 const MoveOperands& other_move = *moves_[i];
149 if (other_move.Blocks(destination) && !other_move.IsPending()) {
150 // Though PerformMove can change any source operand in the move graph,
151 // calling `PerformMove` cannot create a blocking move via a swap
152 // (this loop does not miss any).
153 // For example, assume there is a non-blocking move with source A
154 // and this move is blocked on source B and there is a swap of A and
155 // B. Then A and B must be involved in the same cycle (or they would
156 // not be swapped). Since this move's destination is B and there is
157 // only a single incoming edge to an operand, this move must also be
158 // involved in the same cycle. In that case, the blocking move will
159 // be created but will be "pending" when we return from PerformMove.
160 required_swap = PerformMove(i);
161
162 if (required_swap == move) {
163 // If this move is required to swap, we do so without looking
164 // at the next moves. Swapping is not blocked by anything, it just
165 // updates other moves's source.
166 break;
167 } else if (required_swap == moves_[i]) {
168 // If `other_move` was swapped, we iterate again to find a new
169 // potential cycle.
170 required_swap = nullptr;
171 i = -1;
172 } else if (required_swap != nullptr) {
173 // A move is required to swap. We walk back the cycle to find the
174 // move by just returning from this `PerformMove`.
175 moves_[index]->ClearPending(destination);
176 return required_swap;
177 }
178 }
179 }
180
181 // We are about to resolve this move and don't need it marked as
182 // pending, so restore its destination.
183 move->ClearPending(destination);
184
185 // This move's source may have changed due to swaps to resolve cycles and
186 // so it may now be the last move in the cycle. If so remove it.
187 if (move->GetSource().Equals(destination)) {
188 move->Eliminate();
189 DCHECK(required_swap == nullptr);
190 return nullptr;
191 }
192
193 // The move may be blocked on a (at most one) pending move, in which case
194 // we have a cycle. Search for such a blocking move and perform a swap to
195 // resolve it.
196 bool do_swap = false;
197 if (required_swap != nullptr) {
198 DCHECK_EQ(required_swap, move);
199 do_swap = true;
200 } else {
201 for (MoveOperands* other_move : moves_) {
202 if (other_move->Blocks(destination)) {
203 DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
204 if (!move->Is64BitMove() && other_move->Is64BitMove()) {
205 // We swap 64bits moves before swapping 32bits moves. Go back from the
206 // cycle by returning the move that must be swapped.
207 return other_move;
208 }
209 do_swap = true;
210 break;
211 }
212 }
213 }
214
215 if (do_swap) {
216 EmitSwap(index);
217 // Any unperformed (including pending) move with a source of either
218 // this move's source or destination needs to have their source
219 // changed to reflect the state of affairs after the swap.
220 Location source = move->GetSource();
221 Location swap_destination = move->GetDestination();
222 move->Eliminate();
223 for (MoveOperands* other_move : moves_) {
224 if (other_move->Blocks(source)) {
225 UpdateSourceOf(other_move, source, swap_destination);
226 } else if (other_move->Blocks(swap_destination)) {
227 UpdateSourceOf(other_move, swap_destination, source);
228 }
229 }
230 // If the swap was required because of a 64bits move in the middle of a cycle,
231 // we return the swapped move, so that the caller knows it needs to re-iterate
232 // its dependency loop.
233 return required_swap;
234 } else {
235 // This move is not blocked.
236 EmitMove(index);
237 move->Eliminate();
238 DCHECK(required_swap == nullptr);
239 return nullptr;
240 }
241 }
242
IsScratchLocation(Location loc)243 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
244 for (MoveOperands* move : moves_) {
245 if (move->Blocks(loc)) {
246 return false;
247 }
248 }
249
250 for (MoveOperands* move : moves_) {
251 if (move->GetDestination().Equals(loc)) {
252 return true;
253 }
254 }
255
256 return false;
257 }
258
AllocateScratchRegister(int blocked,int register_count,int if_scratch,bool * spilled)259 int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
260 int register_count,
261 int if_scratch,
262 bool* spilled) {
263 DCHECK_NE(blocked, if_scratch);
264 int scratch = -1;
265 for (int reg = 0; reg < register_count; ++reg) {
266 if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
267 scratch = reg;
268 break;
269 }
270 }
271
272 if (scratch == -1) {
273 *spilled = true;
274 scratch = if_scratch;
275 } else {
276 *spilled = false;
277 }
278
279 return scratch;
280 }
281
282
ScratchRegisterScope(ParallelMoveResolverWithSwap * resolver,int blocked,int if_scratch,int number_of_registers)283 ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
284 ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
285 : resolver_(resolver),
286 reg_(kNoRegister),
287 spilled_(false) {
288 reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
289
290 if (spilled_) {
291 resolver->SpillScratch(reg_);
292 }
293 }
294
295
~ScratchRegisterScope()296 ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
297 if (spilled_) {
298 resolver_->RestoreScratch(reg_);
299 }
300 }
301
EmitNativeCode(HParallelMove * parallel_move)302 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
303 DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
304 DCHECK(moves_.empty());
305 DCHECK(scratches_.empty());
306
307 // Backend dependent initialization.
308 PrepareForEmitNativeCode();
309
310 // Build up a worklist of moves.
311 BuildInitialMoveList(parallel_move);
312
313 for (size_t i = 0; i < moves_.size(); ++i) {
314 const MoveOperands& move = *moves_[i];
315 // Skip constants to perform them last. They don't block other moves and
316 // skipping such moves with register destinations keeps those registers
317 // free for the whole algorithm.
318 if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
319 PerformMove(i);
320 }
321 }
322
323 // Perform the moves with constant sources and register destinations with UpdateMoveSource()
324 // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit
325 // from changing the constant sources to stack locations.
326 for (size_t i = 0; i < moves_.size(); ++i) {
327 MoveOperands* move = moves_[i];
328 Location destination = move->GetDestination();
329 if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
330 Location source = move->GetSource();
331 EmitMove(i);
332 move->Eliminate();
333 // This may introduce additional instruction dependency, but reduce number
334 // of moves and possible literal loads. For example,
335 // Original moves:
336 // 1234.5678 -> D0
337 // 1234.5678 -> D1
338 // Updated moves:
339 // 1234.5678 -> D0
340 // D0 -> D1
341 UpdateMoveSource(source, destination);
342 }
343 }
344
345 // Perform the rest of the moves.
346 for (size_t i = 0; i < moves_.size(); ++i) {
347 MoveOperands* move = moves_[i];
348 if (!move->IsEliminated()) {
349 EmitMove(i);
350 move->Eliminate();
351 }
352 }
353
354 // All pending moves that we have added for resolve cycles should be performed.
355 DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
356
357 // Backend dependent cleanup.
358 FinishEmitNativeCode();
359
360 moves_.clear();
361 scratches_.clear();
362 }
363
GetScratchLocation(Location::Kind kind)364 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
365 for (Location loc : scratches_) {
366 if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
367 return loc;
368 }
369 }
370 for (MoveOperands* move : moves_) {
371 Location loc = move->GetDestination();
372 if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
373 return loc;
374 }
375 }
376 return Location::NoLocation();
377 }
378
AddScratchLocation(Location loc)379 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
380 if (kIsDebugBuild) {
381 for (Location scratch : scratches_) {
382 CHECK(!loc.Equals(scratch));
383 }
384 }
385 scratches_.push_back(loc);
386 }
387
RemoveScratchLocation(Location loc)388 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
389 DCHECK(!IsBlockedByMoves(loc));
390 for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
391 if (loc.Equals(*it)) {
392 scratches_.erase(it);
393 break;
394 }
395 }
396 }
397
PerformMove(size_t index)398 void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
399 // Each call to this function performs a move and deletes it from the move
400 // graph. We first recursively perform any move blocking this one. We mark
401 // a move as "pending" on entry to PerformMove in order to detect cycles
402 // in the move graph. We use scratch location to resolve cycles, also
403 // additional pending moves might be added. After move has been performed,
404 // we will update source operand in the move graph to reduce dependencies in
405 // the graph.
406
407 MoveOperands* move = moves_[index];
408 DCHECK(!move->IsPending());
409 DCHECK(!move->IsEliminated());
410 if (move->IsRedundant()) {
411 // Previous operations on the list of moves have caused this particular move
412 // to become a no-op, so we can safely eliminate it. Consider for example
413 // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
414 // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
415 // used as the scratch location, the move (1 -> 2) will occur while resolving
416 // the cycle. When that move is emitted, the code will update moves with a '1'
417 // as their source to use '2' instead (see `UpdateMoveSource()`. In our example
418 // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
419 // eliminated here.
420 move->Eliminate();
421 return;
422 }
423
424 // Clear this move's destination to indicate a pending move. The actual
425 // destination is saved in a stack-allocated local. Recursion may allow
426 // multiple moves to be pending.
427 DCHECK(!move->GetSource().IsInvalid());
428 Location destination = move->MarkPending();
429
430 // Perform a depth-first traversal of the move graph to resolve
431 // dependencies. Any unperformed, unpending move with a source the same
432 // as this one's destination blocks this one so recursively perform all
433 // such moves.
434 for (size_t i = 0; i < moves_.size(); ++i) {
435 const MoveOperands& other_move = *moves_[i];
436 if (other_move.Blocks(destination) && !other_move.IsPending()) {
437 PerformMove(i);
438 }
439 }
440
441 // We are about to resolve this move and don't need it marked as
442 // pending, so restore its destination.
443 move->ClearPending(destination);
444
445 // No one else should write to the move destination when the it is pending.
446 DCHECK(!move->IsRedundant());
447
448 Location source = move->GetSource();
449 // The move may be blocked on several pending moves, in case we have a cycle.
450 if (IsBlockedByMoves(destination)) {
451 // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
452 // sequence:
453 // (C -> scratch) # Emit right now.
454 // (A -> B) (B -> C) # Unblocked.
455 // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
456 Location::Kind kind = source.GetKind();
457 DCHECK_NE(kind, Location::kConstant);
458 Location scratch = AllocateScratchLocationFor(kind);
459 // We only care about the move size.
460 DataType::Type type = move->Is64BitMove() ? DataType::Type::kInt64 : DataType::Type::kInt32;
461 // Perform (C -> scratch)
462 move->SetDestination(scratch);
463 EmitMove(index);
464 move->Eliminate();
465 UpdateMoveSource(source, scratch);
466 // Add (scratch -> A).
467 AddPendingMove(scratch, destination, type);
468 } else {
469 // This move is not blocked.
470 EmitMove(index);
471 move->Eliminate();
472 UpdateMoveSource(source, destination);
473 }
474
475 // Moves in the pending list should not block any other moves. But performing
476 // unblocked moves in the pending list can free scratch registers, so we do this
477 // as early as possible.
478 MoveOperands* pending_move;
479 while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
480 Location pending_source = pending_move->GetSource();
481 Location pending_destination = pending_move->GetDestination();
482 // We do not depend on the pending move index. So just delete the move instead
483 // of eliminating it to make the pending list cleaner.
484 DeletePendingMove(pending_move);
485 move->SetSource(pending_source);
486 move->SetDestination(pending_destination);
487 EmitMove(index);
488 move->Eliminate();
489 UpdateMoveSource(pending_source, pending_destination);
490 // Free any unblocked locations in the scratch location list.
491 // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
492 // FIXME: If FreeScratchLocation() removes the location from scratches_,
493 // we skip the next location. This happens for arm64.
494 for (size_t i = 0; i < scratches_.size(); ++i) {
495 Location scratch = scratches_[i];
496 // Only scratch overlapping with performed move source can be unblocked.
497 if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
498 FreeScratchLocation(pending_source);
499 }
500 }
501 }
502 }
503
UpdateMoveSource(Location from,Location to)504 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
505 // This function is used to reduce the dependencies in the graph after
506 // (from -> to) has been performed. Since we ensure there is no move with the same
507 // destination, (to -> X) cannot be blocked while (from -> X) might still be
508 // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
509 // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
510 // a dependency between the two. If we update the source location from 1 to 2, we
511 // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
512 //
513 // This is not something we must do, but we can use fewer scratch locations with
514 // this trick. For example, we can avoid using additional scratch locations for
515 // moves (0 -> 1), (1 -> 2), (1 -> 0).
516 for (MoveOperands* move : moves_) {
517 if (move->GetSource().Equals(from)) {
518 move->SetSource(to);
519 }
520 }
521 }
522
AddPendingMove(Location source,Location destination,DataType::Type type)523 void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
524 Location destination,
525 DataType::Type type) {
526 pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
527 }
528
DeletePendingMove(MoveOperands * move)529 void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
530 RemoveElement(pending_moves_, move);
531 }
532
GetUnblockedPendingMove(Location loc)533 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
534 for (MoveOperands* move : pending_moves_) {
535 Location destination = move->GetDestination();
536 // Only moves with destination overlapping with input loc can be unblocked.
537 if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
538 return move;
539 }
540 }
541 return nullptr;
542 }
543
IsBlockedByMoves(Location loc)544 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
545 for (MoveOperands* move : pending_moves_) {
546 if (move->Blocks(loc)) {
547 return true;
548 }
549 }
550 for (MoveOperands* move : moves_) {
551 if (move->Blocks(loc)) {
552 return true;
553 }
554 }
555 return false;
556 }
557
558 // So far it is only used for debugging purposes to make sure all pending moves
559 // have been performed.
GetNumberOfPendingMoves()560 size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
561 return pending_moves_.size();
562 }
563
564 } // namespace art
565