/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/utils.h"
#include "side_effects_analysis.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is updated
 * through the `Add` and `Kill` methods: `Add` inserts an instruction, while `Kill`
 * removes the instructions that are affected by the given side effects.
 *
 * The `Lookup` method returns an instruction equivalent to the given one
 * if there is one in the set. In GVN, we would say those instructions have the
 * same "number".
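 *
 * Typical use in the GVN pass below: `Lookup` an instruction in the set and, if an
 * equivalent exists, replace the instruction with it; otherwise `Kill` the entries
 * invalidated by the instruction's side effects and `Add` the instruction to the set.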
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ScopedArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    DCHECK(IsPowerOfTwo(num_buckets_));
    std::fill_n(buckets_, num_buckets_, nullptr);
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets; otherwise, a larger number of buckets is also accepted.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction in the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // If in the set, returns an equivalent instruction to the given instruction.
  // Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether instruction is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

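  // Erases all entries; all buckets become empty and owned by this set.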
  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  void PopulateFromInternal(const ValueSet& other) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      buckets_owned_.ClearAllBits();
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      std::fill_n(buckets_, num_buckets_, nullptr);
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

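  // A node in a bucket's singly-linked list, storing an instruction together
  // with its cached hash code.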
  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

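    // Makes a copy of this node in `allocator`, pointing to `new_next`.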
    Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This algorithm can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the ones on which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2.
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    // ClinitCheck is treated as a pure instruction since it's only executed
    // once.
    bool pure = !instruction->GetSideEffects().HasDependencies() ||
                instruction->IsClinitCheck();
    if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

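  // Arena allocator used for the bucket array and all Node allocations.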
  ScopedArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
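  // Initializes the per-block value sets (all null) and the visited-blocks bit vector.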
  GlobalValueNumberer(HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(graph->GetArenaStack()),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
        visited_blocks_(
            &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) {
    visited_blocks_.ClearAllBits();
  }

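  // Runs GVN over the whole graph in reverse post order. Always returns true.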
  bool Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ScopedArenaAllocator allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSet for blocks. Initially null, but for an individual block they
  // are allocated and populated by the dominator, and updated by all blocks
  // in the path from the dominator to the block.
  ScopedArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited Boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

bool GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
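  // Start with an empty set for the entry block; it only accumulates constants
  // (see the comment in VisitBasicBlock).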
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);

  // Use the reverse post order to ensure the non back-edge predecessors of a block are
  // visited before the block itself.
  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
  return true;
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (&allocator_) ValueSet(&allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

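  // Visit the block's instructions in order, replacing each with an equivalent
  // instruction from the set when possible, otherwise updating the set.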
  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    //
    // BoundType is a special case example of an instruction which shouldn't be moved but can be
    // GVN'ed.
    if (current->CanBeMoved() || current->IsBoundType()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't require another round of OrderInputs(): `current`
        // is either used by an instruction that it dominates, which hasn't been
        // visited yet due to the order in which we visit instructions, or it is used
        // by a phi, and we don't do OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

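  // Mark the block as visited so its ValueSet becomes a candidate for recycling.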
  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (const HBasicBlock* successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

bool GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_, side_effects_);
  return gvn.Run();
}

}  // namespace art