/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/utils.h"
#include "side_effects_analysis.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is
 * updated through the `Add` and `Kill` methods. The `Kill` method removes
 * instructions that are affected by the given side effects.
 *
 * The `Lookup` method returns an equivalent instruction to the given instruction
 * if there is one in the set. In GVN, we would say those instructions have the
 * same "number".
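 *
 * For example, when two equivalent `HAdd` instructions are encountered in
 * turn, `Lookup` on the second returns the first, so the caller can replace
 * and remove the duplicate (this is what `VisitBasicBlock` below does).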
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ScopedArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    DCHECK(IsPowerOfTwo(num_buckets_));
    std::fill_n(buckets_, num_buckets_, nullptr);
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. A larger number of buckets is allowed otherwise.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction to the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // If in the set, returns an equivalent instruction to the given instruction.
  // Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether `instruction` is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

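  // Resets the set to empty. All buckets become null and are marked owned,
  // so subsequent calls to `Add` do not need to clone from a parent set.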
  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  void PopulateFromInternal(const ValueSet& other) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      buckets_owned_.ClearAllBits();
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      std::fill_n(buckets_, num_buckets_, nullptr);
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

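    // Allocates a copy of this node in `allocator`, linked to `new_next`.
    // Used when cloning a bucket that is shared with a parent set.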
    Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This algorithm can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the entries for which 'cond' returns true.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries, and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5) and rounded up to the nearest
  // power of two.
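  // For example, 20 entries give RoundUpToPowerOfTwo(30) = 32 buckets.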
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    // ClinitCheck is treated as a pure instruction since it's only executed
    // once.
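    // The low bit of the hash therefore selects the bucket's parity: even
    // buckets hold entries that `DeleteAllImpureWhich` must visit, odd
    // buckets hold entries it can skip.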
    bool pure = !instruction->GetSideEffects().HasDependencies() ||
                instruction->IsClinitCheck();
    if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ScopedArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

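  // Lower bound for IdealBucketCount, so that small sets keep a sensible size.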
  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(graph->GetArenaStack()),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
        visited_blocks_(
            &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) {
    visited_blocks_.ClearAllBits();
  }

  bool Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ScopedArenaAllocator allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSets for blocks. Initially null, but for an individual block they
  // are allocated and populated by the dominator, and updated by all blocks
  // in the path from the dominator to the block.
  ScopedArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited Boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

bool GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);

  // Use the reverse post order to ensure the non back-edge predecessors of a block are
  // visited before the block itself.
  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
  return true;
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (&allocator_) ValueSet(&allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
          // loop header. We clear the set at entry of irreducible loops and any loop containing
          // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
          // across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just now: if
    // the instruction is GVN'ed, we don't need to kill.
    //
    // BoundType is a special case example of an instruction which shouldn't be moved but can be
    // GVN'ed.
    if (current->CanBeMoved() || current->IsBoundType()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't make more OrderInputs() necessary: `current`
        // is either used by an instruction that it dominates, which hasn't been
        // visited yet due to the order we visit instructions, or by a phi, and
        // we don't do OrderInputs() on a phi anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (const HBasicBlock* successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

bool GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_, side_effects_);
  return gvn.Run();
}

}  // namespace art