/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
#include "side_effects_analysis.h"
#include "utils.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is
 * updated through the `Add` and `Kill` methods: `Add` inserts an instruction,
 * while `Kill` removes all instructions affected by the given side effects.
 *
 * The `Lookup` method returns an instruction equivalent to the given one if
 * there is any in the set. In GVN terms, such instructions have the same
 * "number".
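 *
 * Internally, impure instructions hash into even buckets and pure
 * instructions into odd ones, so `Kill` only needs to scan half of the
 * table (see `HashCode` and `DeleteAllImpureWhich`).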
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
    DCHECK(IsPowerOfTwo(num_buckets_));
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so entries of buckets_ and
    // buckets_owned_ are initialized to null and false, respectively.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other, /* is_dirty */ false);
  }

  // Erases all values in this set and populates it with values from `other`.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other, /* is_dirty */ true);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. Otherwise, a larger number of buckets is also allowed.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction to the set.
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // If in the set, returns an instruction equivalent to the given instruction.
  // Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether `instruction` is in the set.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

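  // Empties the set and marks all buckets as owned again.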
  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  // If `is_dirty` is set to true, existing data will be wiped first. It is
  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      if (is_dirty) {
        buckets_owned_.ClearAllBits();
      } else {
        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
      }
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      if (is_dirty) {
        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
      } else {
        if (kIsDebugBuild) {
          for (size_t i = 0; i < num_buckets_; ++i) {
            DCHECK(buckets_[i] == nullptr) << i;
          }
        }
      }
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

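    // Allocates a copy of this node, chained in front of `new_next`.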
    Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This method can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given `iterator`.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));
    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;
    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }
      if (clone_previous == nullptr) {
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }
    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the ones on which `cond` returns true.
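  // With the parity scheme in `HashCode`, only even buckets can contain
  // impure instructions; pure instructions go to odd buckets, except when the
  // graph has irreducible loops, in which case everything hashes to even.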
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy `cond`.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure `previous` and `node` point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched `cond`.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          if (previous == nullptr) {
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor stays reasonable.
  // This is estimated as (num_entries_ * 1.5) and rounded up to the nearest
  // power of two.
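  // For example, 20 entries give 20 + (20 >> 1) = 30, which rounds up to
  // 32 buckets.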
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in
    // the case of irreducible loops, we don't put pure instructions in odd buckets,
    // as we need to delete them when entering the loop.
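    // Since `num_buckets_` is a power of two, `BucketIndex` preserves the low
    // bit, so the parity chosen here carries through to the bucket index.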
    if (instruction->GetSideEffects().HasDependencies() ||
        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
 */
class GlobalValueNumberer : public ValueObject {
 public:
  GlobalValueNumberer(ArenaAllocator* allocator,
                      HGraph* graph,
                      const SideEffectsAnalysis& side_effects)
      : graph_(graph),
        allocator_(allocator),
        side_effects_(side_effects),
        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
        visited_blocks_(
            allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}

  void Run();

 private:
  // Per-block GVN. Will also update the ValueSet of the dominated and
  // successor blocks.
  void VisitBasicBlock(HBasicBlock* block);

  HGraph* graph_;
  ArenaAllocator* const allocator_;
  const SideEffectsAnalysis& side_effects_;

  ValueSet* FindSetFor(HBasicBlock* block) const {
    ValueSet* result = sets_[block->GetBlockId()];
    DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
    return result;
  }

  void AbandonSetFor(HBasicBlock* block) {
    DCHECK(sets_[block->GetBlockId()] != nullptr)
        << "Block B" << block->GetBlockId() << " expected to have a set";
    sets_[block->GetBlockId()] = nullptr;
  }

  // Returns false if the GlobalValueNumberer has already visited all blocks
  // which may reference `block`.
  bool WillBeReferencedAgain(HBasicBlock* block) const;

  // Iterates over visited blocks and finds one which has a ValueSet such that:
  // (a) it will not be referenced in the future, and
  // (b) it can hold a copy of `reference_set` with a reasonable load factor.
  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
                                                 const ValueSet& reference_set) const;

  // ValueSet for blocks. Initially null, but for an individual block they
  // are allocated and populated by the dominator, and updated by all blocks
  // in the path from the dominator to the block.
  ArenaVector<ValueSet*> sets_;

  // BitVector which serves as a fast-access map from block id to
  // visited/unvisited Boolean.
  ArenaBitVector visited_blocks_;

  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};

void GlobalValueNumberer::Run() {
  DCHECK(side_effects_.HasRun());
  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_);

  // Use the reverse post order to ensure the non-back-edge predecessors of a
  // block are visited before the block itself.
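  // Reverse post order also guarantees that a block's dominator is visited
  // before the block itself, so the dominator's set is always available in
  // VisitBasicBlock.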
  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
}

void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
  ValueSet* set = nullptr;

  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
    // The entry block should only accumulate constant instructions, and
    // the builder puts constants only in the entry block.
    // Therefore, there is no need to propagate the value set to the next block.
    set = new (allocator_) ValueSet(allocator_);
  } else {
    HBasicBlock* dominator = block->GetDominator();
    ValueSet* dominator_set = FindSetFor(dominator);

    if (dominator->GetSuccessors().size() == 1) {
      // `block` is a direct successor of its dominator. No need to clone the
      // dominator's set, `block` can take over its ownership including its buckets.
      DCHECK_EQ(dominator->GetSingleSuccessor(), block);
      AbandonSetFor(dominator);
      set = dominator_set;
    } else {
      // Try to find a basic block which will never be referenced again and whose
      // ValueSet can therefore be recycled. We will need to copy `dominator_set`
      // into the recycled set, so we pass `dominator_set` as a reference for size.
      HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
      if (recyclable == nullptr) {
        // No block with a suitable ValueSet found. Allocate a new one and
        // copy `dominator_set` into it.
        set = new (allocator_) ValueSet(allocator_, *dominator_set);
      } else {
        // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
        set = FindSetFor(recyclable);
        AbandonSetFor(recyclable);
        set->PopulateFrom(*dominator_set);
      }
    }

    if (!set->IsEmpty()) {
      if (block->IsLoopHeader()) {
        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
          // To satisfy our linear scan algorithm, no instruction should flow into an
          // irreducible loop header. We clear the set at the entry of irreducible loops
          // and of any loop containing an irreducible loop, as in both cases GVN can
          // extend the liveness of an instruction across the irreducible loop.
          // Note that, if we're not compiling OSR, we could still do GVN and introduce
          // phis at irreducible loop headers. We decided it was not worth the complexity.
          set->Clear();
        } else {
          DCHECK(!block->GetLoopInformation()->IsIrreducible());
          DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
          set->Kill(side_effects_.GetLoopEffects(block));
        }
      } else if (predecessors.size() > 1) {
        for (HBasicBlock* predecessor : predecessors) {
          set->IntersectWith(FindSetFor(predecessor));
          if (set->IsEmpty()) {
            break;
          }
        }
      }
    }
  }

  sets_[block->GetBlockId()] = set;

  HInstruction* current = block->GetFirstInstruction();
  while (current != nullptr) {
    // Save the next instruction in case `current` is removed from the graph.
    HInstruction* next = current->GetNext();
    // Do not kill the set with the side effects of the instruction just yet: if
    // the instruction gets GVN'ed, there is no need to kill.
    if (current->CanBeMoved()) {
      if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
        // For commutative ops, (x op y) will be treated the same as (y op x)
        // after fixed ordering.
        current->AsBinaryOperation()->OrderInputs();
      }
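      // After canonicalization, an expression such as `b + a` hashes and
      // compares equal to a previously recorded `a + b`, so the lookup below
      // can find it.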
      HInstruction* existing = set->Lookup(current);
      if (existing != nullptr) {
        // This replacement doesn't make further OrderInputs() calls necessary:
        // `current` is either used by an instruction it dominates, which has not
        // been visited yet given the visiting order, or by a phi, and we don't
        // call OrderInputs() on phis anyway.
        current->ReplaceWith(existing);
        current->GetBlock()->RemoveInstruction(current);
      } else {
        set->Kill(current->GetSideEffects());
        set->Add(current);
      }
    } else {
      set->Kill(current->GetSideEffects());
    }
    current = next;
  }

  visited_blocks_.SetBit(block->GetBlockId());
}

bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));

  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
      return true;
    }
  }

  for (const HBasicBlock* successor : block->GetSuccessors()) {
    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
      return true;
    }
  }

  return false;
}

HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
    HBasicBlock* block, const ValueSet& reference_set) const {
  HBasicBlock* secondary_match = nullptr;

  for (size_t block_id : visited_blocks_.Indexes()) {
    ValueSet* current_set = sets_[block_id];
    if (current_set == nullptr) {
      // Set was already recycled.
      continue;
    }

    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];

    // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
    // number of buckets matches perfectly, we return right away. If we find one
    // that is larger, we return it if no perfectly-matching set is found.
    // Note that we defer testing WillBeReferencedAgain until all other criteria
    // have been satisfied because it might be expensive.
    if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
      if (!WillBeReferencedAgain(current_block)) {
        return current_block;
      }
    } else if (secondary_match == nullptr &&
               current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
      if (!WillBeReferencedAgain(current_block)) {
        secondary_match = current_block;
      }
    }
  }

  return secondary_match;
}

void GVNOptimization::Run() {
  GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_);
  gvn.Run();
}

}  // namespace art