/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_utils_iterator.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 *  - In phase 1, we visit basic blocks in reverse post order and for each basic
 *    block, visit instructions sequentially, recording heap values and looking
 *    for loads and stores to eliminate without relying on loop Phis.
 *  - In phase 2, we look for loads that can be replaced by creating loop Phis
 *    or using a loop-invariant value.
 *  - In phase 3, we determine which stores are dead and can be eliminated and
 *    based on that information we re-evaluate whether some kept stores are
 *    storing the same value as the value in the heap location; such stores are
 *    also marked for elimination.
 *  - In phase 4, we commit the changes, replacing loads marked for elimination
 *    in previous processing and removing stores not marked for keeping. We also
 *    remove allocations that are no longer needed.
 *  - In phase 5, we move allocations which only escape along some executions
 *    closer to their escape points and fix up non-escaping paths with their
 *    actual values, creating PHIs when needed.
 *
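 * As a brief illustration (a hypothetical snippet, not from any particular
 * method), consider:
 *   obj.f = a;    // store
 *   x = obj.f;    // load: can be replaced by `a`
 *   obj.f = x;    // store of the same value: can be eliminated
 * If `obj` is also a non-escaping singleton (see phase 4), the remaining
 * store and even the allocation itself may be removed.
 *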
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 *  - For a loop header of an irreducible loop, all heap values are unknown.
 *  - For a loop header of a normal loop, all values unknown at the end of the
 *    preheader are initialized to unknown, other heap values are set to Phi
 *    placeholders as we cannot determine yet whether these values are known on
 *    all back-edges. We use Phi placeholders also for array heap locations with
 *    an index defined inside the loop, but this helps only when the value
 *    remains zero from the array allocation throughout the loop.
 *  - For catch blocks, we clear all assumptions since we arrived due to an
 *    instruction throwing.
 *  - For other basic blocks, we merge incoming values from the end of all
 *    predecessors. If any incoming value is unknown, the start value for this
 *    block is also unknown. Otherwise, if all the incoming values are the same
 *    (including the case of a single predecessor), the incoming value is used.
 *    Otherwise, we use a Phi placeholder to indicate different incoming values.
 *    We record whether such a Phi placeholder depends on a loop Phi placeholder.
 *
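 * For example (illustrative):
 *   if (cond) { obj.f = a; } else { obj.f = b; }
 *   x = obj.f;   // the merge block starts with a Phi placeholder for obj.f;
 *                // if both branches stored `a`, it would start with `a`
 *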
 * For each instruction in the block
 *  - If the instruction is a load from a heap location with a known value not
 *    dependent on a loop Phi placeholder, the load can be eliminated, either by
 *    using an existing instruction or by creating new Phi(s) instead. In order
 *    to maintain the validity of all heap locations during the optimization
 *    phase, we only record substitutes at this phase and the real elimination
 *    is delayed until the end of LSE. Loads that require a loop Phi placeholder
 *    replacement are recorded for processing later.
 *  - If the instruction is a store, it updates the heap value for the heap
 *    location with the stored value and records the store itself so that we can
 *    mark it for keeping if the value becomes observable. Heap values are
 *    invalidated for heap locations that may alias with the store instruction's
 *    heap location and their recorded stores are marked for keeping as they are
 *    now potentially observable. The store instruction can be eliminated unless
 *    the value stored is later needed, e.g. by a load from the same/aliased heap
 *    location, or the heap location persists at method return/deoptimization.
 *  - A store that stores the same value as the heap value is eliminated.
 *  - For newly instantiated instances, their heap values are initialized to
 *    language-defined default values.
 *  - Finalizable objects are considered as persisting at method
 *    return/deoptimization.
 *  - Some instructions such as invokes are treated as loading and invalidating
 *    all the heap values, depending on the instruction's side effects.
 *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *    partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore
 *    instructions is treated as aliasing and no load/store is eliminated in
 *    such a case.
 *
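 * For example (illustrative), default values allow eliminating loads that have
 * no preceding store:
 *   obj = new Obj();
 *   x = obj.f;   // no store since the allocation: replaced by the default 0
 *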
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with
 * an unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable, or a new unknown value on a back-edge,
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
 *
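 * For example (illustrative):
 *   obj.f = a;
 *   while (cond) { x = obj.f; }   // obj.f is `a` on entry and unchanged on
 *                                 // every back-edge: the load is replaced by `a`
 * whereas a location actually updated in the loop gets a materialized loop Phi,
 * provided no back-edge value is unknown.
 *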
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that is the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit on the number of such iterations, the total time complexity is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimates
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
 *
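 * For example (illustrative):
 *   obj.f = a;
 *   ...          // no intervening write to obj.f
 *   obj.f = a;   // overwrites the old value with the same value: eliminated
 *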
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt just
 * looks at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of object called a singleton is instantiated in the method
 * and has a single name, i.e. no aliases. Singletons therefore have exclusive
 * heap locations. Singletons are helpful in narrowing down the life span of a
 * heap location such that they do not always need to participate in merging
 * heap values. The allocation of a singleton can be eliminated if that
 * singleton is not used and does not persist at method return/deoptimization.
 *
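 * For example (illustrative):
 *   obj = new Obj();   // singleton: the reference never escapes the method
 *   obj.f = a;         // not observable, eliminated as a dead store,
 *                      // after which the allocation itself can be removed
 *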
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kUnknown,
      kDefault,
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsPlainLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsConvertedLoopPhiMarker {
      HInstruction* load_;  // Load from a narrower location than the loop phi it needs.
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value Unknown() {
      return Value(ValuelessType::kUnknown);
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPlainLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsPlainLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForConvertedLoopPhiPlaceholder(HInstruction* load) {
      return Value(NeedsConvertedLoopPhiMarker{load});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForPlainLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kUnknown;
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsPlainLoopPhi() const {
      return std::holds_alternative<NeedsPlainLoopPhiMarker>(value_);
    }

    bool NeedsConvertedLoopPhi() const {
      return std::holds_alternative<NeedsConvertedLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return NeedsPlainLoopPhi() || NeedsConvertedLoopPhi();
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else {
        DCHECK(NeedsPlainLoopPhi());
        return std::get<NeedsPlainLoopPhiMarker>(value_).phi_;
      }
    }

    size_t GetHeapLocation() const {
      DCHECK(NeedsPhi()) << *this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    HInstruction* GetLoopPhiConversionLoad() const {
      DCHECK(NeedsConvertedLoopPhi());
      return std::get<NeedsConvertedLoopPhiMarker>(value_).load_;
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     NeedsNonLoopPhiMarker,
                                     NeedsPlainLoopPhiMarker,
                                     NeedsConvertedLoopPhiMarker>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsPlainLoopPhiMarker& p1,
                                   const Value::NeedsPlainLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsConvertedLoopPhiMarker& p1,
                                   const Value::NeedsConvertedLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);

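  // Tracks the set of result types of type conversions applied to a value.
  // For example (illustrative): after Add(DataType::Type::kInt16), the
  // conversions are implicit for an Int8 input (a value-preserving widening)
  // but not for an Int32 input (truncation would be needed).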
  class TypeConversionSet {
   public:
    TypeConversionSet() : type_conversions_(0u) {}

    void Add(DataType::Type result_type) {
      static_assert(enum_cast<>(DataType::Type::kLast) < BitSizeOf<uint32_t>());
      type_conversions_ |= 1u << enum_cast<>(result_type);
    }

    void Add(TypeConversionSet other) {
      type_conversions_ |= other.type_conversions_;
    }

    bool AreAllTypeConversionsImplicit(HInstruction* input) const {
      if (type_conversions_ != 0u) {
        if (input->IsIntConstant()) {
          int32_t value = input->AsIntConstant()->GetValue();
          for (uint32_t raw_type : LowToHighBits(type_conversions_)) {
            DataType::Type type = enum_cast<DataType::Type>(raw_type);
            if (!DataType::IsTypeConversionImplicit(value, type)) {
              return false;
            }
          }
        } else {
          DataType::Type input_type = input->GetType();
          for (uint32_t raw_type : LowToHighBits(type_conversions_)) {
            DataType::Type type = enum_cast<DataType::Type>(raw_type);
            if (!DataType::IsTypeConversionImplicit(input_type, type)) {
              return false;
            }
          }
        }
      }
      return true;
    }

   private:
    uint32_t type_conversions_;
  };

  Value SkipTypeConversions(Value value,
                            /*inout*/ TypeConversionSet* type_conversions = nullptr) const {
    while (value.NeedsConvertedLoopPhi()) {
      HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
      DCHECK(!conversion_load->IsVecLoad());
      if (type_conversions != nullptr) {
        type_conversions->Add(conversion_load->GetType());
      }
      ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
      DCHECK(prev_record != nullptr);
      value = prev_record->value;
    }
    return value;
  }

  // Get the Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(SkipTypeConversions(phi_placeholder).GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsNonLoopPhi() || value.NeedsPlainLoopPhi())
        << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsConvertedLoopPhi() &&
        substitute_instructions_for_loads_[value.GetLoopPhiConversionLoad()->GetId()] != nullptr) {
      return Value::ForInstruction(
          substitute_instructions_for_loads_[value.GetLoopPhiConversionLoad()->GetId()]);
    } else if ((value.NeedsNonLoopPhi() || value.NeedsPlainLoopPhi()) &&
               phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  // Calculate the value stored in location `idx` for a loop Phi placeholder-dependent `load`.
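  // For example (an illustrative sketch): if an Int8 `load` reads a location
  // holding an Int32 loop-phi value, storing the result into an Int32 location
  // records the Int32 -> Int8 conversion (its sign-extension bits matter),
  // while storing it into an Int8 location drops the conversion as redundant.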
  Value StoredValueForLoopPhiPlaceholderDependentLoad(size_t idx, HInstruction* load) const {
    DCHECK(IsLoad(load));
    DCHECK_LT(static_cast<size_t>(load->GetId()), loads_requiring_loop_phi_.size());
    DCHECK(loads_requiring_loop_phi_[load->GetId()] != nullptr);
    Value loaded_value = loads_requiring_loop_phi_[load->GetId()]->value;
    DCHECK(loaded_value.NeedsLoopPhi());
    DataType::Type load_type = load->GetType();
    size_t load_size = DataType::Size(load_type);
    size_t store_size = DataType::Size(heap_location_collector_.GetHeapLocation(idx)->GetType());

    if (kIsDebugBuild && load->IsVecLoad()) {
      // For vector operations, the load type is always `Float64` and therefore the store size is
      // never higher and we do not record any conversions below. This is OK because we currently
      // do not vectorize any loops with widening operations.
      CHECK_EQ(load_size, DataType::Size(DataType::Type::kFloat64));
      CHECK_LE(store_size, load_size);
      CHECK(!loaded_value.NeedsConvertedLoopPhi());
    } else if (kIsDebugBuild) {
      // There are no implicit conversions between 64-bit types and smaller types.
      // We shall not record any conversions for 64-bit types.
      CHECK_EQ(load_size == DataType::Size(DataType::Type::kInt64),
               store_size == DataType::Size(DataType::Type::kInt64));
      CHECK_IMPLIES(load_size == DataType::Size(DataType::Type::kInt64),
                    !loaded_value.NeedsConvertedLoopPhi());
    }
    // The `loaded_value` can record a conversion only if the `load` was from
    // a wider field than the previous converting load.
    DCHECK_IMPLIES(loaded_value.NeedsConvertedLoopPhi(),
                   load_size > DataType::Size(loaded_value.GetLoopPhiConversionLoad()->GetType()));

    Value value = loaded_value;
    if (load_size < store_size) {
      // Add a type conversion to a narrow type unless it's an implicit conversion
      // from an already converted value.
      if (!loaded_value.NeedsConvertedLoopPhi() ||
          !DataType::IsTypeConversionImplicit(loaded_value.GetLoopPhiConversionLoad()->GetType(),
                                              load_type)) {
        value = Value::ForConvertedLoopPhiPlaceholder(load);
      } else {
        DCHECK(value.Equals(loaded_value));
      }
    } else {
      // Remove conversions to types at least as wide as the field we're storing to.
      // We record only conversions that define sign-/zero-extension bits to store.
      while (value.NeedsConvertedLoopPhi() &&
             DataType::Size(value.GetLoopPhiConversionLoad()->GetType()) >= store_size) {
        HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
        ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
        DCHECK(prev_record != nullptr);
        value = prev_record->value;
        DCHECK(value.NeedsLoopPhi());
      }
    }

    DCHECK_EQ(PhiPlaceholderIndex(loaded_value), PhiPlaceholderIndex(value));
    return value;
  }

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // We should never add a type conversion into a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // a type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // All vector instructions report their type as `Float64`, so the conversion is implicit.
    // This is OK because we currently do not vectorize any loops with widening operations.
    DCHECK(!instruction->IsVecLoad());

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross an irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
          expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However, the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
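    // For example (an illustrative sketch): an Int32 heap value (A) stored
    // into an Int8 field (B) and read back by an Int8 load (C) needs a single
    // Int32 -> Int8 conversion of the tracked value.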
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // An unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // An unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations, it is better
    // to avoid creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsUnknown()) {
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap_values of that heap location X hold a store, keep that store.
  // This is needed for a dependent load that's not eliminated, since any store
  // that may put a value into the load's heap location needs to be kept.
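  // For example (illustrative): reading a[j] with an unknown `j` must keep a
  // previously recorded store to a[i], because `i == j` is possible.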
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with an unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
    //                                                     here is not packed_data anymore.
    //
    // TODO: to allow such a 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to partial overlap of two vector accesses,
    //     e.g. a[i..i+3] and a[i+1,..,i+4].
    // (2) May alias due to two vector accesses that may completely overlap,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  void MaterializeTypeConversionsIfNeeded(Value value);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
      heap_values[i].value = Value::Unknown();
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitGetLocation(instruction, idx);
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitGetLocation(instruction, idx);
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

VisitReturn(HReturn * instruction)1084   void VisitReturn(HReturn* instruction) override {
1085     HandleExit(instruction->GetBlock());
1086   }
1087 
VisitReturnVoid(HReturnVoid * return_void)1088   void VisitReturnVoid(HReturnVoid* return_void) override {
1089     HandleExit(return_void->GetBlock());
1090   }
1091 
HandleThrowingInstruction(HInstruction * instruction)1092   void HandleThrowingInstruction(HInstruction* instruction) {
1093     DCHECK(instruction->CanThrow());
1094     // If we are inside of a try, singletons can become visible since we may not exit the method.
1095     HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
1096   }
1097 
VisitMethodEntryHook(HMethodEntryHook * method_entry)1098   void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
1099     HandleThrowingInstruction(method_entry);
1100   }
1101 
VisitMethodExitHook(HMethodExitHook * method_exit)1102   void VisitMethodExitHook(HMethodExitHook* method_exit) override {
1103     HandleThrowingInstruction(method_exit);
1104   }
1105 
VisitDivZeroCheck(HDivZeroCheck * div_zero_check)1106   void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
1107     HandleThrowingInstruction(div_zero_check);
1108   }
1109 
VisitNullCheck(HNullCheck * null_check)1110   void VisitNullCheck(HNullCheck* null_check) override {
1111     HandleThrowingInstruction(null_check);
1112   }
1113 
1114   void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
1115     HandleThrowingInstruction(bounds_check);
1116   }
1117 
1118   void VisitLoadClass(HLoadClass* load_class) override {
1119     if (load_class->CanThrow()) {
1120       HandleThrowingInstruction(load_class);
1121     }
1122   }
1123 
1124   void VisitLoadString(HLoadString* load_string) override {
1125     if (load_string->CanThrow()) {
1126       HandleThrowingInstruction(load_string);
1127     }
1128   }
1129 
1130   void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
1131     HandleThrowingInstruction(load_method_handle);
1132   }
1133 
1134   void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
1135     HandleThrowingInstruction(load_method_type);
1136   }
1137 
1138   void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
1139     HandleThrowingInstruction(sb_append);
1140   }
1141 
1142   void VisitThrow(HThrow* throw_instruction) override {
1143     HandleThrowingInstruction(throw_instruction);
1144   }
1145 
1146   void VisitCheckCast(HCheckCast* check_cast) override {
1147     HandleThrowingInstruction(check_cast);
1148   }
1149 
1150   void HandleInvoke(HInstruction* instruction) {
1151     // If `instruction` can throw we have to presume all stores are visible.
1152     const bool can_throw = instruction->CanThrow();
1153     // If we are in a try, even singletons are observable.
1154     const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
1155     SideEffects side_effects = instruction->GetSideEffects();
1156     ScopedArenaVector<ValueRecord>& heap_values =
1157         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1158     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1159       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1160       // We don't need to do anything if the reference has not escaped at this
1161       // point, which is the case for singletons.
1162       if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
1163         // Singleton references cannot be seen by the callee.
1164       } else {
1165         if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1166           // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1167           KeepStores(heap_values[i].stored_by);
1168           heap_values[i].stored_by = Value::Unknown();
1169         }
1170         if (side_effects.DoesAnyWrite()) {
1171           // The value may be clobbered.
1172           heap_values[i].value = Value::Unknown();
1173         }
1174       }
1175     }
1176   }
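
  // Illustration: after `escaped.x = 1; DoCall();` the callee may read `x`,
  // so the store must be kept; if the call may also write, the cached value
  // for the location is invalidated as well. (`escaped` and `DoCall` are
  // illustrative names.)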
1177 
1178   void VisitInvoke(HInvoke* invoke) override {
1179     HandleInvoke(invoke);
1180   }
1181 
1182   void VisitClinitCheck(HClinitCheck* clinit) override {
1183     // Class initialization check can result in class initializer calling arbitrary methods.
1184     HandleInvoke(clinit);
1185   }
1186 
1187   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1188     // Conservatively treat it as an invocation.
1189     HandleInvoke(instruction);
1190   }
1191 
1192   void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1193     // Conservatively treat it as an invocation.
1194     HandleInvoke(instruction);
1195   }
1196 
1197   void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1198     // Conservatively treat it as an invocation.
1199     HandleInvoke(instruction);
1200   }
1201 
1202   void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1203     // Conservatively treat it as an invocation.
1204     HandleInvoke(instruction);
1205   }
1206 
1207   void VisitNewInstance(HNewInstance* new_instance) override {
1208     // If we are in a try, even singletons are observable.
1209     const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
1210     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1211     if (ref_info == nullptr) {
1212       // new_instance isn't used for field accesses. No need to process it.
1213       return;
1214     }
1215     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1216       DCHECK(!new_instance->IsFinalizable());
1217       // new_instance can potentially be eliminated.
1218       singleton_new_instances_.push_back(new_instance);
1219     }
1220     HBasicBlock* block = new_instance->GetBlock();
1221     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1222     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1223       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1224       HInstruction* ref = info->GetReference();
1225       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1226       if (ref == new_instance) {
1227         if (offset >= mirror::kObjectHeaderSize ||
1228             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1229           // Instance fields other than the header fields are set to default heap values.
1230           // The shadow$_monitor_ header field is also set to the default value.
1231           heap_values[i].value = Value::Default();
1232           heap_values[i].stored_by = Value::Unknown();
1233         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1234           // The shadow$_klass_ field is special, however: it has an actual, known value.
1235           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1236           heap_values[i].stored_by = Value::Unknown();
1237         }
1238       } else if (inside_a_try || IsEscapingObject(info)) {
1239         // Since NewInstance can throw, we presume all previous stores could be visible.
1240         KeepStores(heap_values[i].stored_by);
1241         heap_values[i].stored_by = Value::Unknown();
1242       }
1243     }
1244   }
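
  // Illustration: immediately after `Point p = new Point();` a load of `p.x`
  // sees the default value, so `VisitGetLocation()` can replace it with the
  // constant 0 instead of reading from memory. (`Point` is an illustrative name.)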
1245 
1246   void VisitNewArray(HNewArray* new_array) override {
1247     // If we are in a try, even singletons are observable.
1248     const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
1249     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1250     if (ref_info == nullptr) {
1251       // new_array isn't used for array accesses. No need to process it.
1252       return;
1253     }
1254     if (ref_info->IsSingletonAndRemovable()) {
1255       if (new_array->GetLength()->IsIntConstant() &&
1256           new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1257         // new_array can potentially be eliminated.
1258         singleton_new_instances_.push_back(new_array);
1259       } else {
1260         // new_array may throw NegativeArraySizeException. Keep it.
1261       }
1262     }
1263     HBasicBlock* block = new_array->GetBlock();
1264     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1265     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1266       HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1267       ReferenceInfo* info = location->GetReferenceInfo();
1268       HInstruction* ref = info->GetReference();
1269       if (ref == new_array && location->GetIndex() != nullptr) {
1270         // Array elements are set to default heap values.
1271         heap_values[i].value = Value::Default();
1272         heap_values[i].stored_by = Value::Unknown();
1273       } else if (inside_a_try || IsEscapingObject(info)) {
1274         // Since NewArray can throw, we presume all previous stores could be visible.
1275         KeepStores(heap_values[i].stored_by);
1276         heap_values[i].stored_by = Value::Unknown();
1277       }
1278     }
1279   }
1280 
1281   void VisitInstruction(HInstruction* instruction) override {
1282     // Throwing instructions must be handled specially.
1283     DCHECK(!instruction->CanThrow());
1284   }
1285 
1286   const HeapLocationCollector& heap_location_collector_;
1287 
1288   // Use local allocator for allocating memory.
1289   ScopedArenaAllocator allocator_;
1290 
1291   // The maximum possible number of unique Phi placeholders.
1292   size_t num_phi_placeholders_;
1293 
1294   // One array of heap value records for each block.
1295   ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1296 
1297   // We record loads and stores for re-processing when we find a loop Phi placeholder
1298   // with unknown value from a predecessor, and also for removing stores that are
1299   // found to be dead, i.e. not marked in `kept_stores_` at the end.
1300   struct LoadStoreRecord {
1301     HInstruction* load_or_store;
1302     size_t heap_location_index;
1303   };
1304   ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1305 
1306   // We record the substitute instructions for loads that should be
1307   // eliminated but may be used by heap locations. They'll be removed
1308   // in the end. These are indexed by the load's id.
1309   ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1310 
1311   // Record stores to keep in a bit vector indexed by instruction ID.
1312   ArenaBitVector kept_stores_;
1313   // When we need to keep all stores that feed a Phi placeholder, we just record the
1314   // index of that placeholder for processing after graph traversal.
1315   ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1316 
1317   // Loads that would require a loop Phi to replace are recorded for processing
1318   // later as we do not have enough information from back-edges to determine if
1319   // a suitable Phi can be found or created when we visit these loads.
1320   // This is a flat "map" indexed by the load instruction id.
1321   ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;
1322 
1323   // For stores, record the old value records that were replaced and the stored values.
1324   struct StoreRecord {
1325     StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
1326         : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
1327     ValueRecord old_value_record;
1328     HInstruction* stored_value;
1329   };
1330   // This is a flat "map" indexed by the store instruction id.
1331   ScopedArenaVector<StoreRecord*> store_records_;
1332 
1333   // Replacements for Phi placeholders.
1334   // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1335   ScopedArenaVector<Value> phi_placeholder_replacements_;
1336 
1337   ScopedArenaVector<HInstruction*> singleton_new_instances_;
1338 
1339   // The field infos for each heap location (if relevant).
1340   ScopedArenaVector<const FieldInfo*> field_infos_;
1341 
1342   Phase current_phase_;
1343 
1344   friend std::ostream& operator<<(std::ostream& os, const Value& v);
1345   friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1346 
1347   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1348 };
1349 
1350 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1351   switch (phase) {
1352     case LSEVisitor::Phase::kLoadElimination:
1353       return oss << "kLoadElimination";
1354     case LSEVisitor::Phase::kStoreElimination:
1355       return oss << "kStoreElimination";
1356   }
1357 }
1358 
1359 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1360                           const LSEVisitor::PhiPlaceholder& p2) {
1361   return p1.Equals(p2);
1362 }
1363 
1364 constexpr bool operator==(const LSEVisitor::Value::NeedsPlainLoopPhiMarker& p1,
1365                           const LSEVisitor::Value::NeedsPlainLoopPhiMarker& p2) {
1366   return p1.phi_ == p2.phi_;
1367 }
1368 
1369 constexpr bool operator==(const LSEVisitor::Value::NeedsConvertedLoopPhiMarker& p1,
1370                           const LSEVisitor::Value::NeedsConvertedLoopPhiMarker& p2) {
1371   return p1.load_ == p2.load_;
1372 }
1373 
1374 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1375                           const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1376   return p1.phi_ == p2.phi_;
1377 }
1378 
1379 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1380   p.Dump(oss);
1381   return oss;
1382 }
1383 
1384 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1385   return value_ == other.value_;
1386 }
1387 
1388 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1389   // Only valid values can be compared.
1390   DCHECK(IsValid());
1391   DCHECK(other.IsValid());
1392   if (value_ == other.value_) {
1393     // Note: Two unknown values are considered different.
1394     return !IsUnknown();
1395   } else {
1396     // Default is considered equal to zero-bit-pattern instructions.
1397     return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1398             (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1399   }
1400 }
1401 
1402 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1403   if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1404     switch (GetValuelessType()) {
1405       case ValuelessType::kDefault:
1406         return os << "Default";
1407       case ValuelessType::kUnknown:
1408         return os << "Unknown";
1409       case ValuelessType::kInvalid:
1410         return os << "Invalid";
1411     }
1412   } else if (IsInstruction()) {
1413     return os << "Instruction[id: " << GetInstruction()->GetId()
1414               << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1415   } else if (NeedsPlainLoopPhi()) {
1416     return os << "NeedsPlainLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1417               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1418   } else if (NeedsConvertedLoopPhi()) {
1419     return os << "NeedsConvertedLoopPhi[id: " << GetLoopPhiConversionLoad()->GetId()
1420               << ", block: " << GetLoopPhiConversionLoad()->GetBlock()->GetBlockId() << "]";
1421   } else {
1422     return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1423               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1424   }
1425 }
1426 
1427 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1428   return v.Dump(os);
1429 }
1430 
1431 LSEVisitor::LSEVisitor(HGraph* graph,
1432                        const HeapLocationCollector& heap_location_collector,
1433                        OptimizingCompilerStats* stats)
1434     : HGraphDelegateVisitor(graph, stats),
1435       heap_location_collector_(heap_location_collector),
1436       allocator_(graph->GetArenaStack()),
1437       num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1438                             heap_location_collector_.GetNumberOfHeapLocations()),
1439       heap_values_for_(graph->GetBlocks().size(),
1440                        ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1441                        allocator_.Adapter(kArenaAllocLSE)),
1442       loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1443       // We may add new instructions (default values, Phis) but we're not adding loads
1444       // or stores, so we shall not need to resize the following vector and BitVector.
1445       substitute_instructions_for_loads_(
1446           graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
1447       kept_stores_(&allocator_,
1448                    /*start_bits=*/graph->GetCurrentInstructionId(),
1449                    /*expandable=*/false,
1450                    kArenaAllocLSE),
1451       phi_placeholders_to_search_for_kept_stores_(&allocator_,
1452                                                   num_phi_placeholders_,
1453                                                   /*expandable=*/false,
1454                                                   kArenaAllocLSE),
1455       loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1456       store_records_(allocator_.Adapter(kArenaAllocLSE)),
1457       phi_placeholder_replacements_(
1458           num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
1459       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1460       field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1461                    allocator_.Adapter(kArenaAllocLSE)),
1462       current_phase_(Phase::kLoadElimination) {}
1463 
1464 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1465   // If the pre-header value is known (which implies that the reference dominates this
1466   // block), use a Phi placeholder for the value in the loop header. If all predecessors
1467   // are later found to have a known value, we can replace loads from this location,
1468   // either with the pre-header value or with a new Phi. For array locations, the index
1469   // may be defined inside the loop but the only known value in that case should be the
1470   // default value or a Phi placeholder that can be replaced only with the default value.
1471   HLoopInformation* loop_info = block->GetLoopInformation();
1472   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1473   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1474   if (pre_header_value.IsUnknown()) {
1475     return pre_header_value;
1476   }
1477   if (kIsDebugBuild) {
1478     // Check that the reference indeed dominates this loop.
1479     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1480     HInstruction* ref = location->GetReferenceInfo()->GetReference();
1481     CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1482         << GetGraph()->PrettyMethod();
1483     // Check that the index, if defined inside the loop, tracks a default value
1484     // or a Phi placeholder requiring a loop Phi.
1485     HInstruction* index = location->GetIndex();
1486     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1487       CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1488           << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1489           << pre_header_value;
1490     }
1491   }
1492   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1493   return ReplacementOrValue(Value::ForPlainLoopPhiPlaceholder(phi_placeholder));
1494 }
1495 
1496 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1497   // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1498   // if the value in the location escapes. This is not applicable to singletons that are
1499   // defined inside the loop as they shall be dead in the loop header.
1500   const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1501   const HInstruction* reference = ref_info->GetReference();
1502   // Finalizable objects always escape.
1503   const bool is_finalizable =
1504       reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1505   if (ref_info->IsSingleton() &&
1506       block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1507       !is_finalizable) {
1508     return Value::Unknown();
1509   }
1510   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1511   return Value::ForPlainLoopPhiPlaceholder(phi_placeholder);
1512 }
1513 
1514 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1515   DCHECK(block->IsLoopHeader());
1516   int block_id = block->GetBlockId();
1517   HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1518   ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1519       heap_values_for_[pre_header->GetBlockId()];
1520   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1521   DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1522   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1523   DCHECK(heap_values.empty());
1524 
1525   // Don't eliminate loads in irreducible loops.
1526   if (block->GetLoopInformation()->IsIrreducible()) {
1527     heap_values.resize(num_heap_locations,
1528                        {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
1529     // Also keep the stores before the loop header, including in blocks that were not visited yet.
1530     for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1531       KeepStores(Value::ForPlainLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1532     }
1533     return;
1534   }
1535 
1536   // Fill `heap_values` based on values from pre-header.
1537   heap_values.reserve(num_heap_locations);
1538   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1539     heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1540   }
1541 }
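
// Illustration: entering a reducible loop with `obj.f == v` known in the
// pre-header, the header records a Phi placeholder for `obj.f`; loads in the
// loop can later be replaced by `v` or by a materialized loop Phi once all
// back-edge values are known.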
1542 
1543 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1544   ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1545   DCHECK(!predecessors.empty());
1546   Value merged_value =
1547       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1548   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1549     Value pred_value =
1550         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1551     if (pred_value.Equals(merged_value)) {
1552       // Value is the same. No need to update our merged value.
1553       continue;
1554     } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1555       // If one is unknown and the other is not, the merged value is unknown.
1556       merged_value = Value::Unknown();
1557       break;
1558     } else {
1559       // There are conflicting known values. We may still be able to replace loads with a Phi.
1560       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1561       // Propagate the need for a new loop Phi from all predecessors.
1562       bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1563       merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1564     }
1565   }
1566   return merged_value;
1567 }
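
// Illustration: merging `v` with `v` keeps `v`; merging `v` with Unknown
// yields Unknown; merging `v` with a different known value `w` records a Phi
// placeholder for this block that may later be materialized as Phi(v, w).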
1568 
1569 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1570   if (block->IsExitBlock()) {
1571     // Exit block doesn't really merge values since the control flow ends in
1572     // its predecessors. Each predecessor needs to make sure stores are kept
1573     // if necessary.
1574     return;
1575   }
1576 
1577   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1578   DCHECK(heap_values.empty());
1579   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1580   if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
1581     DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1582     heap_values.resize(num_heap_locations,
1583                        {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
1584     return;
1585   }
1586 
1587   heap_values.reserve(num_heap_locations);
1588   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1589     Value merged_value = MergePredecessorValues(block, idx);
1590     if (kIsDebugBuild) {
1591       if (merged_value.NeedsPhi()) {
1592         uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1593         CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1594       } else if (merged_value.IsInstruction()) {
1595         CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1596       }
1597     }
1598     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1599     Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1600     for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1601       uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1602       Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1603       if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1604           !merged_stored_by.Equals(stored_by)) {
1605         // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1606         PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1607         merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1608         break;
1609       }
1610     }
1611     heap_values.push_back({ merged_value, merged_stored_by });
1612   }
1613 }
1614 
1615 static HInstruction* FindOrConstructNonLoopPhi(
1616     HBasicBlock* block,
1617     const ScopedArenaVector<HInstruction*>& phi_inputs,
1618     DataType::Type type) {
1619   for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1620     HInstruction* phi = phi_it.Current();
1621     DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1622     auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1623       return lhs == rhs.GetInstruction();
1624     };
1625     if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1626       return phi;
1627     }
1628   }
1629   ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1630   HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1631   for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1632     DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1633     phi->SetRawInputAt(i, phi_inputs[i]);
1634   }
1635   block->AddPhi(phi);
1636   if (type == DataType::Type::kReference) {
1637     // Update reference type information. Pass invalid handles, these are not used for Phis.
1638     ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1639                                        Handle<mirror::DexCache>(),
1640                                        /* is_first_run= */ false);
1641     rtp_fixup.Visit(phi);
1642   }
1643   return phi;
1644 }
1645 
1646 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1647   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1648   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1649   size_t idx = phi_placeholder.GetHeapLocation();
1650 
1651   // Use local allocator to reduce peak memory usage.
1652   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1653   // Reuse the same vector for collecting phi inputs.
1654   ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1655 
1656   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1657   work_queue.push_back(phi_placeholder);
1658   while (!work_queue.empty()) {
1659     PhiPlaceholder current_phi_placeholder = work_queue.back();
1660     if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1661       // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1662       // that directly or indirectly depends on it, so it was already processed as part of the
1663       // other Phi placeholder's dependencies before this one got back to the top of the stack.
1664       work_queue.pop_back();
1665       continue;
1666     }
1667     uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1668     HBasicBlock* current_block = blocks[current_block_id];
1669     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1670 
1671     // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1672     // And the only way for such a merged value to reach a different heap location is through
1673     // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1674     // seen here are tied to one heap location.
1675     DCHECK(!current_block->IsLoopHeader())
1676         << current_phi_placeholder << " phase: " << current_phase_;
1677     DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1678 
1679     phi_inputs.clear();
1680     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1681       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1682       DCHECK(!pred_value.IsUnknown()) << pred_value << " block " << current_block->GetBlockId()
1683                                       << " pred: " << predecessor->GetBlockId();
1684       if (pred_value.NeedsNonLoopPhi()) {
1685         // We need to process the Phi placeholder first.
1686         work_queue.push_back(pred_value.GetPhiPlaceholder());
1687       } else if (pred_value.IsDefault()) {
1688         phi_inputs.push_back(GetDefaultValue(type));
1689       } else {
1690         DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1691                                            << " pred: " << predecessor->GetBlockId();
1692         phi_inputs.push_back(pred_value.GetInstruction());
1693       }
1694     }
1695     if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1696       // All inputs are available. Find or construct the Phi replacement.
1697       phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1698           Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1699       // Remove the block from the queue.
1700       DCHECK_EQ(current_phi_placeholder, work_queue.back());
1701       work_queue.pop_back();
1702     }
1703   }
1704 }
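
// Illustration: for a diamond where one arm leaves `v` and the other `w` in a
// heap location, a load after the merge is replaced by a materialized
// Phi(v, w); nested merges are resolved by processing dependent placeholders
// first via the work queue above.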
1705 
1706 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1707   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1708   DCHECK_EQ(DataType::Size(heap_location_collector_.GetHeapLocation(idx)->GetType()),
1709             DataType::Size(instruction->IsVecLoad() ? instruction->AsVecLoad()->GetPackedType()
1710                                                     : instruction->GetType()));
1711   uint32_t block_id = instruction->GetBlock()->GetBlockId();
1712   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1713   ValueRecord& record = heap_values[idx];
1714   DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1715   loads_and_stores_.push_back({ instruction, idx });
1716   if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1717       !IsDefaultOrPhiAllowedForLoad(instruction)) {
1718     record.value = Value::Unknown();
1719   }
1720   if (record.value.IsDefault()) {
1721     KeepStores(record.stored_by);
1722     HInstruction* constant = GetDefaultValue(instruction->GetType());
1723     AddRemovedLoad(instruction, constant);
1724     record.value = Value::ForInstruction(constant);
1725   } else if (record.value.IsUnknown()) {
1726     // Load isn't eliminated. Put the load as the value into the HeapLocation.
1727     // This acts like GVN but with better aliasing analysis.
1728     Value old_value = record.value;
1729     record.value = Value::ForInstruction(instruction);
1730     KeepStoresIfAliasedToLocation(heap_values, idx);
1731     KeepStores(old_value);
1732   } else if (record.value.NeedsLoopPhi()) {
1733     // We do not know yet if the value is known for all back edges. Record for future processing.
1734     if (loads_requiring_loop_phi_.empty()) {
1735       loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1736     }
1737     DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
1738     loads_requiring_loop_phi_[instruction->GetId()] =
1739         new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
1740   } else {
1741     // This load can be eliminated but we may need to construct non-loop Phis.
1742     if (record.value.NeedsNonLoopPhi()) {
1743       MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1744       record.value = Replacement(record.value);
1745     }
1746     HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1747     AddRemovedLoad(instruction, heap_value);
1748   }
1749 }
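
// Illustration of the GVN-like behavior: in `int a = obj.f; int b = obj.f;`
// with no intervening clobber of `obj.f`, the second load is replaced by the
// first and the first load becomes the tracked heap value for the location.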
1750 
1751 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1752   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1753   DCHECK(!IsStore(value)) << value->DebugName();
1754   // The `value` may already have a substitute.
1755   value = FindSubstitute(value);
1756   HBasicBlock* block = instruction->GetBlock();
1757   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1758   ValueRecord& record = heap_values[idx];
1759   DCHECK_IMPLIES(record.value.IsInstruction(),
1760                  FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1761 
1762   // Calculate the new `Value` to store to the `record`.
1763   Value new_value = Value::ForInstruction(value);
1764   // Note that the `value` can be a newly created `Phi` with an id that falls outside
1765   // the allocated `loads_requiring_loop_phi_` range.
1766   DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
1767                  static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
1768   if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
1769       loads_requiring_loop_phi_[value->GetId()] != nullptr) {
1770     // Propagate the Phi placeholder or appropriate converting load to the record.
1771     new_value = StoredValueForLoopPhiPlaceholderDependentLoad(idx, value);
1772     DCHECK(new_value.NeedsLoopPhi());
1773   }
1774 
1775   if (record.value.Equals(value)) {
1776     // Store into the heap location with the same value.
1777     // This store can be eliminated right away.
1778     block->RemoveInstruction(instruction);
1779     return;
1780   }
1781 
1782   if (store_records_.empty()) {
1783     store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1784   }
1785   DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
1786   store_records_[instruction->GetId()] =
1787       new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
1788   loads_and_stores_.push_back({ instruction, idx });
1789 
1790   // If the `record.stored_by` specified a store from this block, it shall be removed
1791   // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1792   // `kept_stores_` anymore after we update the `record.stored_by` below.
1793   DCHECK(!record.stored_by.IsInstruction() ||
1794          record.stored_by.GetInstruction()->GetBlock() != block ||
1795          record.stored_by.GetInstruction()->CanThrow() ||
1796          !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1797 
1798   if (instruction->CanThrow()) {
1799     // Previous stores can become visible.
1800     HandleThrowingInstruction(instruction);
1801     // We cannot remove a possibly throwing store.
1802     // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1803     kept_stores_.SetBit(instruction->GetId());
1804   }
1805 
1806   // Update the record.
1807   record.value = new_value;
1808   // Track the store in the value record. If the value is loaded or needed after
1809   // return/deoptimization later, this store isn't really redundant.
1810   record.stored_by = Value::ForInstruction(instruction);
1811 
1812   // This store may kill values in other heap locations due to aliasing.
1813   for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1814     if (i == idx ||
1815         heap_values[i].value.IsUnknown() ||
1816         CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1817         !heap_location_collector_.MayAlias(i, idx)) {
1818       continue;
1819     }
1820     // Kill heap locations that may alias and keep previous stores to these locations.
1821     KeepStores(heap_values[i].stored_by);
1822     heap_values[i].stored_by = Value::Unknown();
1823     heap_values[i].value = Value::Unknown();
1824   }
1825 }
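
// Illustration: in `obj.f = v; obj.f = v;` the second store is removed right
// away since it stores the value already present; in `obj.f = v; obj.f = w;`
// the first store becomes a removal candidate unless something can observe it
// (a load, an escape, a throwing or reading call) before it is overwritten.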
1826 
1827 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1828   // Populate the heap_values array for this block.
1829   // TODO: try to reuse the heap_values array from one predecessor if possible.
1830   if (block->IsLoopHeader()) {
1831     PrepareLoopRecords(block);
1832   } else {
1833     MergePredecessorRecords(block);
1834   }
1835   // Visit non-Phi instructions.
1836   VisitNonPhiInstructions(block);
1837 }
1838 
1839 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1840   DCHECK_NE(idx1, idx2);
1841   DCHECK(loop_header->IsLoopHeader());
1842   if (heap_location_collector_.MayAlias(idx1, idx2)) {
1843     return true;
1844   }
1845   // For array locations with index defined inside the loop, include
1846   // all other locations in the array, even those that LSA declares
1847   // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1848   // refer to the same locations for different iterations. (LSA's
1849   // `ComputeMayAlias()` does not consider different loop iterations.)
1850   HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1851   HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1852   if (loc1->IsArray() &&
1853       loc2->IsArray() &&
1854       HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1855                                                 loc2->GetReferenceInfo())) {
1856     HLoopInformation* loop_info = loop_header->GetLoopInformation();
1857     if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1858         loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1859       // Consider the locations aliasing. Do not optimize the case where both indexes
1860       // are loop invariants defined inside the loop, rely on LICM to pull them out.
1861       return true;
1862     }
1863   }
1864   return false;
1865 }
1866 
1867 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1868     PhiPlaceholder phi_placeholder,
1869     DataType::Type type,
1870     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1871   // Use local allocator to reduce peak memory usage.
1872   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1873   ArenaBitVector visited(&allocator,
1874                          /*start_bits=*/ num_phi_placeholders_,
1875                          /*expandable=*/ false,
1876                          kArenaAllocLSE);
1877   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1878 
1879   auto maybe_add_to_work_queue = [&](Value predecessor_value) {
1880     // Visit the predecessor Phi placeholder if it's not visited yet.
1881     DCHECK(predecessor_value.NeedsNonLoopPhi() || predecessor_value.NeedsPlainLoopPhi());
1882     PhiPlaceholder predecessor_phi_placeholder = predecessor_value.GetPhiPlaceholder();
1883     if (!visited.IsBitSet(PhiPlaceholderIndex(predecessor_phi_placeholder))) {
1884       visited.SetBit(PhiPlaceholderIndex(predecessor_phi_placeholder));
1885       work_queue.push_back(predecessor_phi_placeholder);
1886     }
1887   };
1888 
1889   // Use depth first search to check if any non-Phi input is unknown.
1890   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1891   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1892   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1893   work_queue.push_back(phi_placeholder);
1894   while (!work_queue.empty()) {
1895     PhiPlaceholder current_phi_placeholder = work_queue.back();
1896     work_queue.pop_back();
1897     HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
1898     DCHECK_GE(block->GetPredecessors().size(), 2u);
1899     size_t idx = current_phi_placeholder.GetHeapLocation();
1900     for (HBasicBlock* predecessor : block->GetPredecessors()) {
1901       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1902       // Skip over type conversions (these are unnecessary for the default value).
1903       value = SkipTypeConversions(value);
1904       if (value.NeedsPhi()) {
1905         maybe_add_to_work_queue(value);
1906       } else if (!value.Equals(Value::Default())) {
1907         return false;  // Report failure.
1908       }
1909     }
1910     if (block->IsLoopHeader()) {
1911       // For back-edges we need to check all locations that write to the same array,
1912       // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
1913       // as they may actually refer to the same locations for different iterations.
1914       for (size_t i = 0; i != num_heap_locations; ++i) {
1915         if (i == idx ||
1916             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1917                 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1918           continue;
1919         }
1920         for (HBasicBlock* predecessor : block->GetPredecessors()) {
1921           // Check if there were any writes to this location.
1922           // Note: We could simply process the values but due to the vector operation
1923           // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1924           // the value to change and not be equal to default. To work around this and
1925           // allow replacing the non-vector load of loop-invariant default values
1926           // anyway, skip over paths that do not have any writes.
1927           ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1928           while (record.stored_by.NeedsPlainLoopPhi() &&
1929                  blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
1930             HLoopInformation* loop_info =
1931                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
1932             record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1933           }
1934           DCHECK(!record.stored_by.NeedsConvertedLoopPhi());
1935           Value value = ReplacementOrValue(record.value);
1936           // Skip over type conversions (these are unnecessary for the default value).
1937           value = SkipTypeConversions(value);
1938           if (value.NeedsPhi()) {
1939             maybe_add_to_work_queue(value);
1940           } else if (!value.Equals(Value::Default())) {
1941             return false;  // Report failure.
1942           }
1943         }
1944       }
1945     }
1946   }
1947 
1948   // Record replacement and report success.
1949   HInstruction* replacement = GetDefaultValue(type);
1950   for (uint32_t phi_placeholder_index : visited.Indexes()) {
1951     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1952     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
1953     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
1954     // We use both vector and non-vector operations to analyze the information. However, we replace
1955     // only non-vector operations in this code path.
1956     if (!hl->IsVecOp()) {
1957       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1958       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
1959     }
1960   }
1961   return true;
1962 }
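
// Illustration: in `int[] a = new int[n]; for (...) { sum += a[i]; }` with no
// stores to `a`, every `a[i]` load inside the loop sees the default value, so
// the loads can be replaced by the constant 0 instead of loop Phis.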
1963 
1964 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1965     PhiPlaceholder phi_placeholder,
1966     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1967   // Use local allocator to reduce peak memory usage.
1968   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1969   ArenaBitVector visited(&allocator,
1970                          /*start_bits=*/ num_phi_placeholders_,
1971                          /*expandable=*/ false,
1972                          kArenaAllocLSE);
1973   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1974 
1975   TypeConversionSet type_conversions;
1976 
1977   // Use depth first search to check if any non-Phi input is unknown.
1978   HInstruction* replacement = nullptr;
1979   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1980   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1981   work_queue.push_back(phi_placeholder);
1982   while (!work_queue.empty()) {
1983     PhiPlaceholder current_phi_placeholder = work_queue.back();
1984     work_queue.pop_back();
1985     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
1986     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1987     size_t idx = current_phi_placeholder.GetHeapLocation();
1988     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1989       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1990       // Skip type conversions but record them for checking later.
1991       value = SkipTypeConversions(value, &type_conversions);
1992       if (value.NeedsPhi()) {
1993         // Visit the predecessor Phi placeholder if it's not visited yet.
1994         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1995           visited.SetBit(PhiPlaceholderIndex(value));
1996           work_queue.push_back(value.GetPhiPlaceholder());
1997         }
1998       } else {
1999         if (!value.IsInstruction() ||
2000             (replacement != nullptr && replacement != value.GetInstruction())) {
2001           return false;  // Report failure.
2002         }
2003         replacement = value.GetInstruction();
2004       }
2005     }
2006     // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
2007     // for back-edges, it is not needed here. When looking for a single input
2008     // instruction coming from before the loop, the array index must also be
2009     // defined before the loop and the aliasing analysis done by LSA is sufficient.
2010     // Any writes of a different value with an index that is not loop invariant
2011     // would invalidate the heap location in `VisitSetLocation()`.
2012   }
2013 
2014   // Check that there are no type conversions that would change the stored value.
2015   DCHECK(replacement != nullptr);
2016   if (!type_conversions.AreAllTypeConversionsImplicit(replacement)) {
2017     return false;
2018   }
2019 
2020   // Record replacement and report success.
2021   // Note: Replacements for the loads where we skipped type conversions above (and do not really
2022   // need the type conversion) shall be recorded later, either when we process the loads in
2023   // `ProcessLoadsRequiringLoopPhis()` or when needed to materialize another Phi.
2024   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2025     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2026     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
2027     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
2028     // We use both vector and non-vector operations to analyze the information. However, we replace
2029     // only vector operations in this code path.
2030     if (hl->IsVecOp()) {
2031       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2032       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
2033     }
2034   }
2035   return true;
2036 }
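
// Illustration: if the only value a heap location can hold on every path into
// and around the loop is a single instruction `v` defined before the loop,
// vector loads inside the loop can use `v` directly, without any Phi.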
2037 
2038 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2039     PhiPlaceholder phi_placeholder,
2040     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2041     DataType::Type type,
2042     bool can_use_default_or_phi) {
2043   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2044 
2045   // Use local allocator to reduce peak memory usage.
2046   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2047   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2048 
2049   // Use depth first search to check if any non-Phi input is unknown.
2050   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2051   phi_placeholders_to_materialize->ClearAllBits();
2052   phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2053   work_queue.push_back(phi_placeholder);
2054   while (!work_queue.empty()) {
2055     PhiPlaceholder current_phi_placeholder = work_queue.back();
2056     work_queue.pop_back();
2057     if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2058       // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2059       DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2060                  Value::Default()));
2061       continue;
2062     }
2063     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2064     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2065     size_t idx = current_phi_placeholder.GetHeapLocation();
2066     if (current_block->IsLoopHeader()) {
2067       // If the index is defined inside the loop, it may reference different elements of the
2068       // array on each iteration. Since we do not track if all elements of an array are set
2069       // to the same value explicitly, the only known value in the pre-header can be the default
2070       // value from NewArray or a Phi placeholder depending on a default value from some outer
2071       // loop pre-header. This Phi placeholder can be replaced only by the default value.
2072       HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2073       if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2074         if (can_use_default_or_phi &&
2075             TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2076                                                       type,
2077                                                       phi_placeholders_to_materialize)) {
2078           continue;
2079         } else {
2080           return current_phi_placeholder;  // Report the loop Phi placeholder.
2081         }
2082       }
2083       // A similar situation arises with the index defined outside the loop if we cannot use
2084       // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2085       // placeholder with a single instruction defined before the loop.
2086       if (!can_use_default_or_phi) {
2087         DCHECK(index != nullptr);  // Vector operations are array operations.
2088         if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2089                                                           phi_placeholders_to_materialize)) {
2090           continue;
2091         } else {
2092           return current_phi_placeholder;  // Report the loop Phi placeholder.
2093         }
2094       }
2095     }
2096     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2097       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2098       Value value = ReplacementOrValue(heap_values[idx].value);
2099       if (value.IsUnknown()) {
2100         // We cannot create a Phi for this loop Phi placeholder.
2101         return current_phi_placeholder;  // Report the loop Phi placeholder.
2102       }
2103       // For arrays, the location may have been clobbered by writes to other locations
2104       // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2105       if (current_block->IsLoopHeader() &&
2106           predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2107           heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2108         for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2109           if (i != idx &&
2110               !heap_values[i].stored_by.IsUnknown() &&
2111               MayAliasOnBackEdge(current_block, idx, i)) {
2112             // We cannot create a Phi for this loop Phi placeholder.
2113             return current_phi_placeholder;
2114           }
2115         }
2116       }
2117       // Skip type conversions. We're looking for the Phi placeholders now.
2118       value = SkipTypeConversions(value);
2119       if (value.NeedsPlainLoopPhi()) {
2120         // Visit the predecessor Phi placeholder if it's not visited yet.
2121         if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2122           phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2123           work_queue.push_back(value.GetPhiPlaceholder());
2124           LSE_VLOG << "For materialization of " << phi_placeholder
2125                    << " we need to materialize " << value;
2126         }
2127       }
2128     }
2129   }
2130 
2131   // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2132   return std::nullopt;
2133 }
2134 
2135 void LSEVisitor::MaterializeTypeConversionsIfNeeded(Value value) {
2136   if (!value.NeedsConvertedLoopPhi()) {
2137     return;
2138   }
2139   // There are at most 2 conversions (Uint8+Int16 or Int8+Uint16). Conversion to Int32
2140   // is implicit and conversions to same or smaller size replace previous conversions.
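  // Illustrative example (hypothetical, not from a specific test): a value read through
  // a `Uint8` converting load and then through an `Int16` converting load needs the chain
  // Uint8 -> Int16; converting the result to `Int32` afterwards is implicit and would not
  // add a third conversion.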
  static constexpr size_t kMaxConversionLoads = 2u;
  HInstruction* conversion_loads[kMaxConversionLoads];
  size_t num_conversion_loads = 0u;
  do {
    DCHECK_LT(num_conversion_loads, kMaxConversionLoads);
    HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
    DCHECK(!conversion_load->IsVecLoad());
    HInstruction* substitute = FindSubstitute(conversion_load);
    if (substitute != conversion_load) {
      value = Value::ForInstruction(substitute);
      break;
    }
    conversion_loads[num_conversion_loads] = conversion_load;
    ++num_conversion_loads;
    ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
    DCHECK(prev_record != nullptr);
    value = prev_record->value;
  } while (value.NeedsConvertedLoopPhi());
  value = value.NeedsPlainLoopPhi() ? Replacement(value) : value;
  HInstruction* replacement = value.GetInstruction();
  ArrayRef<HInstruction*> conversion_loads_array(conversion_loads, num_conversion_loads);
  for (HInstruction* conversion_load : ReverseRange(conversion_loads_array)) {
    AddRemovedLoad(conversion_load, replacement);
    replacement = substitute_instructions_for_loads_[conversion_load->GetId()];
    DCHECK(replacement != nullptr);
    DCHECK(replacement->IsTypeConversion());
  }
}

bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                                     DataType::Type type) {
  return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
}

bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
                                     DataType::Type type) {
  // Materialize all predecessors that do not need a loop Phi and determine if all inputs
  // other than loop Phis are the same.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  TypeConversionSet type_conversions;
  std::optional<Value> other_value = std::nullopt;
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsNonLoopPhi()) {
        DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
        MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
        value = Replacement(value);
      } else if (value.NeedsConvertedLoopPhi()) {
        TypeConversionSet local_type_conversions;
        Value without_conversions = SkipTypeConversions(value, &local_type_conversions);
        DCHECK(!without_conversions.NeedsNonLoopPhi());  // Would have been already materialized.
        if (without_conversions.NeedsPlainLoopPhi()) {
          type_conversions.Add(local_type_conversions);
          value = without_conversions;
        } else {
          MaterializeTypeConversionsIfNeeded(value);
          value = ReplacementOrValue(value);
        }
      }
      if (!value.NeedsPlainLoopPhi()) {
        if (!other_value) {
          // The first other value we found.
          other_value = value;
        } else if (!other_value->IsInvalid()) {
          // Check if the current `value` differs from the previous `other_value`.
          if (!value.Equals(*other_value)) {
            other_value = Value::Invalid();
          }
        }
      }
    }
  }

  DCHECK(other_value.has_value());
  DCHECK(other_value->IsInvalid() || other_value->IsDefault() || other_value->IsInstruction());
  if (other_value->IsDefault() ||  // Default value does not need type conversions.
      (other_value->IsInstruction() &&
          type_conversions.AreAllTypeConversionsImplicit(other_value->GetInstruction()))) {
    HInstruction* replacement =
        (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
    DCHECK(type_conversions.AreAllTypeConversionsImplicit(replacement));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
    }
    return true;
  }

  // If we're materializing only a single Phi, try to match it with an existing Phi.
  // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
  // This also covers the case when after replacing a previous set of Phi placeholders,
  // we continue with a Phi placeholder that does not really need a loop Phi anymore.
  if (phi_placeholder_indexes.size() == 1u) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      HInstruction* phi = phi_it.Current();
      DCHECK_EQ(phi->InputCount(), predecessors.size());
      ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
      auto cmp = [=, this](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
        Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
        HInstruction* lhs_instruction = lhs.GetInstruction();
        while (value.NeedsConvertedLoopPhi()) {
          HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
          if (!lhs_instruction->IsTypeConversion() ||
              lhs_instruction->GetType() != conversion_load->GetType()) {
            return false;
          }
          lhs_instruction = lhs_instruction->InputAt(0);
          ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
          DCHECK(prev_record != nullptr);
          value = prev_record->value;
        }
        if (value.NeedsPlainLoopPhi() && value.GetPhiPlaceholder().Equals(phi_placeholder)) {
          return lhs_instruction == phi;
        } else {
          value = ReplacementOrValue(value);
          DCHECK(value.IsDefault() || value.IsInstruction());
          return value.Equals(lhs_instruction);
        }
      };
      if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
        phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
        return true;
      }
    }
  }

  if (current_phase_ == Phase::kStoreElimination) {
    // We're not creating Phis during the final store elimination phase.
    return false;
  }

  // There are different inputs to the Phi chain. Create the Phis.
  ArenaAllocator* allocator = GetGraph()->GetAllocator();
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    CHECK_GE(block->GetPredecessors().size(), 2u);
    phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
        new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
  }
  // Fill the Phi inputs.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    size_t idx = phi_placeholder.GetHeapLocation();
    HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
    DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
        << "type=" << type << " vs phi-type=" << phi->GetType();
    for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
      HBasicBlock* predecessor = block->GetPredecessors()[i];
      Value predecessor_value = heap_values_for_[predecessor->GetBlockId()][idx].value;
      MaterializeTypeConversionsIfNeeded(predecessor_value);
      Value value = ReplacementOrValue(predecessor_value);
      HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
      DCHECK_NE(input->GetType(), DataType::Type::kVoid);
      phi->SetRawInputAt(i, input);
      DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
          << " input: " << input->GetType() << value << " phi: " << phi->GetType()
          << " request: " << type;
    }
  }
  // Add the Phis to their blocks.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
  }
  if (type == DataType::Type::kReference) {
    ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
    ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
    }
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
  }

  return true;
}

bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                                     DataType::Type type) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // We want to recognize when a subset of these loop Phis that does not need other
  // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
  // so that this instruction can be used instead of each Phi in the set. See for example
  // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
  // materialize these loop Phis starting from the smallest transitive closure.
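  // Illustrative sketch with hypothetical placeholders: given transitive closures
  // A = {A}, B = {A, B} and C = {A, B, C}, we materialize {A} first; if A is replaced
  // by a single existing instruction, B and then C may collapse to that same
  // instruction instead of requiring new Phis.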

  // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
  // assign new indexes to the Phi placeholders, making the matrix dense.
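  // For example (hypothetical indexes): if only placeholders 3, 17 and 42 are to be
  // materialized, they are assigned matrix indexes 0, 1 and 2, respectively.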
  ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
                                           static_cast<size_t>(-1),  // Invalid.
                                           allocator.Adapter(kArenaAllocLSE));
  ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
  size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
  phi_placeholder_indexes.reserve(num_phi_placeholders);
  for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
    matrix_indexes[marker_index] = phi_placeholder_indexes.size();
    phi_placeholder_indexes.push_back(marker_index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
  dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    static constexpr bool kExpandable = false;
    dependencies.push_back(
        ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
    ArenaBitVector* current_dependencies = dependencies.back();
    current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
    PhiPlaceholder current_phi_placeholder =
        GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (pred_value.NeedsLoopPhi()) {
        size_t pred_value_index = PhiPlaceholderIndex(pred_value);
        DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
        DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
        current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
      }
    }
  }

  // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
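  // Illustrative example (hypothetical matrix indexes A, B, C): from the direct
  // dependencies A = {A, B}, B = {B, C} and C = {C}, the closure computed below
  // yields A = {A, B, C}, i.e. materializing A requires materializing B and C as well.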
  for (size_t k = 0; k != num_phi_placeholders; ++k) {
    for (size_t i = 0; i != num_phi_placeholders; ++i) {
      for (size_t j = 0; j != num_phi_placeholders; ++j) {
        if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
          dependencies[i]->SetBit(j);
        }
      }
    }
  }

  // Count the number of transitive dependencies for each replaceable Phi placeholder.
  ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
  num_dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
  }

  // Pick a Phi placeholder with the smallest number of transitive dependencies and
  // materialize it and its dependencies. Repeat until we have materialized all.
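  // Continuing the illustrative example above: C (closure {C}) is materialized first;
  // subtracting it leaves B = {B} and A = {A, B}, so B is picked next and A last.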
  ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
  current_subset.reserve(num_phi_placeholders);
  size_t remaining_phi_placeholders = num_phi_placeholders;
  while (remaining_phi_placeholders != 0u) {
    auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
    DCHECK_LE(*it, remaining_phi_placeholders);
    size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
    ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
    size_t current_num_dependencies = num_dependencies[current_matrix_index];
    current_subset.clear();
    for (uint32_t matrix_index : current_dependencies->Indexes()) {
      current_subset.push_back(phi_placeholder_indexes[matrix_index]);
    }
    if (!MaterializeLoopPhis(current_subset, type)) {
      DCHECK_EQ(current_phase_, Phase::kStoreElimination);
      // This is the final store elimination phase and we shall not be able to eliminate any
      // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
      for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
        if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
          DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
          phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
        }
      }
      return false;
    }
    for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
      if (current_dependencies->IsBitSet(matrix_index)) {
        // Mark all dependencies as done by setting their `num_dependencies[.]` to the
        // maximum, so that they shall never be the minimum again.
        num_dependencies[matrix_index] = num_phi_placeholders;
      } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
        // Remove dependencies from other Phi placeholders.
        dependencies[matrix_index]->Subtract(current_dependencies);
        num_dependencies[matrix_index] -= current_num_dependencies;
      }
    }
    remaining_phi_placeholders -= current_num_dependencies;
  }
  return true;
}

bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
  ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
  ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
  auto res =
      FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /*can_use_default_or_phi=*/ true);
  CHECK(!res.has_value()) << *res;
  return MaterializeLoopPhis(abv, type);
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
    PhiPlaceholder phi_placeholder, HInstruction* load) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // Find Phi placeholders to materialize.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  DataType::Type type = load->GetType();
  bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    return loop_phi_with_unknown_input;  // Return failure.
  }

  DCHECK_EQ(current_phase_, Phase::kLoadElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(success);

  // Report success.
  return std::nullopt;
}

// Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
// find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
// propagate the load(s) as the new value(s) to successors; this may uncover new elimination
// opportunities. If we find no such load, we shall at least propagate an unknown value to some
// heap location that is needed by another loop Phi placeholder.
void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
  DCHECK(!loads_requiring_loop_phi_.empty());
  size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
  DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
  phi_placeholder_replacements_[loop_phi_with_unknown_input_index] = Value::Unknown();

  uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
  const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
  size_t reverse_post_order_index = 0;
  size_t reverse_post_order_size = reverse_post_order.size();
  size_t loads_and_stores_index = 0u;
  size_t loads_and_stores_size = loads_and_stores_.size();

  // Skip blocks and instructions before the block containing the loop phi with unknown input.
  DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    while (loads_and_stores_index != loads_and_stores_size &&
           loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
      ++loads_and_stores_index;
    }
    ++reverse_post_order_index;
    DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse one temporary vector for all remaining blocks.
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));

  auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
    Value value;
    if (block->IsLoopHeader()) {
      if (block->GetLoopInformation()->IsIrreducible()) {
        value = Value::Unknown();
      } else {
        value = PrepareLoopValue(block, idx);
      }
    } else {
      value = MergePredecessorValues(block, idx);
    }
    DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
    return value;
  };

  // Process remaining blocks and instructions.
  bool found_unreplaceable_load = false;
  bool replaced_heap_value_with_unknown = false;
  for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    if (block->IsExitBlock()) {
      continue;
    }

    // We shall reconstruct only the heap values that we need for processing loads and stores.
    local_heap_values.clear();
    local_heap_values.resize(num_heap_locations, Value::Invalid());

    for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
      HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
      size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
      if (load_or_store->GetBlock() != block) {
        break;  // End of instructions from the current block.
      }
      if (IsStore(load_or_store)) {
        StoreRecord* store_record = store_records_[load_or_store->GetId()];
        DCHECK(store_record != nullptr);
        HInstruction* stored_value = store_record->stored_value;
        DCHECK(stored_value != nullptr);
        // Note that the `stored_value` can be a newly created `Phi` with an id that falls
        // outside the allocated `loads_requiring_loop_phi_` range.
        DCHECK_IMPLIES(
            IsLoad(stored_value),
            static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
        if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
            loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
          continue;  // This store never needed a loop Phi.
        }
        ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
        // Process the store by updating `local_heap_values[idx]`. The last update shall
        // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
        // at the end of the block.
        Value replacement = ReplacementOrValue(record->value);
        if (replacement.NeedsLoopPhi()) {
          // No replacement yet. Use the Phi placeholder or an appropriate converting load.
          DCHECK(record->value.NeedsLoopPhi());
          local_heap_values[idx] = StoredValueForLoopPhiPlaceholderDependentLoad(idx, stored_value);
          DCHECK(local_heap_values[idx].NeedsLoopPhi());
        } else {
          // If the load fetched a known value, use it, otherwise use the load.
          local_heap_values[idx] = Value::ForInstruction(
              replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
        }
      } else {
        // Process the load unless it has previously been marked unreplaceable.
        DCHECK(IsLoad(load_or_store));
        ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
        if (record == nullptr) {
          continue;  // This load never needed a loop Phi.
        }
        if (record->value.NeedsLoopPhi()) {
          if (local_heap_values[idx].IsInvalid()) {
            local_heap_values[idx] = get_initial_value(block, idx);
          }
          if (local_heap_values[idx].IsUnknown()) {
            // This load cannot be replaced. Keep stores that feed the Phi placeholder
            // (no aliasing since then, otherwise the Phi placeholder would not have been
            // propagated as a value to this load) and store the load as the new heap value.
            found_unreplaceable_load = true;
            KeepStores(record->value);
            record->value = Value::Unknown();
            local_heap_values[idx] = Value::ForInstruction(load_or_store);
          } else if (local_heap_values[idx].NeedsLoopPhi()) {
            // The load may still be replaced with a Phi later.
            DCHECK(local_heap_values[idx].Equals(record->value));
          } else {
            // This load can be eliminated but we may need to construct non-loop Phis.
            if (local_heap_values[idx].NeedsNonLoopPhi()) {
              MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
                                     load_or_store->GetType());
              local_heap_values[idx] = Replacement(local_heap_values[idx]);
            }
            record->value = local_heap_values[idx];
            DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
                << "The replacement heap value can be an HIR instruction or the default value.";
            HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
                                           GetDefaultValue(load_or_store->GetType()) :
                                           local_heap_values[idx].GetInstruction();
            AddRemovedLoad(load_or_store, heap_value);
          }
        }
      }
    }

    // All heap values that previously needed a loop Phi at the end of the block
    // need to be updated for processing successors.
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t idx = 0; idx != num_heap_locations; ++idx) {
      if (heap_values[idx].value.NeedsLoopPhi()) {
        if (local_heap_values[idx].IsValid()) {
          heap_values[idx].value = local_heap_values[idx];
        } else {
          heap_values[idx].value = get_initial_value(block, idx);
        }
        if (heap_values[idx].value.IsUnknown()) {
          replaced_heap_value_with_unknown = true;
        }
      }
    }
  }
  DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
}

void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
  // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
  // make the result of the processing depend on the order in which we process these loads.
  // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
  // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
  if (loads_requiring_loop_phi_.empty()) {
    return;  // No loads to process.
  }
  for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
    ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
    if (record == nullptr) {
      continue;
    }
    HInstruction* load = load_store_record.load_or_store;
    while (record->value.NeedsLoopPhi()) {
      Value without_conversions = SkipTypeConversions(record->value);
      if (!without_conversions.NeedsPlainLoopPhi() ||
          phi_placeholder_replacements_[PhiPlaceholderIndex(without_conversions)].IsValid()) {
        break;
      }
      std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
          TryToMaterializeLoopPhis(without_conversions.GetPhiPlaceholder(), load);
      DCHECK_EQ(
          loop_phi_with_unknown_input.has_value(),
          phi_placeholder_replacements_[PhiPlaceholderIndex(without_conversions)].IsInvalid());
      if (loop_phi_with_unknown_input) {
        DCHECK_GE(GetGraph()
                      ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                      ->GetPredecessors()
                      .size(),
                  2u);
        ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
      }
    }
    // The load, or converting load's underlying phi placeholder, could have been marked
    // as unreplaceable (and stores marked for keeping) or marked for replacement with an
    // instruction in `ProcessLoopPhiWithUnknownInput()`.
    DCHECK(record->value.IsUnknown() ||
           record->value.IsInstruction() ||
           record->value.NeedsLoopPhi());
    if (record->value.NeedsLoopPhi()) {
      MaterializeTypeConversionsIfNeeded(record->value);
      record->value = ReplacementOrValue(record->value);
      HInstruction* heap_value = record->value.GetInstruction();
      // Type conversion substitutes can be created by `MaterializeTypeConversionsIfNeeded()`,
      // either in the call directly above, or while materializing Phis. For all loads that did
      // not have a substitute recorded, record it now; this can also be a type conversion.
      HInstruction* substitute = FindSubstitute(load);
      if (substitute == load) {
        AddRemovedLoad(load, heap_value);
      } else {
        DCHECK(substitute->IsTypeConversion());
      }
    }
  }
}

void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
  ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
  size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
  work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
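  // For example, `start_size == 5u` reserves ((5u * 3u) + 1u) / 2u == 8u entries.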
  for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
    work_queue.push_back(index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  while (!work_queue.empty()) {
    uint32_t cur_phi_idx = work_queue.back();
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
    work_queue.pop_back();
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
                             << " (blocks: " << blocks.size() << ")";
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      // For loop back-edges we must also preserve all stores to locations that
      // may alias with the location `idx`.
      // TODO: Add test cases around this.
      bool is_back_edge =
          block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
      size_t start = is_back_edge ? 0u : idx;
      size_t end = is_back_edge ? heap_values.size() : idx + 1u;
      for (size_t i = start; i != end; ++i) {
        Value stored_by = heap_values[i].stored_by;
        if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
          if (stored_by.NeedsPhi()) {
            size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
            if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
              phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
              work_queue.push_back(phi_placeholder_index);
            }
          } else {
            DCHECK(IsStore(stored_by.GetInstruction()));
            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
            DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
            kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
          }
        }
      }
    }
  }
}

void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ ValueRecord* value_record) {
  while (value_record->stored_by.IsInstruction() &&
         !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
    StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
    DCHECK(store_record != nullptr);
    *value_record = store_record->old_value_record;
  }
  if (value_record->stored_by.NeedsPhi() &&
      !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
           PhiPlaceholderIndex(value_record->stored_by))) {
    // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
    // Phi placeholder to recalculate the actual value.
    value_record->value = value_record->stored_by;
  }
  value_record->value = ReplacementOrValue(value_record->value);
  if (value_record->value.NeedsNonLoopPhi()) {
    // Treat all Phi placeholders as requiring loop Phis at this point.
    // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
    value_record->value =
        Value::ForPlainLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
  }
}

void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
                                               DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);

  // Find Phi placeholders to try and match against existing Phis or other replacement values.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    // Mark both this Phi placeholder and the input Phi placeholder as unreplaceable.
    phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
    phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
        Value::Unknown();
    return;
  }

  DCHECK_EQ(current_phase_, Phase::kStoreElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
  DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
            !success);
}

void LSEVisitor::FindStoresWritingOldValues() {
  // The Phi placeholder replacements have so far been used for eliminating loads,
  // tracking values that would be stored if all stores were kept. As we want to
  // compare actual old values after removing unmarked stores, prune the Phi
  // placeholder replacements that can be fed by values we may not actually store.
  // Replacements marked as unknown can be kept as they are fed by some unknown
  // value and would end up as unknown again if we recalculated them.
  for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
    if (!phi_placeholder_replacements_[i].IsUnknown() &&
        !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
      phi_placeholder_replacements_[i] = Value::Invalid();
    }
  }

  // Update heap values at end of blocks.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
      UpdateValueRecordForStoreElimination(&value_record);
    }
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Mark the stores we want to eliminate in a separate bit vector.
  ArenaBitVector eliminated_stores(&allocator,
                                   /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
                                   /*expandable=*/ false,
                                   kArenaAllocLSE);

  for (uint32_t store_id : kept_stores_.Indexes()) {
    DCHECK(kept_stores_.IsBitSet(store_id));
    StoreRecord* store_record = store_records_[store_id];
    DCHECK(store_record != nullptr);
    UpdateValueRecordForStoreElimination(&store_record->old_value_record);
    if (store_record->old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record->stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
      store_record->old_value_record.value =
          ReplacementOrValue(store_record->old_value_record.value);
    }
    DCHECK(!store_record->old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record->stored_value);
    if (store_record->old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store_id);
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
  // remove, we will set it to true in VisitMonitorOperation.
  GetGraph()->SetHasMonitorOperations(false);

  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
  current_phase_ = Phase::kLoadElimination;
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.
  current_phase_ = Phase::kStoreElimination;
  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();

  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.
  FinishFullLSE();
}


void LSEVisitor::FinishFullLSE() {
  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
    if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
        (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
    }
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
      if ((record.load_or_store->IsInstanceFieldSet() &&
           record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
          (record.load_or_store->IsStaticFieldSet() &&
           record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
      }
    }
  }

  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

// The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
// cannot be directly allocated with an arena allocator, so we need to wrap it.
class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
 public:
  LSEVisitorWrapper(HGraph* graph,
                    const HeapLocationCollector& heap_location_collector,
                    OptimizingCompilerStats* stats)
      : lse_visitor_(graph, heap_location_collector, stats) {}

  void Run() {
    lse_visitor_.Run();
  }

 private:
  LSEVisitor lse_visitor_;
};

bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable()) {
    // Debugger may set heap values or trigger deoptimization of callers.
    // Skip this optimization.
    return false;
  }
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, stats_, &allocator);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA, skip this optimization.
    return false;
  }

  // Currently load-store analysis can't handle predicated loads/stores; specifically,
  // pairs of memory operations with different predicates.
  // TODO: support predicated SIMD.
  if (graph_->HasPredicatedSIMD()) {
    return false;
  }

  std::unique_ptr<LSEVisitorWrapper> lse_visitor(
      new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
  lse_visitor->Run();
  return true;
}

#undef LSE_VLOG

}  // namespace art