• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "load_store_elimination.h"
18 
19 #include <algorithm>
20 #include <optional>
21 #include <sstream>
22 #include <variant>
23 
24 #include "base/arena_allocator.h"
25 #include "base/arena_bit_vector.h"
26 #include "base/array_ref.h"
27 #include "base/bit_vector-inl.h"
28 #include "base/bit_vector.h"
29 #include "base/globals.h"
30 #include "base/indenter.h"
31 #include "base/iteration_range.h"
32 #include "base/scoped_arena_allocator.h"
33 #include "base/scoped_arena_containers.h"
34 #include "base/transform_iterator.h"
35 #include "escape.h"
36 #include "execution_subgraph.h"
37 #include "handle.h"
38 #include "load_store_analysis.h"
39 #include "mirror/class_loader.h"
40 #include "mirror/dex_cache.h"
41 #include "nodes.h"
42 #include "optimizing/execution_subgraph.h"
43 #include "optimizing_compiler_stats.h"
44 #include "reference_type_propagation.h"
45 #include "side_effects_analysis.h"
46 #include "stack_map.h"
47 
48 /**
49  * The general algorithm of load-store elimination (LSE).
50  *
51  * We use load-store analysis to collect a list of heap locations and perform
52  * alias analysis of those heap locations. LSE then keeps track of a list of
53  * heap values corresponding to the heap locations and stores that put those
54  * values in these locations.
55  *  - In phase 1, we visit basic blocks in reverse post order and for each basic
56  *    block, visit instructions sequentially, recording heap values and looking
57  *    for loads and stores to eliminate without relying on loop Phis.
58  *  - In phase 2, we look for loads that can be replaced by creating loop Phis
59  *    or using a loop-invariant value.
60  *  - In phase 3, we determine which stores are dead and can be eliminated and
61  *    based on that information we re-evaluate whether some kept stores are
62  *    storing the same value as the value in the heap location; such stores are
63  *    also marked for elimination.
64  *  - In phase 4, we commit the changes, replacing loads marked for elimination
65  *    in previous processing and removing stores not marked for keeping. We also
66  *    remove allocations that are no longer needed.
67  *  - In phase 5, we move allocations which only escape along some executions
68  *    closer to their escape points and fixup non-escaping paths with their actual
69  *    values, creating PHIs when needed.
70  *
71  * 1. Walk over blocks and their instructions.
72  *
73  * The initial set of heap values for a basic block is
74  *  - For a loop header of an irreducible loop, all heap values are unknown.
75  *  - For a loop header of a normal loop, all values unknown at the end of the
76  *    preheader are initialized to unknown, other heap values are set to Phi
77  *    placeholders as we cannot determine yet whether these values are known on
78  *    all back-edges. We use Phi placeholders also for array heap locations with
79  *    index defined inside the loop but this helps only when the value remains
80  *    zero from the array allocation throughout the loop.
81  *  - For catch blocks, we clear all assumptions since we arrived due to an
82  *    instruction throwing.
83  *  - For other basic blocks, we merge incoming values from the end of all
84  *    predecessors. If any incoming value is unknown, the start value for this
85  *    block is also unknown. Otherwise, if all the incoming values are the same
86  *    (including the case of a single predecessor), the incoming value is used.
87  *    Otherwise, we use a Phi placeholder to indicate different incoming values.
88  *    We record whether such Phi placeholder depends on a loop Phi placeholder.
89  *
90  * For each instruction in the block
91  *  - If the instruction is a load from a heap location with a known value not
92  *    dependent on a loop Phi placeholder, the load can be eliminated, either by
93  *    using an existing instruction or by creating new Phi(s) instead. In order
94  *    to maintain the validity of all heap locations during the optimization
95  *    phase, we only record substitutes at this phase and the real elimination
96  *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
97  *    replacement are recorded for processing later. We also keep track of the
98  *    heap-value at the start load so that later partial-LSE can predicate the
99  *    load.
100  *  - If the instruction is a store, it updates the heap value for the heap
101  *    location with the stored value and records the store itself so that we can
102  *    mark it for keeping if the value becomes observable. Heap values are
103  *    invalidated for heap locations that may alias with the store instruction's
104  *    heap location and their recorded stores are marked for keeping as they are
105  *    now potentially observable. The store instruction can be eliminated unless
106  *    the value stored is later needed e.g. by a load from the same/aliased heap
107  *    location or the heap location persists at method return/deoptimization.
108  *  - A store that stores the same value as the heap value is eliminated.
109  *  - For newly instantiated instances, their heap values are initialized to
110  *    language defined default values.
111  *  - Finalizable objects are considered as persisting at method
112  *    return/deoptimization.
113  *  - Some instructions such as invokes are treated as loading and invalidating
114  *    all the heap values, depending on the instruction's side effects.
115  *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
116  *    partial overlap access among ArrayGet/ArraySet/VecLoad/Store is seen as
117  *    alias and no load/store is eliminated in such case.
118  *
119  * The time complexity of the initial phase has several components. The total
120  * time for the initialization of heap values for all blocks is
121  *    O(heap_locations * edges)
122  * and the time complexity for simple instruction processing is
123  *    O(instructions).
124  * See the description of phase 3 for additional complexity due to matching of
125  * existing Phis for replacing loads.
126  *
127  * 2. Process loads that depend on loop Phi placeholders.
128  *
129  * We go over these loads to determine whether they can be eliminated. We look
130  * for the set of all Phi placeholders that feed the load and depend on a loop
131  * Phi placeholder and, if we find no unknown value, we construct the necessary
132  * Phi(s) or, if all other inputs are identical, i.e. the location does not
133  * change in the loop, just use that input. If we do find an unknown input, this
134  * must be from a loop back-edge and we replace the loop Phi placeholder with
135  * unknown value and re-process loads and stores that previously depended on
136  * loop Phi placeholders. This shall find at least one load of an unknown value
137  * which is now known to be unreplaceable or a new unknown value on a back-edge
138  * and we repeat this process until each load is either marked for replacement
139  * or found to be unreplaceable. As we mark at least one additional loop Phi
140  * placeholder as unreplacable in each iteration, this process shall terminate.
141  *
142  * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
143  * is limited by the number of Phi placeholders and their dependencies we need
144  * to search with worst-case time complexity
145  *    O(phi_placeholder_dependencies) .
146  * The dependencies are usually just the Phi placeholders' potential inputs,
147  * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
148  * replacement search, there are additional dependencies to consider, see below.
149  *
150  * In the successful case (no unknown inputs found) we use the Floyd-Warshall
151  * algorithm to determine transitive closures for each found Phi placeholder,
152  * and then match or materialize Phis from the smallest transitive closure,
153  * so that we can determine if such subset has a single other input. This has
154  * time complexity
155  *    O(phi_placeholders_found^3) .
156  * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
157  * contribute to this as such Phi placeholders are replaced immediately.
158  * The total time of all such successful cases has time complexity
159  *    O(phi_placeholders^3)
160  * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. Similar
161  * argument applies to the searches used to find all successful cases, so their
162  * total contribution is also just an insignificant
163  *    O(phi_placeholder_dependencies) .
164  * The materialization of Phis has an insignificant total time complexity
165  *    O(phi_placeholders * edges) .
166  *
167  * If we find an unknown input, we re-process heap values and loads with a time
168  * complexity that's the same as the phase 1 in the worst case. Adding this to
169  * the depth-first search time complexity yields
170  *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
171  * for a single iteration. We can ignore the middle term as it's proprotional
172  * to the number of Phi placeholder inputs included in the first term. Using
173  * the upper limit of number of such iterations, the total time complexity is
174  *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
175  *
176  * The upper bound of Phi placeholder inputs is
177  *    heap_locations * edges
178  * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
179  * include other heap locations in predecessor blocks with the upper bound of
180  *    heap_locations^2 * edges .
181  * Using the estimate
182  *    edges <= blocks^2
183  * and
184  *    phi_placeholders <= heap_locations * blocks ,
185  * the worst-case time complexity of the
186  *    O(phi_placeholder_dependencies * phi_placeholders)
187  * term from unknown input cases is actually
188  *    O(heap_locations^3 * blocks^3) ,
189  * exactly as the estimate for the Floyd-Warshall parts of successful cases.
190  * Adding the other term from the unknown input cases (to account for the case
191  * with significantly more instructions than blocks and heap locations), the
192  * phase 2 time complexity is
193  *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
194  *
195  * See the description of phase 3 for additional complexity due to matching of
196  * existing Phis for replacing loads.
197  *
198  * 3. Determine which stores to keep and which to eliminate.
199  *
200  * During instruction processing in phase 1 and re-processing in phase 2, we are
201  * keeping a record of the stores and Phi placeholders that become observable
202  * and now propagate the observable Phi placeholders to all actual stores that
203  * feed them. Having determined observable stores, we look for stores that just
204  * overwrite the old value with the same. Since ignoring non-observable stores
205  * actually changes the old values in heap locations, we need to recalculate
206  * Phi placeholder replacements but we proceed similarly to the previous phase.
207  * We look for the set of all Phis that feed the old value replaced by the store
208  * (but ignoring whether they depend on a loop Phi) and, if we find no unknown
209  * value, we try to match existing Phis (we do not create new Phis anymore) or,
210  * if all other inputs are identical, i.e. the location does not change in the
211  * loop, just use that input. If this succeeds and the old value is identical to
212  * the value we're storing, such store shall be eliminated.
213  *
214  * The work is similar to the phase 2, except that we're not re-processing loads
215  * and stores anymore, so the time complexity of phase 3 is
216  *    O(heap_locations^3 * blocks^3) .
217  *
218  * There is additional complexity in matching existing Phis shared between the
219  * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
220  * time (this could be difficult and slow), so each matching attempt is just
221  * looking at Phis in the block (both old Phis and newly created Phis) and their
222  * inputs. As we create at most `heap_locations` Phis in each block, the upper
223  * bound on the number of Phis we look at is
224  *    heap_locations * (old_phis + heap_locations)
225  * and the worst-case time complexity is
226  *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
227  * The first term is lower than one term in phase 2, so the relevant part is
228  *    O(heap_locations * old_phis * edges) .
229  *
230  * 4. Replace loads and remove unnecessary stores and singleton allocations.
231  *
232  * A special type of objects called singletons are instantiated in the method
233  * and have a single name, i.e. no aliases. Singletons have exclusive heap
234  * locations since they have no aliases. Singletons are helpful in narrowing
235  * down the life span of a heap location such that they do not always need to
236  * participate in merging heap values. Allocation of a singleton can be
237  * eliminated if that singleton is not used and does not persist at method
238  * return/deoptimization.
239  *
240  * The time complexity of this phase is
241  *    O(instructions + instruction_uses) .
242  *
243  * 5. Partial LSE
244  *
245  * Move allocations closer to their escapes and remove/predicate loads and
246  * stores as required.
247  *
248  * Partial singletons are objects which only escape from the function or have
249  * multiple names along certain execution paths. In cases where we recognize
250  * these partial singletons we can move the allocation and initialization
251  * closer to the actual escape(s). We can then perform a simplified version of
252  * LSE step 2 to determine the unescaped value of any reads performed after the
253  * object may have escaped. These are used to replace these reads with
254  * 'predicated-read' instructions where the value is only read if the object
255  * has actually escaped. We use the existence of the object itself as the
256  * marker of whether escape has occurred.
257  *
258  * There are several steps in this sub-pass
259  *
260  * 5.1 Group references
261  *
262  * Since all heap-locations for a single reference escape at the same time, we
263  * need to group the heap-locations by reference and process them at the same
264  * time.
265  *
266  *    O(heap_locations).
267  *
268  * FIXME: The time complexity above assumes we can bucket the heap-locations in
269  * O(1) which is not true since we just perform a linear-scan of the heap-ref
270  * list. Since there are generally only a small number of heap-references which
271  * are partial-singletons this is fine and lower real overhead than a hash map.
272  *
273  * 5.2 Generate materializations
274  *
275  * Once we have the references we add new 'materialization blocks' on the edges
276  * where escape becomes inevitable. This information is calculated by the
277  * execution-subgraphs created during load-store-analysis. We create new
278  * 'materialization's in these blocks and initialize them with the value of
279  * each heap-location ignoring side effects (since the object hasn't escaped
280  * yet). Worst case this is the same time-complexity as step 3 since we may
281  * need to materialize phis.
282  *
283  *    O(heap_locations^2 * materialization_edges)
284  *
285  * 5.3 Propagate materializations
286  *
287  * Since we use the materialization as the marker for escape we need to
288  * propagate it throughout the graph. Since the subgraph analysis considers any
289  * lifetime that escapes a loop (and hence would require a loop-phi) to be
290  * escaping at the loop-header we do not need to create any loop-phis to do
291  * this.
292  *
293  *    O(edges)
294  *
295  * NB: Currently the subgraph analysis considers all objects to have their
296  * lifetimes start at the entry block. This simplifies that analysis enormously
297  * but means that we cannot distinguish between an escape in a loop where the
298  * lifetime does not escape the loop (in which case this pass could optimize)
299  * and one where it does escape the loop (in which case the whole loop is
300  * escaping). This is a shortcoming that would be good to fix at some point.
301  *
302  * 5.4 Propagate partial values
303  *
304  * We need to replace loads and stores to the partial reference with predicated
305  * ones that have default non-escaping values. Again this is the same as step 3.
306  *
307  *   O(heap_locations^2 * edges)
308  *
309  * 5.5 Final fixup
310  *
311  * Now all we need to do is replace and remove uses of the old reference with the
312  * appropriate materialization.
313  *
314  *   O(instructions + uses)
315  *
316  * FIXME: The time complexities described above assumes that the
317  * HeapLocationCollector finds a heap location for an instruction in O(1)
318  * time but it is currently O(heap_locations); this can be fixed by adding
319  * a hash map to the HeapLocationCollector.
320  */
321 
322 namespace art {
323 
324 #define LSE_VLOG \
325   if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
326 
327 class PartialLoadStoreEliminationHelper;
328 class HeapRefHolder;
329 
330 // Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
331 class LSEVisitor final : private HGraphDelegateVisitor {
332  public:
333   LSEVisitor(HGraph* graph,
334              const HeapLocationCollector& heap_location_collector,
335              bool perform_partial_lse,
336              OptimizingCompilerStats* stats);
337 
338   void Run();
339 
340  private:
341   class PhiPlaceholder {
342    public:
PhiPlaceholder()343     constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
PhiPlaceholder(uint32_t block_id,size_t heap_location)344     constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
345         : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}
346 
347     constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
348     constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
349     constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
350     constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;
351 
GetBlockId() const352     constexpr uint32_t GetBlockId() const {
353       return block_id_;
354     }
355 
GetHeapLocation() const356     constexpr size_t GetHeapLocation() const {
357       return heap_location_;
358     }
359 
Equals(const PhiPlaceholder & p2) const360     constexpr bool Equals(const PhiPlaceholder& p2) const {
361       return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
362     }
363 
Dump(std::ostream & oss) const364     void Dump(std::ostream& oss) const {
365       oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
366     }
367 
368    private:
369     uint32_t block_id_;
370     uint32_t heap_location_;
371   };
372 
373   struct Marker {};
374 
375   class Value;
376 
377   class PriorValueHolder {
378    public:
379     constexpr explicit PriorValueHolder(Value prior);
380 
IsInstruction() const381     constexpr bool IsInstruction() const {
382       return std::holds_alternative<HInstruction*>(value_);
383     }
IsPhi() const384     constexpr bool IsPhi() const {
385       return std::holds_alternative<PhiPlaceholder>(value_);
386     }
IsDefault() const387     constexpr bool IsDefault() const {
388       return std::holds_alternative<Marker>(value_);
389     }
GetPhiPlaceholder() const390     constexpr PhiPlaceholder GetPhiPlaceholder() const {
391       DCHECK(IsPhi());
392       return std::get<PhiPlaceholder>(value_);
393     }
GetInstruction() const394     constexpr HInstruction* GetInstruction() const {
395       DCHECK(IsInstruction());
396       return std::get<HInstruction*>(value_);
397     }
398 
399     Value ToValue() const;
400     void Dump(std::ostream& oss) const;
401 
Equals(PriorValueHolder other) const402     constexpr bool Equals(PriorValueHolder other) const {
403       return value_ == other.value_;
404     }
405 
406    private:
407     std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
408   };
409 
410   friend constexpr bool operator==(const Marker&, const Marker&);
411   friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
412   friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
413   friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);
414 
415   class Value {
416    public:
417     enum class ValuelessType {
418       kInvalid,
419       kPureUnknown,
420       kDefault,
421     };
422     struct MergedUnknownMarker {
423       PhiPlaceholder phi_;
424     };
425     struct NeedsNonLoopPhiMarker {
426       PhiPlaceholder phi_;
427     };
428     struct NeedsLoopPhiMarker {
429       PhiPlaceholder phi_;
430     };
431 
Invalid()432     static constexpr Value Invalid() {
433       return Value(ValuelessType::kInvalid);
434     }
435 
436     // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
437     // A heap location can be set to an unknown heap value when:
438     // - it is coming from outside the method,
439     // - it is killed due to aliasing, or side effects, or merging with an unknown value.
PureUnknown()440     static constexpr Value PureUnknown() {
441       return Value(ValuelessType::kPureUnknown);
442     }
443 
PartialUnknown(Value old_value)444     static constexpr Value PartialUnknown(Value old_value) {
445       if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
446         return PureUnknown();
447       } else {
448         return Value(PriorValueHolder(old_value));
449       }
450     }
451 
MergedUnknown(PhiPlaceholder phi_placeholder)452     static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
453       return Value(MergedUnknownMarker{phi_placeholder});
454     }
455 
456     // Default heap value after an allocation.
457     // A heap location can be set to that value right after an allocation.
Default()458     static constexpr Value Default() {
459       return Value(ValuelessType::kDefault);
460     }
461 
ForInstruction(HInstruction * instruction)462     static constexpr Value ForInstruction(HInstruction* instruction) {
463       return Value(instruction);
464     }
465 
ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder)466     static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
467       return Value(NeedsNonLoopPhiMarker{phi_placeholder});
468     }
469 
ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder)470     static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
471       return Value(NeedsLoopPhiMarker{phi_placeholder});
472     }
473 
ForPhiPlaceholder(PhiPlaceholder phi_placeholder,bool needs_loop_phi)474     static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
475       return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
476                             : ForNonLoopPhiPlaceholder(phi_placeholder);
477     }
478 
IsValid() const479     constexpr bool IsValid() const {
480       return !IsInvalid();
481     }
482 
IsInvalid() const483     constexpr bool IsInvalid() const {
484       return std::holds_alternative<ValuelessType>(value_) &&
485              GetValuelessType() == ValuelessType::kInvalid;
486     }
487 
IsPartialUnknown() const488     bool IsPartialUnknown() const {
489       return std::holds_alternative<PriorValueHolder>(value_);
490     }
491 
IsMergedUnknown() const492     bool IsMergedUnknown() const {
493       return std::holds_alternative<MergedUnknownMarker>(value_);
494     }
495 
IsPureUnknown() const496     bool IsPureUnknown() const {
497       return std::holds_alternative<ValuelessType>(value_) &&
498              GetValuelessType() == ValuelessType::kPureUnknown;
499     }
500 
IsUnknown() const501     bool IsUnknown() const {
502       return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
503     }
504 
IsDefault() const505     bool IsDefault() const {
506       return std::holds_alternative<ValuelessType>(value_) &&
507              GetValuelessType() == ValuelessType::kDefault;
508     }
509 
IsInstruction() const510     bool IsInstruction() const {
511       return std::holds_alternative<HInstruction*>(value_);
512     }
513 
NeedsNonLoopPhi() const514     bool NeedsNonLoopPhi() const {
515       return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
516     }
517 
NeedsLoopPhi() const518     bool NeedsLoopPhi() const {
519       return std::holds_alternative<NeedsLoopPhiMarker>(value_);
520     }
521 
NeedsPhi() const522     bool NeedsPhi() const {
523       return NeedsNonLoopPhi() || NeedsLoopPhi();
524     }
525 
GetInstruction() const526     HInstruction* GetInstruction() const {
527       DCHECK(IsInstruction()) << *this;
528       return std::get<HInstruction*>(value_);
529     }
530 
GetPriorValue() const531     PriorValueHolder GetPriorValue() const {
532       DCHECK(IsPartialUnknown());
533       return std::get<PriorValueHolder>(value_);
534     }
535 
GetPhiPlaceholder() const536     PhiPlaceholder GetPhiPlaceholder() const {
537       DCHECK(NeedsPhi() || IsMergedUnknown());
538       if (NeedsNonLoopPhi()) {
539         return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
540       } else if (NeedsLoopPhi()) {
541         return std::get<NeedsLoopPhiMarker>(value_).phi_;
542       } else {
543         return std::get<MergedUnknownMarker>(value_).phi_;
544       }
545     }
546 
GetMergeBlockId() const547     uint32_t GetMergeBlockId() const {
548       DCHECK(IsMergedUnknown()) << this;
549       return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
550     }
551 
GetMergeBlock(const HGraph * graph) const552     HBasicBlock* GetMergeBlock(const HGraph* graph) const {
553       DCHECK(IsMergedUnknown()) << *this;
554       return graph->GetBlocks()[GetMergeBlockId()];
555     }
556 
GetHeapLocation() const557     size_t GetHeapLocation() const {
558       DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
559       return GetPhiPlaceholder().GetHeapLocation();
560     }
561 
562     constexpr bool ExactEquals(Value other) const;
563 
564     constexpr bool Equals(Value other) const;
565 
Equals(HInstruction * instruction) const566     constexpr bool Equals(HInstruction* instruction) const {
567       return Equals(ForInstruction(instruction));
568     }
569 
570     std::ostream& Dump(std::ostream& os) const;
571 
572     // Public for use with lists.
Value()573     constexpr Value() : value_(ValuelessType::kInvalid) {}
574 
575    private:
576     using ValueHolder = std::variant<ValuelessType,
577                                      HInstruction*,
578                                      MergedUnknownMarker,
579                                      NeedsNonLoopPhiMarker,
580                                      NeedsLoopPhiMarker,
581                                      PriorValueHolder>;
GetValuelessType() const582     constexpr ValuelessType GetValuelessType() const {
583       return std::get<ValuelessType>(value_);
584     }
585 
Value(ValueHolder v)586     constexpr explicit Value(ValueHolder v) : value_(v) {}
587 
588     friend std::ostream& operator<<(std::ostream& os, const Value& v);
589 
590     ValueHolder value_;
591 
592     static_assert(std::is_move_assignable<PhiPlaceholder>::value);
593   };
594 
595   friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
596                                    const Value::NeedsLoopPhiMarker& p2);
597   friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
598                                    const Value::NeedsNonLoopPhiMarker& p2);
599   friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
600                                    const Value::MergedUnknownMarker& p2);
601 
602   // Get Phi placeholder index for access to `phi_placeholder_replacements_`
603   // and "visited" bit vectors during depth-first searches.
PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const604   size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
605     size_t res =
606         phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
607         phi_placeholder.GetHeapLocation();
608     DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
609         << res << "blks: " << GetGraph()->GetBlocks().size()
610         << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
611     return res;
612   }
613 
PhiPlaceholderIndex(Value phi_placeholder) const614   size_t PhiPlaceholderIndex(Value phi_placeholder) const {
615     return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
616   }
617 
IsEscapingObject(ReferenceInfo * info,HBasicBlock * block,size_t index)618   bool IsEscapingObject(ReferenceInfo* info, HBasicBlock* block, size_t index) {
619     return !info->IsSingletonAndRemovable() &&
620            !(info->IsPartialSingleton() && IsPartialNoEscape(block, index));
621   }
622 
IsPartialNoEscape(HBasicBlock * blk,size_t idx)623   bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
624     auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
625     if (!ri->IsPartialSingleton()) {
626       return false;
627     }
628     ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
629         ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
630     return std::none_of(cohorts.cbegin(),
631                         cohorts.cend(),
632                         [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
633                           // Make sure we haven't yet and never will escape.
634                           return ex.PrecedesBlock(blk) ||
635                                  ex.ContainsBlock(blk) ||
636                                  ex.SucceedsBlock(blk);
637                         });
638   }
639 
GetPhiPlaceholderAt(size_t off) const640   PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
641     DCHECK_LT(off, num_phi_placeholders_);
642     size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
643     // Technically this should be (off - id) / NumberOfHeapLocations
644     // but due to truncation it's all the same.
645     size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
646     return GetPhiPlaceholder(blk_id, id);
647   }
648 
GetPhiPlaceholder(uint32_t block_id,size_t idx) const649   PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
650     DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
651     return PhiPlaceholder(block_id, idx);
652   }
653 
Replacement(Value value) const654   Value Replacement(Value value) const {
655     DCHECK(value.NeedsPhi() ||
656            (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
657         << value << " phase: " << current_phase_;
658     Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
659     DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
660     DCHECK(replacement.IsUnknown() ||
661            FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
662     return replacement;
663   }
664 
ReplacementOrValue(Value value) const665   Value ReplacementOrValue(Value value) const {
666     if (current_phase_ == Phase::kPartialElimination) {
667       // In this phase we are materializing the default values which are used
668       // only if the partial singleton did not escape, so we can replace
669       // a partial unknown with the prior value.
670       if (value.IsPartialUnknown()) {
671         value = value.GetPriorValue().ToValue();
672       }
673       if ((value.IsMergedUnknown() || value.NeedsPhi()) &&
674           phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
675         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
676         DCHECK(!value.IsMergedUnknown());
677         DCHECK(!value.NeedsPhi());
678       } else if (value.IsMergedUnknown()) {
679         return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
680       }
681       if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) {
682         DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()),
683                   substitute_instructions_for_loads_.size());
684         HInstruction* substitute =
685             substitute_instructions_for_loads_[value.GetInstruction()->GetId()];
686         if (substitute != nullptr) {
687           DCHECK(substitute->IsPredicatedInstanceFieldGet());
688           return Value::ForInstruction(substitute);
689         }
690       }
691       DCHECK_IMPLIES(value.IsInstruction(),
692                      FindSubstitute(value.GetInstruction()) == value.GetInstruction());
693       return value;
694     }
695     if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
696       return Replacement(value);
697     } else {
698       DCHECK_IMPLIES(value.IsInstruction(),
699                      FindSubstitute(value.GetInstruction()) == value.GetInstruction());
700       return value;
701     }
702   }
703 
704   // The record of a heap value and instruction(s) that feed that value.
705   struct ValueRecord {
706     Value value;
707     Value stored_by;
708   };
709 
FindOrAddTypeConversionIfNecessary(HInstruction * instruction,HInstruction * value,DataType::Type expected_type)710   HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
711                                                       HInstruction* value,
712                                                       DataType::Type expected_type) {
713     // Should never add type conversion into boolean value.
714     if (expected_type == DataType::Type::kBool ||
715         DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
716         // TODO: This prevents type conversion of default values but we can still insert
717         // type conversion of other constants and there is no constant folding pass after LSE.
718         IsZeroBitPattern(value)) {
719       return nullptr;
720     }
721 
722     // Check if there is already a suitable TypeConversion we can reuse.
723     for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
724       if (use.GetUser()->IsTypeConversion() &&
725           use.GetUser()->GetType() == expected_type &&
726           // TODO: We could move the TypeConversion to a common dominator
727           // if it does not cross irreducible loop header.
728           use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
729           // Don't share across irreducible loop headers.
730           // TODO: can be more fine-grained than this by testing each dominator.
731           (use.GetUser()->GetBlock() == instruction->GetBlock() ||
732            !GetGraph()->HasIrreducibleLoops())) {
733         if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
734             use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
735           // Move the TypeConversion before the instruction.
736           use.GetUser()->MoveBefore(instruction);
737         }
738         DCHECK(use.GetUser()->StrictlyDominates(instruction));
739         return use.GetUser()->AsTypeConversion();
740       }
741     }
742 
743     // We must create a new TypeConversion instruction.
744     HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
745           expected_type, value, instruction->GetDexPc());
746     instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
747     return type_conversion;
748   }
749 
750   // Find an instruction's substitute if it's a removed load.
751   // Return the same instruction if it should not be removed.
FindSubstitute(HInstruction * instruction) const752   HInstruction* FindSubstitute(HInstruction* instruction) const {
753     size_t id = static_cast<size_t>(instruction->GetId());
754     if (id >= substitute_instructions_for_loads_.size()) {
755       // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet.
756       DCHECK_IMPLIES(IsLoad(instruction), instruction->IsPredicatedInstanceFieldGet());
757       return instruction;
758     }
759     HInstruction* substitute = substitute_instructions_for_loads_[id];
760     DCHECK(substitute == nullptr || IsLoad(instruction));
761     return (substitute != nullptr) ? substitute : instruction;
762   }
763 
AddRemovedLoad(HInstruction * load,HInstruction * heap_value)764   void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
765     DCHECK(IsLoad(load));
766     DCHECK_EQ(FindSubstitute(load), load);
767     DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
768         "Unexpected heap_value that has a substitute " << heap_value->DebugName();
769 
770     // The load expects to load the heap value as type load->GetType().
771     // However the tracked heap value may not be of that type. An explicit
772     // type conversion may be needed.
773     // There are actually three types involved here:
774     // (1) tracked heap value's type (type A)
775     // (2) heap location (field or element)'s type (type B)
776     // (3) load's type (type C)
777     // We guarantee that type A stored as type B and then fetched out as
778     // type C is the same as casting from type A to type C directly, since
779     // type B and type C will have the same size which is guaranteed in
780     // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
781     // So we only need one type conversion from type A to type C.
782     HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
783         load, heap_value, load->GetType());
784 
785     substitute_instructions_for_loads_[load->GetId()] =
786         type_conversion != nullptr ? type_conversion : heap_value;
787   }
788 
IsLoad(HInstruction * instruction)789   static bool IsLoad(HInstruction* instruction) {
790     // Unresolved load is not treated as a load.
791     return instruction->IsInstanceFieldGet() ||
792            instruction->IsPredicatedInstanceFieldGet() ||
793            instruction->IsStaticFieldGet() ||
794            instruction->IsVecLoad() ||
795            instruction->IsArrayGet();
796   }
797 
IsStore(HInstruction * instruction)798   static bool IsStore(HInstruction* instruction) {
799     // Unresolved store is not treated as a store.
800     return instruction->IsInstanceFieldSet() ||
801            instruction->IsArraySet() ||
802            instruction->IsVecStore() ||
803            instruction->IsStaticFieldSet();
804   }
805 
806   // Check if it is allowed to use default values or Phis for the specified load.
IsDefaultOrPhiAllowedForLoad(HInstruction * instruction)807   static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
808     DCHECK(IsLoad(instruction));
809     // Using defaults for VecLoads requires to create additional vector operations.
810     // As there are some issues with scheduling vector operations it is better to avoid creating
811     // them.
812     return !instruction->IsVecOperation();
813   }
814 
815   // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
816   // This is necessary if the stored heap value can be observed.
KeepStores(Value value)817   void KeepStores(Value value) {
818     if (value.IsPureUnknown() || value.IsPartialUnknown()) {
819       return;
820     }
821     if (value.IsMergedUnknown()) {
822       kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
823       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
824       return;
825     }
826     if (value.NeedsPhi()) {
827       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
828     } else {
829       HInstruction* instruction = value.GetInstruction();
830       DCHECK(IsStore(instruction));
831       kept_stores_.SetBit(instruction->GetId());
832     }
833   }
834 
835   // If a heap location X may alias with heap location at `loc_index`
836   // and heap_values of that heap location X holds a store, keep that store.
837   // It's needed for a dependent load that's not eliminated since any store
838   // that may put value into the load's heap location needs to be kept.
KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord> & heap_values,size_t loc_index)839   void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
840                                      size_t loc_index) {
841     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
842       if (i == loc_index) {
843         // We use this function when reading a location with unknown value and
844         // therefore we cannot know what exact store wrote that unknown value.
845         // But we can have a phi placeholder here marking multiple stores to keep.
846         DCHECK(
847             !heap_values[i].stored_by.IsInstruction() ||
848             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
849         KeepStores(heap_values[i].stored_by);
850         heap_values[i].stored_by = Value::PureUnknown();
851       } else if (heap_location_collector_.MayAlias(i, loc_index)) {
852         KeepStores(heap_values[i].stored_by);
853         heap_values[i].stored_by = Value::PureUnknown();
854       }
855     }
856   }
857 
858   // `instruction` is being removed. Try to see if the null check on it
859   // can be removed. This can happen if the same value is set in two branches
860   // but not in dominators. Such as:
861   //   int[] a = foo();
862   //   if () {
863   //     a[0] = 2;
864   //   } else {
865   //     a[0] = 2;
866   //   }
867   //   // a[0] can now be replaced with constant 2, and the null check on it can be removed.
TryRemovingNullCheck(HInstruction * instruction)868   void TryRemovingNullCheck(HInstruction* instruction) {
869     HInstruction* prev = instruction->GetPrevious();
870     if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
871       // Previous instruction is a null check for this instruction. Remove the null check.
872       prev->ReplaceWith(prev->InputAt(0));
873       prev->GetBlock()->RemoveInstruction(prev);
874     }
875   }
876 
GetDefaultValue(DataType::Type type)877   HInstruction* GetDefaultValue(DataType::Type type) {
878     switch (type) {
879       case DataType::Type::kReference:
880         return GetGraph()->GetNullConstant();
881       case DataType::Type::kBool:
882       case DataType::Type::kUint8:
883       case DataType::Type::kInt8:
884       case DataType::Type::kUint16:
885       case DataType::Type::kInt16:
886       case DataType::Type::kInt32:
887         return GetGraph()->GetIntConstant(0);
888       case DataType::Type::kInt64:
889         return GetGraph()->GetLongConstant(0);
890       case DataType::Type::kFloat32:
891         return GetGraph()->GetFloatConstant(0);
892       case DataType::Type::kFloat64:
893         return GetGraph()->GetDoubleConstant(0);
894       default:
895         UNREACHABLE();
896     }
897   }
898 
CanValueBeKeptIfSameAsNew(Value value,HInstruction * new_value,HInstruction * new_value_set_instr)899   bool CanValueBeKeptIfSameAsNew(Value value,
900                                  HInstruction* new_value,
901                                  HInstruction* new_value_set_instr) {
902     // For field/array set location operations, if the value is the same as the new_value
903     // it can be kept even if aliasing happens. All aliased operations will access the same memory
904     // range.
905     // For vector values, this is not true. For example:
906     //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
907     //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
908     //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
909     //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
910     //                                                     here is not packed_data anymore.
911     //
912     // TODO: to allow such 'same value' optimization on vector data,
913     // LSA needs to report more fine-grain MAY alias information:
914     // (1) May alias due to two vector data partial overlap.
915     //     e.g. a[i..i+3] and a[i+1,..,i+4].
916     // (2) May alias due to two vector data may complete overlap each other.
917     //     e.g. a[i..i+3] and b[i..i+3].
918     // (3) May alias but the exact relationship between two locations is unknown.
919     //     e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
920     // This 'same value' optimization can apply only on case (2).
921     if (new_value_set_instr->IsVecOperation()) {
922       return false;
923     }
924 
925     return value.Equals(new_value);
926   }
927 
928   Value PrepareLoopValue(HBasicBlock* block, size_t idx);
929   Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
930   void PrepareLoopRecords(HBasicBlock* block);
931   Value MergePredecessorValues(HBasicBlock* block, size_t idx);
932   void MergePredecessorRecords(HBasicBlock* block);
933 
934   void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);
935 
936   void VisitGetLocation(HInstruction* instruction, size_t idx);
937   void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
RecordFieldInfo(const FieldInfo * info,size_t heap_loc)938   void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
939     field_infos_[heap_loc] = info;
940   }
941 
942   void VisitBasicBlock(HBasicBlock* block) override;
943 
944   enum class Phase {
945     kLoadElimination,
946     kStoreElimination,
947     kPartialElimination,
948   };
949 
950   bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;
951 
952   bool TryReplacingLoopPhiPlaceholderWithDefault(
953       PhiPlaceholder phi_placeholder,
954       DataType::Type type,
955       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
956   bool TryReplacingLoopPhiPlaceholderWithSingleInput(
957       PhiPlaceholder phi_placeholder,
958       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
959   std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
960       PhiPlaceholder phi_placeholder,
961       /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
962       DataType::Type type,
963       bool can_use_default_or_phi);
964   bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
965                            DataType::Type type);
966   bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
967   bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
968                            DataType::Type type);
969   bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
970   std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
971                                                          HInstruction* load);
972   void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
973   void ProcessLoadsRequiringLoopPhis();
974 
975   void SearchPhiPlaceholdersForKeptStores();
976   void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
977   void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
978   void FindStoresWritingOldValues();
979   void FinishFullLSE();
980   void PrepareForPartialPhiComputation();
981   // Create materialization block and materialization object for the given predecessor of entry.
982   HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
983                                             HeapRefHolder&& holder,
984                                             size_t pred_idx,
985                                             HBasicBlock* blk);
986   // Returns the value that would be read by the 'read' instruction on
987   // 'orig_new_inst' if 'orig_new_inst' has not escaped.
988   HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
989   void MovePartialEscapes();
990 
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)991   void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
992     LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
993                << " but LSE should be the only source of predicated-ifield-gets!";
994   }
995 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)996   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
997     HInstruction* object = instruction->InputAt(0);
998     const FieldInfo& field = instruction->GetFieldInfo();
999     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
1000   }
1001 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)1002   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
1003     HInstruction* object = instruction->InputAt(0);
1004     const FieldInfo& field = instruction->GetFieldInfo();
1005     HInstruction* value = instruction->InputAt(1);
1006     size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
1007     VisitSetLocation(instruction, idx, value);
1008   }
1009 
VisitStaticFieldGet(HStaticFieldGet * instruction)1010   void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
1011     HInstruction* cls = instruction->InputAt(0);
1012     const FieldInfo& field = instruction->GetFieldInfo();
1013     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
1014   }
1015 
VisitStaticFieldSet(HStaticFieldSet * instruction)1016   void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
1017     HInstruction* cls = instruction->InputAt(0);
1018     const FieldInfo& field = instruction->GetFieldInfo();
1019     HInstruction* value = instruction->InputAt(1);
1020     size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
1021     VisitSetLocation(instruction, idx, value);
1022   }
1023 
VisitArrayGet(HArrayGet * instruction)1024   void VisitArrayGet(HArrayGet* instruction) override {
1025     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1026   }
1027 
VisitArraySet(HArraySet * instruction)1028   void VisitArraySet(HArraySet* instruction) override {
1029     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1030     VisitSetLocation(instruction, idx, instruction->GetValue());
1031   }
1032 
VisitVecLoad(HVecLoad * instruction)1033   void VisitVecLoad(HVecLoad* instruction) override {
1034     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1035   }
1036 
VisitVecStore(HVecStore * instruction)1037   void VisitVecStore(HVecStore* instruction) override {
1038     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1039     VisitSetLocation(instruction, idx, instruction->GetValue());
1040   }
1041 
VisitDeoptimize(HDeoptimize * instruction)1042   void VisitDeoptimize(HDeoptimize* instruction) override {
1043     // If we are in a try catch, even singletons are observable.
1044     const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr;
1045     HBasicBlock* block = instruction->GetBlock();
1046     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1047     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1048       Value* stored_by = &heap_values[i].stored_by;
1049       if (stored_by->IsUnknown()) {
1050         continue;
1051       }
1052       // Stores are generally observeable after deoptimization, except
1053       // for singletons that don't escape in the deoptimization environment.
1054       bool observable = true;
1055       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1056       if (!in_try_catch && info->IsSingleton()) {
1057         HInstruction* reference = info->GetReference();
1058         // Finalizable objects always escape.
1059         const bool finalizable_object =
1060             reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1061         if (!finalizable_object && !IsEscapingObject(info, block, i)) {
1062           // Check whether the reference for a store is used by an environment local of
1063           // the HDeoptimize. If not, the singleton is not observed after deoptimization.
1064           const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
1065           observable = std::any_of(
1066               env_uses.begin(),
1067               env_uses.end(),
1068               [instruction](const HUseListNode<HEnvironment*>& use) {
1069                 return use.GetUser()->GetHolder() == instruction;
1070               });
1071         }
1072       }
1073       if (observable) {
1074         KeepStores(*stored_by);
1075         *stored_by = Value::PureUnknown();
1076       }
1077     }
1078   }
1079 
1080   // Keep necessary stores before exiting a method via return/throw.
HandleExit(HBasicBlock * block,bool must_keep_stores=false)1081   void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
1082     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1083     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1084       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1085       if (must_keep_stores || IsEscapingObject(ref_info, block, i)) {
1086         KeepStores(heap_values[i].stored_by);
1087         heap_values[i].stored_by = Value::PureUnknown();
1088       }
1089     }
1090   }
1091 
VisitReturn(HReturn * instruction)1092   void VisitReturn(HReturn* instruction) override {
1093     HandleExit(instruction->GetBlock());
1094   }
1095 
VisitReturnVoid(HReturnVoid * return_void)1096   void VisitReturnVoid(HReturnVoid* return_void) override {
1097     HandleExit(return_void->GetBlock());
1098   }
1099 
HandleThrowingInstruction(HInstruction * instruction)1100   void HandleThrowingInstruction(HInstruction* instruction) {
1101     DCHECK(instruction->CanThrow());
1102     // If we are inside of a try catch, singletons can become visible since we may not exit the
1103     // method.
1104     HandleExit(instruction->GetBlock(),
1105                instruction->GetBlock()->GetTryCatchInformation() != nullptr);
1106   }
1107 
VisitMethodEntryHook(HMethodEntryHook * method_entry)1108   void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
1109     HandleThrowingInstruction(method_entry);
1110   }
1111 
VisitMethodExitHook(HMethodExitHook * method_exit)1112   void VisitMethodExitHook(HMethodExitHook* method_exit) override {
1113     HandleThrowingInstruction(method_exit);
1114   }
1115 
VisitDivZeroCheck(HDivZeroCheck * div_zero_check)1116   void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
1117     HandleThrowingInstruction(div_zero_check);
1118   }
1119 
VisitNullCheck(HNullCheck * null_check)1120   void VisitNullCheck(HNullCheck* null_check) override {
1121     HandleThrowingInstruction(null_check);
1122   }
1123 
VisitBoundsCheck(HBoundsCheck * bounds_check)1124   void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
1125     HandleThrowingInstruction(bounds_check);
1126   }
1127 
1128   void VisitLoadClass(HLoadClass* load_class) override {
1129     if (load_class->CanThrow()) {
1130       HandleThrowingInstruction(load_class);
1131     }
1132   }
1133 
1134   void VisitLoadString(HLoadString* load_string) override {
1135     if (load_string->CanThrow()) {
1136       HandleThrowingInstruction(load_string);
1137     }
1138   }
1139 
1140   void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
1141     HandleThrowingInstruction(sb_append);
1142   }
1143 
1144   void VisitThrow(HThrow* throw_instruction) override {
1145     HandleThrowingInstruction(throw_instruction);
1146   }
1147 
1148   void VisitCheckCast(HCheckCast* check_cast) override {
1149     HandleThrowingInstruction(check_cast);
1150   }
1151 
1152   void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
1153     if (monitor_op->CanThrow()) {
1154       HandleThrowingInstruction(monitor_op);
1155     }
1156   }
1157 
1158   void HandleInvoke(HInstruction* instruction) {
1159     // If `instruction` can throw we have to presume all stores are visible.
1160     const bool can_throw = instruction->CanThrow();
1161     // If we are in a try catch, even singletons are observable.
1162     const bool can_throw_in_try_catch =
1163         can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr;
1164     SideEffects side_effects = instruction->GetSideEffects();
1165     ScopedArenaVector<ValueRecord>& heap_values =
1166         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1167     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1168       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1169       HBasicBlock* blk = instruction->GetBlock();
1170       // We don't need to do anything if the reference has not escaped at this point.
1171       // This is true if either we (1) never escape or (2) sometimes escape but
1172       // there is no possible execution where we have done so at this time. NB
1173       // We count being in the excluded cohort as escaping. Technically, this is
1174       // a bit over-conservative (since we can have multiple non-escaping calls
1175       // before a single escaping one) but this simplifies everything greatly.
1176       auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
1177         DCHECK(ref_info->IsPartialSingleton());
1178         if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
1179           return false;
1180         }
1181         ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
1182             ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
1183         return std::none_of(cohorts.begin(),
1184                             cohorts.end(),
1185                             [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
1186                               return cohort.PrecedesBlock(blk);
1187                             });
1188       };
1189       if (!can_throw_in_try_catch &&
1190           (ref_info->IsSingleton() ||
1191            // partial and we aren't currently escaping and we haven't escaped yet.
1192            (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
1193         // Singleton references cannot be seen by the callee.
1194       } else {
1195         if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1196           // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1197           KeepStores(heap_values[i].stored_by);
1198           heap_values[i].stored_by = Value::PureUnknown();
1199         }
1200         if (side_effects.DoesAnyWrite()) {
1201           // The value may be clobbered.
1202           heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1203         }
1204       }
1205     }
1206   }
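  // Illustration (hypothetical Java-like pseudocode; `o`, `x` and `foo()` are made-up names):
  // once a reference may have escaped, a callee can read or overwrite its fields, e.g.
  //   o.x = 1;
  //   foo(o);       // The callee may read o.x (so the store must be kept) and/or write it
  //                 // (so the cached value becomes PartialUnknown).
  //   int v = o.x;  // Cannot be replaced with 1.
  // Non-escaping singletons are skipped above because the callee cannot see them.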
1207 
1208   void VisitInvoke(HInvoke* invoke) override {
1209     HandleInvoke(invoke);
1210   }
1211 
1212   void VisitClinitCheck(HClinitCheck* clinit) override {
1213     // Class initialization check can result in class initializer calling arbitrary methods.
1214     HandleInvoke(clinit);
1215   }
1216 
1217   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1218     // Conservatively treat it as an invocation.
1219     HandleInvoke(instruction);
1220   }
1221 
1222   void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1223     // Conservatively treat it as an invocation.
1224     HandleInvoke(instruction);
1225   }
1226 
1227   void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1228     // Conservatively treat it as an invocation.
1229     HandleInvoke(instruction);
1230   }
1231 
1232   void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1233     // Conservatively treat it as an invocation.
1234     HandleInvoke(instruction);
1235   }
1236 
1237   void VisitNewInstance(HNewInstance* new_instance) override {
1238     // If we are in a try catch, even singletons are observable.
1239     const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr;
1240     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1241     if (ref_info == nullptr) {
1242       // new_instance isn't used for field accesses. No need to process it.
1243       return;
1244     }
1245     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1246       DCHECK(!new_instance->IsFinalizable());
1247       // new_instance can potentially be eliminated.
1248       singleton_new_instances_.push_back(new_instance);
1249     }
1250     HBasicBlock* block = new_instance->GetBlock();
1251     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1252     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1253       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1254       HInstruction* ref = info->GetReference();
1255       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1256       if (ref == new_instance) {
1257         if (offset >= mirror::kObjectHeaderSize ||
1258             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1259           // Instance fields except the header fields are set to default heap values.
1260           // The shadow$_monitor_ field is set to the default value however.
1261           heap_values[i].value = Value::Default();
1262           heap_values[i].stored_by = Value::PureUnknown();
1263         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1264           // The shadow$_klass_ field is special and has an actual value however.
1265           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1266           heap_values[i].stored_by = Value::PureUnknown();
1267         }
1268       } else if (in_try_catch || IsEscapingObject(info, block, i)) {
1269         // Since NewInstance can throw, we presume all previous stores could be visible.
1270         KeepStores(heap_values[i].stored_by);
1271         heap_values[i].stored_by = Value::PureUnknown();
1272       }
1273     }
1274   }
1275 
1276   void VisitNewArray(HNewArray* new_array) override {
1277     // If we are in a try catch, even singletons are observable.
1278     const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr;
1279     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1280     if (ref_info == nullptr) {
1281       // new_array isn't used for array accesses. No need to process it.
1282       return;
1283     }
1284     if (ref_info->IsSingletonAndRemovable()) {
1285       if (new_array->GetLength()->IsIntConstant() &&
1286           new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1287         // new_array can potentially be eliminated.
1288         singleton_new_instances_.push_back(new_array);
1289       } else {
1290         // new_array may throw NegativeArraySizeException. Keep it.
1291       }
1292     }
1293     HBasicBlock* block = new_array->GetBlock();
1294     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1295     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1296       HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1297       ReferenceInfo* info = location->GetReferenceInfo();
1298       HInstruction* ref = info->GetReference();
1299       if (ref == new_array && location->GetIndex() != nullptr) {
1300         // Array elements are set to default heap values.
1301         heap_values[i].value = Value::Default();
1302         heap_values[i].stored_by = Value::PureUnknown();
1303       } else if (in_try_catch || IsEscapingObject(info, block, i)) {
1304         // Since NewArray can throw, we presume all previous stores could be visible.
1305         KeepStores(heap_values[i].stored_by);
1306         heap_values[i].stored_by = Value::PureUnknown();
1307       }
1308     }
1309   }
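  // Illustration (hypothetical Java-like pseudocode; `a` is a made-up name): right after
  //   int[] a = new int[16];
  //   int v = a[0];
  // every element of `a` holds the default value, so the load of a[0] can be replaced by the
  // constant 0; the same reasoning applies to instance fields after HNewInstance above.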
1310 
1311   void VisitInstruction(HInstruction* instruction) override {
1312     // Throwing instructions must be handled specially.
1313     DCHECK(!instruction->CanThrow());
1314   }
1315 
1316   bool ShouldPerformPartialLSE() const {
1317     return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
1318   }
1319 
1320   bool perform_partial_lse_;
1321 
1322   const HeapLocationCollector& heap_location_collector_;
1323 
1324   // Use local allocator for allocating memory.
1325   ScopedArenaAllocator allocator_;
1326 
1327   // The maximum possible number of unique Phi placeholders.
1328   size_t num_phi_placeholders_;
1329 
1330   // One array of heap value records for each block.
1331   ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1332 
1333   // We record loads and stores for re-processing when we find a loop Phi placeholder
1334   // with unknown value from a predecessor, and also for removing stores that are
1335   // found to be dead, i.e. not marked in `kept_stores_` at the end.
1336   struct LoadStoreRecord {
1337     HInstruction* load_or_store;
1338     size_t heap_location_index;
1339   };
1340   ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1341 
1342   // We record the substitute instructions for loads that should be
1343   // eliminated but may be used by heap locations. They'll be removed
1344   // in the end. These are indexed by the load's id.
1345   ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1346 
1347   // Value at the start of the given instruction for instructions which directly
1348   // read from a heap-location (i.e. FieldGet). The mapping to heap-location is
1349   // implicit through the fact that each instruction can only directly refer to
1350   // a single heap-location.
1351   ScopedArenaHashMap<HInstruction*, Value> intermediate_values_;
1352 
1353   // Record stores to keep in a bit vector indexed by instruction ID.
1354   ArenaBitVector kept_stores_;
1355   // When we need to keep all stores that feed a Phi placeholder, we just record the
1356   // index of that placeholder for processing after graph traversal.
1357   ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1358 
1359   // Loads that would require a loop Phi to replace are recorded for processing
1360   // later as we do not have enough information from back-edges to determine if
1361   // a suitable Phi can be found or created when we visit these loads.
1362   ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;
1363 
1364   // For stores, record the old value records that were replaced and the stored values.
1365   struct StoreRecord {
1366     ValueRecord old_value_record;
1367     HInstruction* stored_value;
1368   };
1369   // Small pre-allocated initial buffer avoids initializing a large one until it's really needed.
1370   static constexpr size_t kStoreRecordsInitialBufferSize = 16;
1371   std::pair<HInstruction*, StoreRecord> store_records_buffer_[kStoreRecordsInitialBufferSize];
1372   ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;
1373 
1374   // Replacements for Phi placeholders.
1375   // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1376   ScopedArenaVector<Value> phi_placeholder_replacements_;
1377 
1378   // Merged-unknowns that must have their predecessor values kept to ensure
1379   // partially escaped values are written
1380   ArenaBitVector kept_merged_unknowns_;
1381 
1382   ScopedArenaVector<HInstruction*> singleton_new_instances_;
1383 
1384   // The field infos for each heap location (if relevant).
1385   ScopedArenaVector<const FieldInfo*> field_infos_;
1386 
1387   Phase current_phase_;
1388 
1389   friend class PartialLoadStoreEliminationHelper;
1390   friend struct ScopedRestoreHeapValues;
1391 
1392   friend std::ostream& operator<<(std::ostream& os, const Value& v);
1393   friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1394   friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1395 
1396   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1397 };
1398 
1399 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1400   p.Dump(oss);
1401   return oss;
1402 }
1403 
1404 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1405   switch (phase) {
1406     case LSEVisitor::Phase::kLoadElimination:
1407       return oss << "kLoadElimination";
1408     case LSEVisitor::Phase::kStoreElimination:
1409       return oss << "kStoreElimination";
1410     case LSEVisitor::Phase::kPartialElimination:
1411       return oss << "kPartialElimination";
1412   }
1413 }
1414 
1415 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1416   if (IsDefault()) {
1417     oss << "Default";
1418   } else if (IsPhi()) {
1419     oss << "Phi: " << GetPhiPlaceholder();
1420   } else {
1421     oss << "Instruction: " << *GetInstruction();
1422   }
1423 }
1424 
1425 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1426     : value_(Marker{}) {
1427   DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1428   if (val.IsPartialUnknown()) {
1429     value_ = val.GetPriorValue().value_;
1430   } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1431     value_ = val.GetPhiPlaceholder();
1432   } else if (val.IsInstruction()) {
1433     value_ = val.GetInstruction();
1434   } else {
1435     DCHECK(val.IsDefault());
1436   }
1437 }
1438 
1439 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1440   return true;
1441 }
1442 
1443 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1444                           const LSEVisitor::PriorValueHolder& p2) {
1445   return p1.Equals(p2);
1446 }
1447 
1448 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1449                           const LSEVisitor::PhiPlaceholder& p2) {
1450   return p1.Equals(p2);
1451 }
1452 
1453 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1454                           const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1455   return p1.phi_ == p2.phi_;
1456 }
1457 
1458 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1459                           const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1460   return p1.phi_ == p2.phi_;
1461 }
1462 
1463 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1464                           const LSEVisitor::Value::MergedUnknownMarker& p2) {
1465   return p1.phi_ == p2.phi_;
1466 }
1467 
1468 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1469   p.Dump(oss);
1470   return oss;
1471 }
1472 
1473 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1474   if (IsDefault()) {
1475     return Value::Default();
1476   } else if (IsPhi()) {
1477     return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1478   } else {
1479     return Value::ForInstruction(GetInstruction());
1480   }
1481 }
1482 
1483 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1484   return value_ == other.value_;
1485 }
1486 
1487 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1488   // Only valid values can be compared.
1489   DCHECK(IsValid());
1490   DCHECK(other.IsValid());
1491   if (value_ == other.value_) {
1492     // Note: Two unknown values are considered different.
1493     return !IsUnknown();
1494   } else {
1495     // Default is considered equal to zero-bit-pattern instructions.
1496     return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1497             (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1498   }
1499 }
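// For example, with the equality above a store of IntConstant 0 (or null) into a location that
// still holds its default value compares equal to Default, so VisitSetLocation() can drop it as
// a same-value store. (Illustrative consequence of the check above, not an additional rule.)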
1500 
1501 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1502   if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1503     switch (GetValuelessType()) {
1504       case ValuelessType::kDefault:
1505         return os << "Default";
1506       case ValuelessType::kPureUnknown:
1507         return os << "PureUnknown";
1508       case ValuelessType::kInvalid:
1509         return os << "Invalid";
1510     }
1511   } else if (IsPartialUnknown()) {
1512     return os << "PartialUnknown[" << GetPriorValue() << "]";
1513   } else if (IsInstruction()) {
1514     return os << "Instruction[id: " << GetInstruction()->GetId()
1515               << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1516   } else if (IsMergedUnknown()) {
1517     return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1518               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1519 
1520   } else if (NeedsLoopPhi()) {
1521     return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1522               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1523   } else {
1524     return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1525               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1526   }
1527 }
1528 
1529 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1530   return v.Dump(os);
1531 }
1532 
1533 LSEVisitor::LSEVisitor(HGraph* graph,
1534                        const HeapLocationCollector& heap_location_collector,
1535                        bool perform_partial_lse,
1536                        OptimizingCompilerStats* stats)
1537     : HGraphDelegateVisitor(graph, stats),
1538       perform_partial_lse_(perform_partial_lse),
1539       heap_location_collector_(heap_location_collector),
1540       allocator_(graph->GetArenaStack()),
1541       num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1542                             heap_location_collector_.GetNumberOfHeapLocations()),
1543       heap_values_for_(graph->GetBlocks().size(),
1544                        ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1545                        allocator_.Adapter(kArenaAllocLSE)),
1546       loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1547       // We may add new instructions (default values, Phis) but we're not adding loads
1548       // or stores, so we shall not need to resize following vector and BitVector.
1549       substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
1550                                          nullptr,
1551                                          allocator_.Adapter(kArenaAllocLSE)),
1552       intermediate_values_(allocator_.Adapter(kArenaAllocLSE)),
1553       kept_stores_(&allocator_,
1554                    /*start_bits=*/graph->GetCurrentInstructionId(),
1555                    /*expandable=*/false,
1556                    kArenaAllocLSE),
1557       phi_placeholders_to_search_for_kept_stores_(&allocator_,
1558                                                   num_phi_placeholders_,
1559                                                   /*expandable=*/false,
1560                                                   kArenaAllocLSE),
1561       loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1562       store_records_(store_records_buffer_,
1563                      kStoreRecordsInitialBufferSize,
1564                      allocator_.Adapter(kArenaAllocLSE)),
1565       phi_placeholder_replacements_(num_phi_placeholders_,
1566                                     Value::Invalid(),
1567                                     allocator_.Adapter(kArenaAllocLSE)),
1568       kept_merged_unknowns_(&allocator_,
1569                             /*start_bits=*/num_phi_placeholders_,
1570                             /*expandable=*/false,
1571                             kArenaAllocLSE),
1572       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1573       field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1574                    allocator_.Adapter(kArenaAllocLSE)),
1575       current_phase_(Phase::kLoadElimination) {
1576   // Clear bit vectors.
1577   phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
1578   kept_stores_.ClearAllBits();
1579 }
1580 
1581 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1582   // If the pre-header value is known (which implies that the reference dominates this
1583   // block), use a Phi placeholder for the value in the loop header. If all predecessors
1584   // are later found to have a known value, we can replace loads from this location,
1585   // either with the pre-header value or with a new Phi. For array locations, the index
1586   // may be defined inside the loop but the only known value in that case should be the
1587   // default value or a Phi placeholder that can be replaced only with the default value.
1588   HLoopInformation* loop_info = block->GetLoopInformation();
1589   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1590   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1591   if (pre_header_value.IsUnknown()) {
1592     return pre_header_value;
1593   }
1594   if (kIsDebugBuild) {
1595     // Check that the reference indeed dominates this loop.
1596     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1597     HInstruction* ref = location->GetReferenceInfo()->GetReference();
1598     CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1599         << GetGraph()->PrettyMethod();
1600     // Check that the index, if defined inside the loop, tracks a default value
1601     // or a Phi placeholder requiring a loop Phi.
1602     HInstruction* index = location->GetIndex();
1603     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1604       CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1605           << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1606           << pre_header_value;
1607     }
1608   }
1609   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1610   return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1611 }
1612 
1613 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1614   // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1615   // if the value in the location escapes. This is not applicable to singletons that are
1616   // defined inside the loop as they shall be dead in the loop header.
1617   const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1618   const HInstruction* reference = ref_info->GetReference();
1619   // Finalizable objects always escape.
1620   const bool is_finalizable =
1621       reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1622   if (ref_info->IsSingleton() &&
1623       block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1624       !is_finalizable) {
1625     return Value::PureUnknown();
1626   }
1627   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1628   return Value::ForLoopPhiPlaceholder(phi_placeholder);
1629 }
1630 
1631 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1632   DCHECK(block->IsLoopHeader());
1633   int block_id = block->GetBlockId();
1634   HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1635   ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1636       heap_values_for_[pre_header->GetBlockId()];
1637   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1638   DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1639   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1640   DCHECK(heap_values.empty());
1641 
1642   // Don't eliminate loads in irreducible loops.
1643   if (block->GetLoopInformation()->IsIrreducible()) {
1644     heap_values.resize(num_heap_locations,
1645                        {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1646     // Also keep the stores before the loop header, including in blocks that were not visited yet.
1647     bool is_osr = GetGraph()->IsCompilingOsr();
1648     for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1649       heap_values[idx].value =
1650           is_osr ? Value::PureUnknown()
1651                  : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1652       KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1653     }
1654     return;
1655   }
1656 
1657   // Fill `heap_values` based on values from pre-header.
1658   heap_values.reserve(num_heap_locations);
1659   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1660     heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1661   }
1662 }
1663 
1664 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1665   ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1666   DCHECK(!predecessors.empty());
1667   Value merged_value =
1668       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1669   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1670     Value pred_value =
1671         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1672     if (pred_value.Equals(merged_value)) {
1673       // Value is the same. No need to update our merged value.
1674       continue;
1675     } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1676       // If one is unknown and the other is a different type of unknown
1677       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1678       merged_value = Value::MergedUnknown(phi_placeholder);
1679       // We know that at least one of the merge points is unknown (and both are
1680       // not pure-unknowns since that's captured above). This means that the
1681       // overall value needs to be a MergedUnknown. Just return that.
1682       break;
1683     } else {
1684       // There are conflicting known values. We may still be able to replace loads with a Phi.
1685       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1686       // Propagate the need for a new loop Phi from all predecessors.
1687       bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1688       merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1689     }
1690   }
1691   DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
1692       << merged_value << " in " << GetGraph()->PrettyMethod();
1693   return merged_value;
1694 }
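// Illustration (hypothetical Java-like pseudocode; `o`, `x`, `a`, `b` are made-up names): for a
// diamond such as
//   if (cond) { o.x = a; } else { o.x = b; }
//   int v = o.x;
// the predecessors record different known values, so the merge above yields a Phi placeholder;
// if the load is later found eliminable, a real HPhi(a, b) is materialized for it.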
1695 
1696 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1697   if (block->IsExitBlock()) {
1698     // Exit block doesn't really merge values since the control flow ends in
1699     // its predecessors. Each predecessor needs to make sure stores are kept
1700     // if necessary.
1701     return;
1702   }
1703 
1704   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1705   DCHECK(heap_values.empty());
1706   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1707   if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr &&
1708                                            block->GetTryCatchInformation()->IsCatchBlock())) {
1709     DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1710     heap_values.resize(num_heap_locations,
1711                        {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1712     return;
1713   }
1714 
1715   heap_values.reserve(num_heap_locations);
1716   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1717     Value merged_value = MergePredecessorValues(block, idx);
1718     if (kIsDebugBuild) {
1719       if (merged_value.NeedsPhi()) {
1720         uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1721         CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1722       } else if (merged_value.IsInstruction()) {
1723         CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1724       }
1725     }
1726     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1727     Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1728     for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1729       uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1730       Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1731       if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1732           !merged_stored_by.Equals(stored_by)) {
1733         // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1734         PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1735         merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1736         break;
1737       }
1738     }
1739     heap_values.push_back({ merged_value, merged_stored_by });
1740   }
1741 }
1742 
1743 static HInstruction* FindOrConstructNonLoopPhi(
1744     HBasicBlock* block,
1745     const ScopedArenaVector<HInstruction*>& phi_inputs,
1746     DataType::Type type) {
1747   for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1748     HInstruction* phi = phi_it.Current();
1749     DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1750     auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1751       return lhs == rhs.GetInstruction();
1752     };
1753     if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1754       return phi;
1755     }
1756   }
1757   ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1758   HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1759   for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1760     DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1761     phi->SetRawInputAt(i, phi_inputs[i]);
1762   }
1763   block->AddPhi(phi);
1764   if (type == DataType::Type::kReference) {
1765     // Update reference type information. Pass invalid handles, these are not used for Phis.
1766     ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1767                                        Handle<mirror::ClassLoader>(),
1768                                        Handle<mirror::DexCache>(),
1769                                        /* is_first_run= */ false);
1770     rtp_fixup.Visit(phi);
1771   }
1772   return phi;
1773 }
1774 
1775 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1776   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1777   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1778   size_t idx = phi_placeholder.GetHeapLocation();
1779 
1780   // Use local allocator to reduce peak memory usage.
1781   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1782   // Reuse the same vector for collecting phi inputs.
1783   ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1784 
1785   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1786   work_queue.push_back(phi_placeholder);
1787   while (!work_queue.empty()) {
1788     PhiPlaceholder current_phi_placeholder = work_queue.back();
1789     if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1790       // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1791       // that directly or indirectly depends on it, so it was already processed as part of the
1792       // other Phi placeholder's dependencies before this one got back to the top of the stack.
1793       work_queue.pop_back();
1794       continue;
1795     }
1796     uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1797     HBasicBlock* current_block = blocks[current_block_id];
1798     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1799 
1800     // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1801     // And the only way for such merged value to reach a different heap location is through
1802     // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1803     // seen here are tied to one heap location.
1804     DCHECK(!current_block->IsLoopHeader())
1805         << current_phi_placeholder << " phase: " << current_phase_;
1806     DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1807 
1808     phi_inputs.clear();
1809     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1810       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1811       DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1812                                           << " pred: " << predecessor->GetBlockId();
1813       if (pred_value.NeedsNonLoopPhi() ||
1814           (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
1815         // We need to process the Phi placeholder first.
1816         work_queue.push_back(pred_value.GetPhiPlaceholder());
1817       } else if (pred_value.IsDefault()) {
1818         phi_inputs.push_back(GetDefaultValue(type));
1819       } else {
1820         DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1821                                            << " pred: " << predecessor->GetBlockId();
1822         phi_inputs.push_back(pred_value.GetInstruction());
1823       }
1824     }
1825     if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1826       // All inputs are available. Find or construct the Phi replacement.
1827       phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1828           Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1829       // Remove the block from the queue.
1830       DCHECK_EQ(current_phi_placeholder, work_queue.back());
1831       work_queue.pop_back();
1832     }
1833   }
1834 }
1835 
1836 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1837   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1838   uint32_t block_id = instruction->GetBlock()->GetBlockId();
1839   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1840   ValueRecord& record = heap_values[idx];
1841   if (instruction->IsFieldAccess()) {
1842     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1843   }
1844   DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1845   // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
1846   // value.
1847   DCHECK(!record.value.IsPureUnknown() ||
1848          heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
1849          !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
1850          << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
1851   intermediate_values_.insert({instruction, record.value});
1852   loads_and_stores_.push_back({ instruction, idx });
1853   if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1854       !IsDefaultOrPhiAllowedForLoad(instruction)) {
1855     record.value = Value::PureUnknown();
1856   }
1857   if (record.value.IsDefault()) {
1858     KeepStores(record.stored_by);
1859     HInstruction* constant = GetDefaultValue(instruction->GetType());
1860     AddRemovedLoad(instruction, constant);
1861     record.value = Value::ForInstruction(constant);
1862   } else if (record.value.IsUnknown()) {
1863     // Load isn't eliminated. Put the load as the value into the HeapLocation.
1864     // This acts like GVN but with better aliasing analysis.
1865     Value old_value = record.value;
1866     record.value = Value::ForInstruction(instruction);
1867     KeepStoresIfAliasedToLocation(heap_values, idx);
1868     KeepStores(old_value);
1869   } else if (record.value.NeedsLoopPhi()) {
1870     // We do not know yet if the value is known for all back edges. Record for future processing.
1871     loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1872   } else {
1873     // This load can be eliminated but we may need to construct non-loop Phis.
1874     if (record.value.NeedsNonLoopPhi()) {
1875       MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1876       record.value = Replacement(record.value);
1877     }
1878     HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1879     AddRemovedLoad(instruction, heap_value);
1880     TryRemovingNullCheck(instruction);
1881   }
1882 }
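// Illustration (hypothetical Java-like pseudocode; `o` and `x` are made-up names): when the
// tracked value is unknown, the load itself becomes the heap value, so in
//   int v1 = o.x;
//   int v2 = o.x;   // No intervening writes that may alias o.x.
// the second load can be replaced by the first, which is the GVN-like behaviour mentioned above.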
1883 
1884 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1885   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1886   DCHECK(!IsStore(value)) << value->DebugName();
1887   if (instruction->IsFieldAccess()) {
1888     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1889   }
1890   // value may already have a substitute.
1891   value = FindSubstitute(value);
1892   HBasicBlock* block = instruction->GetBlock();
1893   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1894   ValueRecord& record = heap_values[idx];
1895   DCHECK_IMPLIES(record.value.IsInstruction(),
1896                  FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1897 
1898   if (record.value.Equals(value)) {
1899     // Store into the heap location with the same value.
1900     // This store can be eliminated right away.
1901     block->RemoveInstruction(instruction);
1902     return;
1903   }
1904 
1905   store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1906   loads_and_stores_.push_back({ instruction, idx });
1907 
1908   // If the `record.stored_by` specified a store from this block, it shall be removed
1909   // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1910   // `kept_stores_` anymore after we update the `record.stored_by` below.
1911   DCHECK(!record.stored_by.IsInstruction() ||
1912          record.stored_by.GetInstruction()->GetBlock() != block ||
1913          record.stored_by.GetInstruction()->CanThrow() ||
1914          !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1915 
1916   if (instruction->CanThrow()) {
1917     // Previous stores can become visible.
1918     HandleThrowingInstruction(instruction);
1919     // We cannot remove a possibly throwing store.
1920     // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1921     kept_stores_.SetBit(instruction->GetId());
1922   }
1923 
1924   // Update the record.
1925   auto it = loads_requiring_loop_phi_.find(value);
1926   if (it != loads_requiring_loop_phi_.end()) {
1927     // Propagate the Phi placeholder to the record.
1928     record.value = it->second.value;
1929     DCHECK(record.value.NeedsLoopPhi());
1930   } else {
1931     record.value = Value::ForInstruction(value);
1932   }
1933   // Track the store in the value record. If the value is loaded or needed after
1934   // return/deoptimization later, this store isn't really redundant.
1935   record.stored_by = Value::ForInstruction(instruction);
1936 
1937   // This store may kill values in other heap locations due to aliasing.
1938   for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1939     if (i == idx ||
1940         heap_values[i].value.IsUnknown() ||
1941         CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1942         !heap_location_collector_.MayAlias(i, idx)) {
1943       continue;
1944     }
1945     // Kill heap locations that may alias and keep previous stores to these locations.
1946     KeepStores(heap_values[i].stored_by);
1947     heap_values[i].stored_by = Value::PureUnknown();
1948     heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1949   }
1950 }
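// Illustration (hypothetical Java-like pseudocode; `o`, `x` and `v` are made-up names): a store
// of a value the location is already known to hold, e.g. the second store in
//   o.x = v;
//   o.x = v;
// is removed immediately by the `record.value.Equals(value)` check above; other stores are only
// recorded in `store_records_` and may be removed later if they turn out to be dead.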
1951 
1952 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1953   // Populate the heap_values array for this block.
1954   // TODO: try to reuse the heap_values array from one predecessor if possible.
1955   if (block->IsLoopHeader()) {
1956     PrepareLoopRecords(block);
1957   } else {
1958     MergePredecessorRecords(block);
1959   }
1960   // Visit instructions.
1961   HGraphVisitor::VisitBasicBlock(block);
1962 }
1963 
1964 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1965   DCHECK_NE(idx1, idx2);
1966   DCHECK(loop_header->IsLoopHeader());
1967   if (heap_location_collector_.MayAlias(idx1, idx2)) {
1968     return true;
1969   }
1970   // For array locations with index defined inside the loop, include
1971   // all other locations in the array, even those that LSA declares
1972   // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1973   // refer to the same locations for different iterations. (LSA's
1974   // `ComputeMayAlias()` does not consider different loop iterations.)
1975   HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1976   HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1977   if (loc1->IsArray() &&
1978       loc2->IsArray() &&
1979       HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1980                                                 loc2->GetReferenceInfo())) {
1981     HLoopInformation* loop_info = loop_header->GetLoopInformation();
1982     if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1983         loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1984       // Consider the locations aliasing. Do not optimize the case where both indexes
1985       // are loop invariants defined inside the loop, rely on LICM to pull them out.
1986       return true;
1987     }
1988   }
1989   return false;
1990 }
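// Illustration (hypothetical Java-like pseudocode; `a`, `i` and `v` are made-up names): within a
// single iteration `a[i]` and `a[i + 1]` never alias, but across back-edges they can, e.g.
//   for (int i = 0; i < n - 1; ++i) {
//     a[i + 1] = v;
//     int t = a[i];   // In the next iteration this reads the element written above.
//   }
// so loop-carried reasoning must treat such locations as potentially aliasing.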
1991 
1992 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1993     PhiPlaceholder phi_placeholder,
1994     DataType::Type type,
1995     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1996   // Use local allocator to reduce peak memory usage.
1997   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1998   ArenaBitVector visited(&allocator,
1999                          /*start_bits=*/ num_phi_placeholders_,
2000                          /*expandable=*/ false,
2001                          kArenaAllocLSE);
2002   visited.ClearAllBits();
2003   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2004 
2005   // Use depth first search to check if any non-Phi input is unknown.
2006   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2007   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2008   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2009   work_queue.push_back(phi_placeholder);
2010   while (!work_queue.empty()) {
2011     PhiPlaceholder current_phi_placeholder = work_queue.back();
2012     work_queue.pop_back();
2013     HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
2014     DCHECK_GE(block->GetPredecessors().size(), 2u);
2015     size_t idx = current_phi_placeholder.GetHeapLocation();
2016     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2017       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2018       if (value.NeedsPhi()) {
2019         // Visit the predecessor Phi placeholder if it's not visited yet.
2020         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2021           visited.SetBit(PhiPlaceholderIndex(value));
2022           work_queue.push_back(value.GetPhiPlaceholder());
2023         }
2024       } else if (!value.Equals(Value::Default())) {
2025         return false;  // Report failure.
2026       }
2027     }
2028     if (block->IsLoopHeader()) {
2029       // For back-edges we need to check all locations that write to the same array,
2030       // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
2031       // as they may actually refer to the same locations for different iterations.
2032       for (size_t i = 0; i != num_heap_locations; ++i) {
2033         if (i == idx ||
2034             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
2035                 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
2036           continue;
2037         }
2038         for (HBasicBlock* predecessor : block->GetPredecessors()) {
2039           // Check if there were any writes to this location.
2040           // Note: We could simply process the values but due to the vector operation
2041           // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
2042           // the value to change and not be equal to default. To work around this and
2043           // allow replacing the non-vector load of loop-invariant default values
2044           // anyway, skip over paths that do not have any writes.
2045           ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
2046           while (record.stored_by.NeedsLoopPhi() &&
2047                  blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
2048             HLoopInformation* loop_info =
2049                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
2050             record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
2051           }
2052           Value value = ReplacementOrValue(record.value);
2053           if (value.NeedsPhi()) {
2054             // Visit the predecessor Phi placeholder if it's not visited yet.
2055             if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2056               visited.SetBit(PhiPlaceholderIndex(value));
2057               work_queue.push_back(value.GetPhiPlaceholder());
2058             }
2059           } else if (!value.Equals(Value::Default())) {
2060             return false;  // Report failure.
2061           }
2062         }
2063       }
2064     }
2065   }
2066 
2067   // Record replacement and report success.
2068   HInstruction* replacement = GetDefaultValue(type);
2069   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2070     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2071     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2072   }
2073   phi_placeholders_to_materialize->Subtract(&visited);
2074   return true;
2075 }
2076 
2077 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
2078     PhiPlaceholder phi_placeholder,
2079     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
2080   // Use local allocator to reduce peak memory usage.
2081   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2082   ArenaBitVector visited(&allocator,
2083                          /*start_bits=*/ num_phi_placeholders_,
2084                          /*expandable=*/ false,
2085                          kArenaAllocLSE);
2086   visited.ClearAllBits();
2087   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2088 
2089   // Use depth first search to check if any non-Phi input is unknown.
2090   HInstruction* replacement = nullptr;
2091   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2092   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2093   work_queue.push_back(phi_placeholder);
2094   while (!work_queue.empty()) {
2095     PhiPlaceholder current_phi_placeholder = work_queue.back();
2096     work_queue.pop_back();
2097     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2098     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2099     size_t idx = current_phi_placeholder.GetHeapLocation();
2100     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2101       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2102       if (value.NeedsPhi()) {
2103         // Visit the predecessor Phi placeholder if it's not visited yet.
2104         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2105           visited.SetBit(PhiPlaceholderIndex(value));
2106           work_queue.push_back(value.GetPhiPlaceholder());
2107         }
2108       } else {
2109         if (!value.IsInstruction() ||
2110             (replacement != nullptr && replacement != value.GetInstruction())) {
2111           return false;  // Report failure.
2112         }
2113         replacement = value.GetInstruction();
2114       }
2115     }
2116     // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
2117     // for back-edges, it is not needed here. When looking for a single input
2118     // instruction coming from before the loop, the array index must also be
2119     // defined before the loop and the aliasing analysis done by LSA is sufficient.
2120     // Any writes of a different value with an index that is not loop invariant
2121     // would invalidate the heap location in `VisitSetLocation()`.
2122   }
2123 
2124   // Record replacement and report success.
2125   DCHECK(replacement != nullptr);
2126   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2127     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2128     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2129   }
2130   phi_placeholders_to_materialize->Subtract(&visited);
2131   return true;
2132 }
2133 
2134 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2135     PhiPlaceholder phi_placeholder,
2136     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2137     DataType::Type type,
2138     bool can_use_default_or_phi) {
2139   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2140 
2141   // Use local allocator to reduce peak memory usage.
2142   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2143   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2144 
2145   // Use depth first search to check if any non-Phi input is unknown.
2146   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2147   phi_placeholders_to_materialize->ClearAllBits();
2148   phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2149   work_queue.push_back(phi_placeholder);
2150   while (!work_queue.empty()) {
2151     PhiPlaceholder current_phi_placeholder = work_queue.back();
2152     work_queue.pop_back();
2153     if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2154       // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2155       DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2156                  Value::Default()));
2157       continue;
2158     }
2159     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2160     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2161     size_t idx = current_phi_placeholder.GetHeapLocation();
2162     if (current_block->IsLoopHeader()) {
2163       // If the index is defined inside the loop, it may reference different elements of the
2164       // array on each iteration. Since we do not track if all elements of an array are set
2165       // to the same value explicitly, the only known value in pre-header can be the default
2166       // value from NewArray or a Phi placeholder depending on a default value from some outer
2167       // loop pre-header. This Phi placeholder can be replaced only by the default value.
2168       HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2169       if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2170         if (can_use_default_or_phi &&
2171             TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2172                                                       type,
2173                                                       phi_placeholders_to_materialize)) {
2174           continue;
2175         } else {
2176           return current_phi_placeholder;  // Report the loop Phi placeholder.
2177         }
2178       }
2179       // A similar situation arises with the index defined outside the loop if we cannot use
2180       // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2181       // placeholder with a single instruction defined before the loop.
2182       if (!can_use_default_or_phi) {
2183         DCHECK(index != nullptr);  // Vector operations are array operations.
2184         if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2185                                                           phi_placeholders_to_materialize)) {
2186           continue;
2187         } else {
2188           return current_phi_placeholder;  // Report the loop Phi placeholder.
2189         }
2190       }
2191     }
2192     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2193       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2194       Value value = ReplacementOrValue(heap_values[idx].value);
2195       if (value.IsUnknown()) {
2196         // We cannot create a Phi for this loop Phi placeholder.
2197         return current_phi_placeholder;  // Report the loop Phi placeholder.
2198       }
2199       // For arrays, the location may have been clobbered by writes to other locations
2200       // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2201       if (current_block->IsLoopHeader() &&
2202           predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2203           heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2204         for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2205           if (i != idx &&
2206               !heap_values[i].stored_by.IsUnknown() &&
2207               MayAliasOnBackEdge(current_block, idx, i)) {
2208             // We cannot create a Phi for this loop Phi placeholder.
2209             return current_phi_placeholder;
2210           }
2211         }
2212       }
2213       if (value.NeedsLoopPhi()) {
2214         // Visit the predecessor Phi placeholder if it's not visited yet.
2215         if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2216           phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2217           work_queue.push_back(value.GetPhiPlaceholder());
2218           LSE_VLOG << "For materialization of " << phi_placeholder
2219                    << " we need to materialize " << value;
2220         }
2221       }
2222     }
2223   }
2224 
2225   // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2226   return std::nullopt;
2227 }
2228 
2229 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2230                                      DataType::Type type) {
2231   return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2232 }
2233 
2234 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2235                                      DataType::Type type) {
2236   // Materialize all predecessors that do not need a loop Phi and determine if all inputs
2237   // other than loop Phis are the same.
2238   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2239   std::optional<Value> other_value = std::nullopt;
2240   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2241     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2242     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2243     DCHECK_GE(block->GetPredecessors().size(), 2u);
2244     size_t idx = phi_placeholder.GetHeapLocation();
2245     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2246       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2247       if (value.NeedsNonLoopPhi()) {
2248         DCHECK(current_phase_ == Phase::kLoadElimination ||
2249                current_phase_ == Phase::kPartialElimination)
2250             << current_phase_;
2251         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2252         value = Replacement(value);
2253       }
2254       if (!value.NeedsLoopPhi()) {
2255         if (!other_value) {
2256           // The first other value we found.
2257           other_value = value;
2258         } else if (!other_value->IsInvalid()) {
2259           // Check if the current `value` differs from the previous `other_value`.
2260           if (!value.Equals(*other_value)) {
2261             other_value = Value::Invalid();
2262           }
2263         }
2264       }
2265     }
2266   }
2267 
2268   DCHECK(other_value.has_value());
2269   if (!other_value->IsInvalid()) {
2270     HInstruction* replacement =
2271         (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2272     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2273       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2274     }
2275     return true;
2276   }
2277 
2278   // If we're materializing only a single Phi, try to match it with an existing Phi.
2279   // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2280   // This also covers the case when after replacing a previous set of Phi placeholders,
2281   // we continue with a Phi placeholder that does not really need a loop Phi anymore.
2282   if (phi_placeholder_indexes.size() == 1u) {
2283     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2284     size_t idx = phi_placeholder.GetHeapLocation();
2285     HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2286     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2287     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2288       HInstruction* phi = phi_it.Current();
2289       DCHECK_EQ(phi->InputCount(), predecessors.size());
2290       ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
2291       auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2292         Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2293         if (value.NeedsPhi()) {
2294           DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2295           return lhs.GetInstruction() == phi;
2296         } else {
2297           DCHECK(value.IsDefault() || value.IsInstruction());
2298           return value.Equals(lhs.GetInstruction());
2299         }
2300       };
2301       if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2302         phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2303         return true;
2304       }
2305     }
2306   }
2307 
2308   if (current_phase_ == Phase::kStoreElimination) {
2309     // We're not creating Phis during the final store elimination phase.
2310     return false;
2311   }
2312 
2313   // There are different inputs to the Phi chain. Create the Phis.
2314   ArenaAllocator* allocator = GetGraph()->GetAllocator();
2315   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2316     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2317     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2318     CHECK_GE(block->GetPredecessors().size(), 2u);
2319     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2320         new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2321   }
2322   // Fill the Phi inputs.
2323   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2324     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2325     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2326     size_t idx = phi_placeholder.GetHeapLocation();
2327     HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2328     DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2329         << "type=" << type << " vs phi-type=" << phi->GetType();
2330     for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2331       HBasicBlock* predecessor = block->GetPredecessors()[i];
2332       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2333       HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2334       DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2335       phi->SetRawInputAt(i, input);
2336       DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2337           << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2338           << " request: " << type;
2339     }
2340   }
2341   // Add the Phis to their blocks.
2342   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2343     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2344     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2345     block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2346   }
2347   if (type == DataType::Type::kReference) {
2348     ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2349     ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2350     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2351       phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2352     }
2353     // Update reference type information. Pass invalid handles; these are not used for Phis.
2354     ReferenceTypePropagation rtp_fixup(GetGraph(),
2355                                        Handle<mirror::ClassLoader>(),
2356                                        Handle<mirror::DexCache>(),
2357                                        /* is_first_run= */ false);
2358     rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2359   }
2360 
2361   return true;
2362 }
2363 
2364 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2365                                      DataType::Type type) {
2366   // Use local allocator to reduce peak memory usage.
2367   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2368 
2369   // We want to recognize when a subset of these loop Phis that does not need any other
2370   // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
2371   // so that this instruction can be used instead of each Phi in the set. See for example
2372   // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
2373   // materialize these loop Phis starting from the smallest transitive closure.
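  // Worked example (illustrative): with placeholders {P1, P2} where P1 depends on P2 but
  // P2 depends only on itself, materializing the smaller closure {P2} first may find a
  // single non-Phi input and replace P2 with that instruction; P1 can then often be
  // replaced by the same instruction instead of creating two loop Phis.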
2374 
2375   // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2376   // assign new indexes to the Phi placeholders, making the matrix dense.
2377   ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2378                                            static_cast<size_t>(-1),  // Invalid.
2379                                            allocator.Adapter(kArenaAllocLSE));
2380   ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2381   size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2382   phi_placeholder_indexes.reserve(num_phi_placeholders);
2383   for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2384     matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2385     phi_placeholder_indexes.push_back(marker_index);
2386   }
2387   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2388   ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2389   dependencies.reserve(num_phi_placeholders);
2390   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2391     static constexpr bool kExpandable = false;
2392     dependencies.push_back(
2393         ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2394     ArenaBitVector* current_dependencies = dependencies.back();
2395     current_dependencies->ClearAllBits();
2396     current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
2397     PhiPlaceholder current_phi_placeholder =
2398         GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2399     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2400     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2401     size_t idx = current_phi_placeholder.GetHeapLocation();
2402     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2403       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2404       if (pred_value.NeedsLoopPhi()) {
2405         size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2406         DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2407         DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2408         current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2409       }
2410     }
2411   }
2412 
2413   // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
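  // For example (illustrative): with direct dependency rows P0:{P0,P1}, P1:{P1,P2} and
  // P2:{P2}, the closure below yields P0:{P0,P1,P2}, P1:{P1,P2} and P2:{P2}, so P2 has
  // the fewest transitive dependencies and is materialized first.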
2414   for (size_t k = 0; k != num_phi_placeholders; ++k) {
2415     for (size_t i = 0; i != num_phi_placeholders; ++i) {
2416       for (size_t j = 0; j != num_phi_placeholders; ++j) {
2417         if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2418           dependencies[i]->SetBit(j);
2419         }
2420       }
2421     }
2422   }
2423 
2424   // Count the number of transitive dependencies for each replaceable Phi placeholder.
2425   ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2426   num_dependencies.reserve(num_phi_placeholders);
2427   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2428     num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2429   }
2430 
2431   // Pick a Phi placeholder with the smallest number of transitive dependencies and
2432   // materialize it and its dependencies. Repeat until we have materialized all.
2433   ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2434   current_subset.reserve(num_phi_placeholders);
2435   size_t remaining_phi_placeholders = num_phi_placeholders;
2436   while (remaining_phi_placeholders != 0u) {
2437     auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2438     DCHECK_LE(*it, remaining_phi_placeholders);
2439     size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2440     ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2441     size_t current_num_dependencies = num_dependencies[current_matrix_index];
2442     current_subset.clear();
2443     for (uint32_t matrix_index : current_dependencies->Indexes()) {
2444       current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2445     }
2446     if (!MaterializeLoopPhis(current_subset, type)) {
2447       DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2448       // This is the final store elimination phase and we shall not be able to eliminate any
2449       // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2450       for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2451         if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2452           DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2453           phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
2454               Value::PureUnknown();
2455         }
2456       }
2457       return false;
2458     }
2459     for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2460       if (current_dependencies->IsBitSet(matrix_index)) {
2461         // Mark all dependencies as done by setting their `num_dependencies[.]` to the
2462         // maximum, so that they shall never be the minimum again.
2463         num_dependencies[matrix_index] = num_phi_placeholders;
2464       } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2465         // Remove dependencies from other Phi placeholders.
2466         dependencies[matrix_index]->Subtract(current_dependencies);
2467         num_dependencies[matrix_index] -= current_num_dependencies;
2468       }
2469     }
2470     remaining_phi_placeholders -= current_num_dependencies;
2471   }
2472   return true;
2473 }
2474 
2475 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2476   ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2477   ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2478   auto res =
2479       FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2480   CHECK(!res.has_value()) << *res;
2481   return MaterializeLoopPhis(abv, type);
2482 }
2483 
2484 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2485     PhiPlaceholder phi_placeholder, HInstruction* load) {
2486   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2487 
2488   // Use local allocator to reduce peak memory usage.
2489   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2490 
2491   // Find Phi placeholders to materialize.
2492   ArenaBitVector phi_placeholders_to_materialize(
2493       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2494   phi_placeholders_to_materialize.ClearAllBits();
2495   DataType::Type type = load->GetType();
2496   bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2497   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2498       phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2499   if (loop_phi_with_unknown_input) {
2500     DCHECK_GE(GetGraph()
2501                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2502                   ->GetPredecessors()
2503                   .size(),
2504               2u);
2505     return loop_phi_with_unknown_input;  // Return failure.
2506   }
2507 
2508   DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2509   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2510   DCHECK(success);
2511 
2512   // Report success.
2513   return std::nullopt;
2514 }
2515 
2516 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2517 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2518 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2519 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2520 // heap location that is needed by another loop Phi placeholder.
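// For example (illustrative): if a load in the loop cannot be replaced because one of its
// loop Phi inputs is unknown, the load itself becomes the new heap value for its location,
// and a later load from the same location may then be eliminated in favor of that load.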
2521 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2522   size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2523   DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2524   phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2525       Value::MergedUnknown(loop_phi_with_unknown_input);
2526 
2527   uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2528   const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2529   size_t reverse_post_order_index = 0;
2530   size_t reverse_post_order_size = reverse_post_order.size();
2531   size_t loads_and_stores_index = 0u;
2532   size_t loads_and_stores_size = loads_and_stores_.size();
2533 
2534   // Skip blocks and instructions before the block containing the loop phi with unknown input.
2535   DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2536   while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2537     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2538     while (loads_and_stores_index != loads_and_stores_size &&
2539            loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2540       ++loads_and_stores_index;
2541     }
2542     ++reverse_post_order_index;
2543     DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2544   }
2545 
2546   // Use local allocator to reduce peak memory usage.
2547   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2548   // Reuse one temporary vector for all remaining blocks.
2549   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2550   ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2551 
2552   auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2553     Value value;
2554     if (block->IsLoopHeader()) {
2555       if (block->GetLoopInformation()->IsIrreducible()) {
2556         PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2557         value = Value::MergedUnknown(placeholder);
2558       } else {
2559         value = PrepareLoopValue(block, idx);
2560       }
2561     } else {
2562       value = MergePredecessorValues(block, idx);
2563     }
2564     DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2565     return value;
2566   };
2567 
2568   // Process remaining blocks and instructions.
2569   bool found_unreplaceable_load = false;
2570   bool replaced_heap_value_with_unknown = false;
2571   for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2572     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2573     if (block->IsExitBlock()) {
2574       continue;
2575     }
2576 
2577     // We shall reconstruct only the heap values that we need for processing loads and stores.
2578     local_heap_values.clear();
2579     local_heap_values.resize(num_heap_locations, Value::Invalid());
2580 
2581     for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2582       HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2583       size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2584       if (load_or_store->GetBlock() != block) {
2585         break;  // End of instructions from the current block.
2586       }
2587       bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
2588       DCHECK_EQ(is_store, IsStore(load_or_store));
2589       HInstruction* stored_value = nullptr;
2590       if (is_store) {
2591         auto it = store_records_.find(load_or_store);
2592         DCHECK(it != store_records_.end());
2593         stored_value = it->second.stored_value;
2594       }
2595       auto it = loads_requiring_loop_phi_.find(
2596           stored_value != nullptr ? stored_value : load_or_store);
2597       if (it == loads_requiring_loop_phi_.end()) {
2598         continue;  // This load or store never needed a loop Phi.
2599       }
2600       ValueRecord& record = it->second;
2601       if (is_store) {
2602         // Process the store by updating `local_heap_values[idx]`. The last update shall
2603         // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2604         // at the end of the block.
2605         Value replacement = ReplacementOrValue(record.value);
2606         if (replacement.NeedsLoopPhi()) {
2607           // No replacement yet, use the Phi placeholder from the load.
2608           DCHECK(record.value.NeedsLoopPhi());
2609           local_heap_values[idx] = record.value;
2610         } else {
2611           // If the load fetched a known value, use it, otherwise use the load.
2612           local_heap_values[idx] = Value::ForInstruction(
2613               replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2614         }
2615       } else {
2616         // Process the load unless it has previously been marked unreplaceable.
2617         if (record.value.NeedsLoopPhi()) {
2618           if (local_heap_values[idx].IsInvalid()) {
2619             local_heap_values[idx] = get_initial_value(block, idx);
2620           }
2621           if (local_heap_values[idx].IsUnknown()) {
2622             // This load cannot be replaced. Keep stores that feed the Phi placeholder
2623             // (no aliasing since then, otherwise the Phi placeholder would not have been
2624             // propagated as a value to this load) and store the load as the new heap value.
2625             found_unreplaceable_load = true;
2626             KeepStores(record.value);
2627             record.value = Value::MergedUnknown(record.value.GetPhiPlaceholder());
2628             local_heap_values[idx] = Value::ForInstruction(load_or_store);
2629           } else if (local_heap_values[idx].NeedsLoopPhi()) {
2630             // The load may still be replaced with a Phi later.
2631             DCHECK(local_heap_values[idx].Equals(record.value));
2632           } else {
2633             // This load can be eliminated but we may need to construct non-loop Phis.
2634             if (local_heap_values[idx].NeedsNonLoopPhi()) {
2635               MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2636                                      load_or_store->GetType());
2637               local_heap_values[idx] = Replacement(local_heap_values[idx]);
2638             }
2639             record.value = local_heap_values[idx];
2640             HInstruction* heap_value = local_heap_values[idx].GetInstruction();
2641             AddRemovedLoad(load_or_store, heap_value);
2642             TryRemovingNullCheck(load_or_store);
2643           }
2644         }
2645       }
2646     }
2647 
2648     // All heap values that previously needed a loop Phi at the end of the block
2649     // need to be updated for processing successors.
2650     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2651     for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2652       if (heap_values[idx].value.NeedsLoopPhi()) {
2653         if (local_heap_values[idx].IsValid()) {
2654           heap_values[idx].value = local_heap_values[idx];
2655         } else {
2656           heap_values[idx].value = get_initial_value(block, idx);
2657         }
2658         if (heap_values[idx].value.IsUnknown()) {
2659           replaced_heap_value_with_unknown = true;
2660         }
2661       }
2662     }
2663   }
2664   DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2665 }
2666 
2667 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2668   // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2669   // make the result of the processing depend on the order in which we process these loads.
2670   // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2671   // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
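  // (Iterating the map directly would visit the loads in an order determined by their
  // pointer values, which may differ from run to run.)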
2672   for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2673     auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2674     if (it == loads_requiring_loop_phi_.end()) {
2675       continue;
2676     }
2677     HInstruction* load = it->first;
2678     ValueRecord& record = it->second;
2679     while (record.value.NeedsLoopPhi() &&
2680            phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2681       std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2682           TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2683       DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2684                 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2685       if (loop_phi_with_unknown_input) {
2686         DCHECK_GE(GetGraph()
2687                       ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2688                       ->GetPredecessors()
2689                       .size(),
2690                   2u);
2691         ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2692       }
2693     }
2694     // The load could have been marked as unreplaceable (and stores marked for keeping)
2695     // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2696     DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2697     if (record.value.NeedsLoopPhi()) {
2698       record.value = Replacement(record.value);
2699       HInstruction* heap_value = record.value.GetInstruction();
2700       AddRemovedLoad(load, heap_value);
2701       TryRemovingNullCheck(load);
2702     }
2703   }
2704 }
2705 
2706 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2707   ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2708   size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
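  // E.g. a start size of 5 reserves ((5 * 3) + 1) / 2 = 8 slots.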
2709   work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
2710   for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2711     work_queue.push_back(index);
2712   }
2713   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2714   std::optional<ArenaBitVector> not_kept_stores;
2715   if (stats_) {
2716     not_kept_stores.emplace(GetGraph()->GetAllocator(),
2717                             kept_stores_.GetBitSizeOf(),
2718                             false,
2719                             ArenaAllocKind::kArenaAllocLSE);
2720   }
2721   while (!work_queue.empty()) {
2722     uint32_t cur_phi_idx = work_queue.back();
2723     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2724     // Only writes to partial-escapes need to be specifically kept.
2725     bool is_partial_kept_merged_unknown =
2726         kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
2727         heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation())
2728             ->GetReferenceInfo()
2729             ->IsPartialSingleton();
2730     work_queue.pop_back();
2731     size_t idx = phi_placeholder.GetHeapLocation();
2732     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2733     DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2734                              << " (blocks: " << blocks.size() << ")";
2735     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2736       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2737       // For loop back-edges we must also preserve all stores to locations that
2738       // may alias with the location `idx`.
2739       // TODO: Add test cases around this.
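      // For example (illustrative): a store to `a[i + 1]` on the back-edge may write the
      // location read as `a[i]` in the next iteration, so that store must be kept as well.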
2740       bool is_back_edge =
2741           block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2742       size_t start = is_back_edge ? 0u : idx;
2743       size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2744       for (size_t i = start; i != end; ++i) {
2745         Value stored_by = heap_values[i].stored_by;
2746         if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2747           if (stored_by.NeedsPhi()) {
2748             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2749             if (is_partial_kept_merged_unknown) {
2750               // Propagate merged-unknown keep since otherwise this might look
2751               // like a partial escape we can remove.
2752               kept_merged_unknowns_.SetBit(phi_placeholder_index);
2753             }
2754             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2755               phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2756               work_queue.push_back(phi_placeholder_index);
2757             }
2758           } else {
2759             DCHECK(IsStore(stored_by.GetInstruction()));
2760             ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2761             DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2762                                   << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2763                                   << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2764             if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
2765               if (not_kept_stores) {
2766                 not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
2767               }
2768             } else {
2769               kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2770             }
2771           }
2772         }
2773       }
2774     }
2775   }
2776   if (not_kept_stores) {
2777     // a - b := (a & ~b)
2778     not_kept_stores->Subtract(&kept_stores_);
2779     auto num_removed = not_kept_stores->NumSetBits();
2780     MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
2781   }
2782 }
2783 
2784 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2785   while (value_record->stored_by.IsInstruction() &&
2786          !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2787     auto it = store_records_.find(value_record->stored_by.GetInstruction());
2788     DCHECK(it != store_records_.end());
2789     *value_record = it->second.old_value_record;
2790   }
2791   if (value_record->stored_by.NeedsPhi() &&
2792       !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2793            PhiPlaceholderIndex(value_record->stored_by))) {
2794     // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2795     // Phi placeholder to recalculate the actual value.
2796     value_record->value = value_record->stored_by;
2797   }
2798   value_record->value = ReplacementOrValue(value_record->value);
2799   if (value_record->value.NeedsNonLoopPhi()) {
2800     // Treat all Phi placeholders as requiring loop Phis at this point.
2801     // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2802     value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2803   }
2804 }
2805 
2806 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2807                                                DataType::Type type) {
2808   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2809 
2810   // Use local allocator to reduce peak memory usage.
2811   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2812   ArenaBitVector visited(&allocator,
2813                          /*start_bits=*/ num_phi_placeholders_,
2814                          /*expandable=*/ false,
2815                          kArenaAllocLSE);
2816   visited.ClearAllBits();
2817 
2818   // Find Phi placeholders to try and match against existing Phis or other replacement values.
2819   ArenaBitVector phi_placeholders_to_materialize(
2820       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2821   phi_placeholders_to_materialize.ClearAllBits();
2822   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2823       phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2824   if (loop_phi_with_unknown_input) {
2825     DCHECK_GE(GetGraph()
2826                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2827                   ->GetPredecessors()
2828                   .size(),
2829               2u);
2830     // Mark this placeholder as well as the input Phi placeholder as unreplaceable.
2831     phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
2832     phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2833         Value::PureUnknown();
2834     return;
2835   }
2836 
2837   DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2838   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2839   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2840   DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2841             !success);
2842 }
2843 
2844 struct ScopedRestoreHeapValues {
2845  public:
2846   ScopedRestoreHeapValues(ArenaStack* alloc,
2847                           size_t num_heap_locs,
2848                           ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
2849       : alloc_(alloc),
2850         updated_values_(alloc_.Adapter(kArenaAllocLSE)),
2851         to_restore_(to_restore) {
2852     updated_values_.reserve(num_heap_locs * to_restore_.size());
2853   }
2854 
2855   ~ScopedRestoreHeapValues() {
2856     for (const auto& rec : updated_values_) {
2857       to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
2858     }
2859   }
2860 
2861   template<typename Func>
2862   void ForEachRecord(Func func) {
2863     for (size_t blk_id : Range(to_restore_.size())) {
2864       for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
2865         LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
2866         LSEVisitor::Value initial = vr->value;
2867         func(vr);
2868         if (!vr->value.ExactEquals(initial)) {
2869           updated_values_.push_back({blk_id, heap_loc, initial});
2870         }
2871       }
2872     }
2873   }
2874 
2875  private:
2876   struct UpdateRecord {
2877     size_t blk_id;
2878     size_t heap_loc;
2879     LSEVisitor::Value val_;
2880   };
2881   ScopedArenaAllocator alloc_;
2882   ScopedArenaVector<UpdateRecord> updated_values_;
2883   ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
2884 
2885   DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
2886 };
2887 
2888 void LSEVisitor::FindStoresWritingOldValues() {
2889   // Partial LSE relies on knowing the real heap values, not the
2890   // store-replacement versions, so we need to restore the map after removing
2891   // stores.
2892   ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
2893                                     heap_location_collector_.GetNumberOfHeapLocations(),
2894                                     heap_values_for_);
2895   // The Phi placeholder replacements have so far been used for eliminating loads,
2896   // tracking values that would be stored if all stores were kept. As we want to
2897   // compare actual old values after removing unmarked stores, prune the Phi
2898   // placeholder replacements that can be fed by values we may not actually store.
2899   // Replacements marked as unknown can be kept as they are fed by some unknown
2900   // value and would end up as unknown again if we recalculated them.
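  // For example (illustrative): a replacement computed from the value of a store that we
  // do not keep no longer describes the actual memory contents, so it is invalidated here
  // and recomputed below from the stores that are really kept.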
2901   for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2902     if (!phi_placeholder_replacements_[i].IsUnknown() &&
2903         !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2904       phi_placeholder_replacements_[i] = Value::Invalid();
2905     }
2906   }
2907 
2908   // Update heap values at end of blocks.
2909   heap_vals.ForEachRecord([&](ValueRecord* rec) {
2910     UpdateValueRecordForStoreElimination(rec);
2911   });
2912 
2913   if (kIsDebugBuild) {
2914     heap_vals.ForEachRecord([](ValueRecord* rec) {
2915       DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
2916     });
2917   }
2918 
2919   // Use local allocator to reduce peak memory usage.
2920   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2921   // Mark the stores we want to eliminate in a separate bit vector.
2922   ArenaBitVector eliminated_stores(&allocator,
2923                                    /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2924                                    /*expandable=*/ false,
2925                                    kArenaAllocLSE);
2926   eliminated_stores.ClearAllBits();
2927 
2928   for (auto& entry : store_records_) {
2929     HInstruction* store = entry.first;
2930     StoreRecord& store_record = entry.second;
2931     if (!kept_stores_.IsBitSet(store->GetId())) {
2932       continue;  // Ignore stores that are not kept.
2933     }
2934     UpdateValueRecordForStoreElimination(&store_record.old_value_record);
2935     if (store_record.old_value_record.value.NeedsPhi()) {
2936       DataType::Type type = store_record.stored_value->GetType();
2937       FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
2938       store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
2939     }
2940     DCHECK(!store_record.old_value_record.value.NeedsPhi());
2941     HInstruction* stored_value = FindSubstitute(store_record.stored_value);
2942     if (store_record.old_value_record.value.Equals(stored_value)) {
2943       eliminated_stores.SetBit(store->GetId());
2944     }
2945   }
2946 
2947   // Commit the stores to eliminate by removing them from `kept_stores_`.
2948   kept_stores_.Subtract(&eliminated_stores);
2949 }
2950 
2951 void LSEVisitor::Run() {
2952   // 1. Process blocks and instructions in reverse post order.
2953   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2954     VisitBasicBlock(block);
2955   }
2956 
2957   // 2. Process loads that require loop Phis, trying to find/create replacements.
2958   current_phase_ = Phase::kLoadElimination;
2959   ProcessLoadsRequiringLoopPhis();
2960 
2961   // 3. Determine which stores to keep and which to eliminate.
2962   current_phase_ = Phase::kStoreElimination;
2963   // Finish marking stores for keeping.
2964   SearchPhiPlaceholdersForKeptStores();
2965 
2966   // Find stores that write the same value as is already present in the location.
2967   FindStoresWritingOldValues();
2968 
2969   // 4. Replace loads and remove unnecessary stores and singleton allocations.
2970   FinishFullLSE();
2971 
2972   // 5. Move partial escapes down and fixup with PHIs.
2973   current_phase_ = Phase::kPartialElimination;
2974   MovePartialEscapes();
2975 }
2976 
2977 // Clear unknown loop-phi results. Here we'll be able to use partial-unknowns, so we need to
2978 // retry all of them with more information about where they come from.
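// Concretely, defaults and already known instruction values are kept; unknowns and Phi
// placeholder results are reset to Invalid so that they are recomputed with the
// partial-escape information.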
2979 void LSEVisitor::PrepareForPartialPhiComputation() {
2980   std::replace_if(
2981       phi_placeholder_replacements_.begin(),
2982       phi_placeholder_replacements_.end(),
2983       [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); },
2984       Value::Invalid());
2985 }
2986 
2987 class PartialLoadStoreEliminationHelper {
2988  public:
2989   PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
2990       : lse_(lse),
2991         alloc_(alloc),
2992         new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
2993         heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
2994         max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
2995                                                 GetGraph()->GetActiveBlocks().end(),
2996                                                 [](HBasicBlock* a, HBasicBlock* b) {
2997                                                   return a->GetNumberOfPredecessors() <
2998                                                          b->GetNumberOfPredecessors();
2999                                                 }))
3000                                  ->GetNumberOfPredecessors()),
3001         materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
3002                                 nullptr,
3003                                 alloc_->Adapter(kArenaAllocLSE)),
3004         first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
3005     size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
3006     heap_refs_.reserve(num_partial_singletons);
3007     new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
3008     CollectInterestingHeapRefs();
3009   }
3010 
3011   ~PartialLoadStoreEliminationHelper() {
3012     if (heap_refs_.empty()) {
3013       return;
3014     }
3015     ReferenceTypePropagation rtp_fixup(GetGraph(),
3016                                        Handle<mirror::ClassLoader>(),
3017                                        Handle<mirror::DexCache>(),
3018                                        /* is_first_run= */ false);
3019     rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
3020     GetGraph()->ClearLoopInformation();
3021     GetGraph()->ClearDominanceInformation();
3022     GetGraph()->ClearReachabilityInformation();
3023     GetGraph()->BuildDominatorTree();
3024     GetGraph()->ComputeReachabilityInformation();
3025   }
3026 
3027   class IdxToHeapLoc {
3028    public:
3029     explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
3030     HeapLocation* operator()(size_t idx) const {
3031       return collector_->GetHeapLocation(idx);
3032     }
3033 
3034    private:
3035     const HeapLocationCollector* collector_;
3036   };
3037 
3038 
3039   class HeapReferenceData {
3040    public:
3041     using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
3042     HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
3043                       HNewInstance* new_inst,
3044                       const ExecutionSubgraph* subgraph,
3045                       ScopedArenaAllocator* alloc)
3046         : new_instance_(new_inst),
3047           helper_(helper),
3048           heap_locs_(alloc,
3049                      helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
3050                      /* expandable= */ false,
3051                      kArenaAllocLSE),
3052           materializations_(
3053               // We generally won't need to create too many materialization blocks, and we
3054               // can expand this as needed, so just start off with 2x.
3055               2 * helper->lse_->GetGraph()->GetBlocks().size(),
3056               nullptr,
3057               alloc->Adapter(kArenaAllocLSE)),
3058           collector_(helper->lse_->heap_location_collector_),
3059           subgraph_(subgraph) {}
3060 
3061     LocIterator IterateLocations() {
3062       auto idxs = heap_locs_.Indexes();
3063       return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
3064     }
3065 
3066     void AddHeapLocation(size_t idx) {
3067       heap_locs_.SetBit(idx);
3068     }
3069 
3070     const ExecutionSubgraph* GetNoEscapeSubgraph() const {
3071       return subgraph_;
3072     }
3073 
3074     bool IsPostEscape(HBasicBlock* blk) {
3075       return std::any_of(
3076           subgraph_->GetExcludedCohorts().cbegin(),
3077           subgraph_->GetExcludedCohorts().cend(),
3078           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
3079     }
3080 
3081     bool InEscapeCohort(HBasicBlock* blk) {
3082       return std::any_of(
3083           subgraph_->GetExcludedCohorts().cbegin(),
3084           subgraph_->GetExcludedCohorts().cend(),
3085           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
3086     }
3087 
3088     bool BeforeAllEscapes(HBasicBlock* b) {
3089       return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
3090                           subgraph_->GetExcludedCohorts().cend(),
3091                           [&](const ExecutionSubgraph::ExcludedCohort& ec) {
3092                             return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
3093                           });
3094     }
3095 
3096     HNewInstance* OriginalNewInstance() const {
3097       return new_instance_;
3098     }
3099 
3100     // Collect and replace all uses. We need to perform this twice since we will
3101     // generate PHIs and additional uses as we create the default-values for
3102     // pred-gets. These values might be other references that are also being
3103     // partially eliminated. By running just the replacement part again we are
3104     // able to avoid having to keep another whole in-progress partial map
3105     // around. Since we will have already handled all the other uses in the
3106     // first pass the second one will be quite fast.
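    // For example (illustrative): the default value created for a predicated get on object
    // A may itself be a reference to object B that is also being partially eliminated; the
    // second pass only needs to patch those newly created uses.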
3107     void FixupUses(bool first_pass) {
3108       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3109       // Replace uses with materialized values.
3110       ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
3111       ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
3112       // Do we need to add a constructor-fence.
3113       ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
3114           saa.Adapter(kArenaAllocLSE));
3115       ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
3116 
3117       CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
3118 
3119       if (!first_pass) {
3120         // If another partial elimination creates new references, they can only be in Phis
3121         // or pred-get defaults, so they must be in the to_replace group.
3122         DCHECK(to_predicate.empty());
3123         DCHECK(constructor_fences.empty());
3124         DCHECK(to_remove.empty());
3125       }
3126 
3127       ReplaceInput(to_replace);
3128       RemoveAndReplaceInputs(to_remove);
3129       CreateConstructorFences(constructor_fences);
3130       PredicateInstructions(to_predicate);
3131 
3132       CHECK(OriginalNewInstance()->GetUses().empty())
3133           << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
3134     }
3135 
3136     void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
3137       if (blk->GetBlockId() >= materializations_.size()) {
3138         // Make sure the materialization array is large enough; try to avoid
3139         // resizing too many times by giving extra space.
3140         materializations_.resize(blk->GetBlockId() * 2, nullptr);
3141       }
3142       DCHECK(materializations_[blk->GetBlockId()] == nullptr)
3143           << "Already have a materialization in block " << blk->GetBlockId() << ": "
3144           << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
3145           << *ins;
3146       materializations_[blk->GetBlockId()] = ins;
3147       LSE_VLOG << "In  block " << blk->GetBlockId() << " materialization is " << *ins;
3148       helper_->NotifyNewMaterialization(ins);
3149     }
3150 
3151     bool HasMaterialization(HBasicBlock* blk) const {
3152       return blk->GetBlockId() < materializations_.size() &&
3153              materializations_[blk->GetBlockId()] != nullptr;
3154     }
3155 
3156     HInstruction* GetMaterialization(HBasicBlock* blk) const {
3157       if (materializations_.size() <= blk->GetBlockId() ||
3158           materializations_[blk->GetBlockId()] == nullptr) {
3159         // This must be a materialization block added after the partial LSE of
3160         // the current reference finished. Since every edge can only have at
3161         // most one materialization block added to it, we can just check the
3162         // block's predecessor.
3163         DCHECK(helper_->IsMaterializationBlock(blk));
3164         blk = helper_->FindDominatingNonMaterializationBlock(blk);
3165         DCHECK(!helper_->IsMaterializationBlock(blk));
3166       }
3167       DCHECK_GT(materializations_.size(), blk->GetBlockId());
3168       DCHECK(materializations_[blk->GetBlockId()] != nullptr);
3169       return materializations_[blk->GetBlockId()];
3170     }
3171 
3172     void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
3173       DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3174                           GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3175                           [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3176                             return cohort.IsEntryBlock(blk);
3177                           }));
3178       DCHECK(!HasMaterialization(blk));
3179       if (blk->IsExitBlock()) {
3180         return;
3181       } else if (blk->IsLoopHeader()) {
3182         // See comment in execution_subgraph.h. Currently we act as though every
3183         // allocation for partial elimination takes place in the entry block.
3184         // This simplifies the analysis by making it so any escape cohort
3185         // expands to contain any loops it is a part of. This is something that
3186         // we should rectify at some point. In either case, however, we can still
3187         // special-case the loop-header since (1) currently the loop can't have
3188         // any merges between different cohort entries since the pre-header will
3189         // be the earliest place entry can happen and (2) even if the analysis
3190         // is improved to consider lifetime of the object WRT loops any values
3191         // which would require loop-phis would have to make the whole loop
3192         // escape anyway.
3193         // This all means we can always use value from the pre-header when the
3194         // block is the loop-header and we didn't already create a
3195         // materialization block. (NB when we do improve the analysis we will
3196         // need to modify the materialization creation code to deal with this
3197         // correctly.)
3198         HInstruction* pre_header_val =
3199             GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
3200         AddMaterialization(blk, pre_header_val);
3201         return;
3202       }
3203       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3204       ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
3205       pred_vals.reserve(blk->GetNumberOfPredecessors());
3206       for (HBasicBlock* pred : blk->GetPredecessors()) {
3207         DCHECK(HasMaterialization(pred));
3208         pred_vals.push_back(GetMaterialization(pred));
3209       }
3210       GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
3211     }
3212 
3213     void GenerateMaterializationValueFromPredecessorsForEntry(
3214         HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
3215       DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3216                          GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3217                          [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3218                            return cohort.IsEntryBlock(entry);
3219                          }));
3220       GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
3221     }
3222 
3223    private:
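    // A (user instruction, input index) pair recording where the original
    // allocation is consumed; CollectReplacements() gathers these so the fixup
    // passes below can work from plain vectors instead of re-walking the
    // intrusive use lists.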
3224     template <typename InstructionType>
3225     struct InstructionUse {
3226       InstructionType* instruction_;
3227       size_t index_;
3228     };
3229 
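    // Point each collected use at the materialization for the user's block.
    // When the user is a Phi in the same block as the materialization Phi we
    // forward the matching per-predecessor input instead, keeping the value
    // edge-accurate.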
3230     void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
3231       for (auto& [ins, idx] : to_replace) {
3232         HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3233         if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
3234           // For Phis we just pass through the appropriate inputs.
3235           ins->ReplaceInput(merged_inst->InputAt(idx), idx);
3236         } else {
3237           ins->ReplaceInput(merged_inst, idx);
3238         }
3239       }
3240     }
3241 
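    // Remove users that sit before any escape: field accesses are deleted
    // (any remaining readers get the tracked partial value) and obj ==/!=
    // comparisons fold to constants, since the not-yet-escaped object cannot
    // alias anything else.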
3242     void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) {
3243       for (HInstruction* ins : to_remove) {
3244         if (ins->GetBlock() == nullptr) {
3245           // Already dealt with.
3246           continue;
3247         }
3248         DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
3249         if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
3250           bool instruction_has_users =
3251               ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty());
3252           if (instruction_has_users) {
3253             // Make sure any remaining users of the read are replaced.
3254             HInstruction* replacement =
3255                 helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins);
3256             // NB ReplaceInput will remove a use from the list so this is
3257             // guaranteed to finish eventually.
3258             while (!ins->GetUses().empty()) {
3259               const HUseListNode<HInstruction*>& use = ins->GetUses().front();
3260               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3261             }
3262             while (!ins->GetEnvUses().empty()) {
3263               const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front();
3264               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3265             }
3266           } else {
3267             DCHECK(ins->GetUses().empty())
3268                 << "Instruction has users!\n"
3269                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses();
3270             DCHECK(ins->GetEnvUses().empty())
3271                 << "Instruction has users!\n"
3272                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses();
3273           }
3274           ins->GetBlock()->RemoveInstruction(ins);
3275         } else {
3276           // Can only be obj == other, obj != other, obj == obj (!?) or obj != obj (!?).
3277           // Since PHIs are escapes as far as LSE is concerned and we are before
3278           // any escapes, these are the only 4 options.
3279           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
3280           HInstruction* replacement;
3281           if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
3282             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
3283                                          : GetGraph()->GetIntConstant(0);
3284           } else {
3285             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
3286                                          : GetGraph()->GetIntConstant(1);
3287           }
3288           ins->ReplaceWith(replacement);
3289           ins->GetBlock()->RemoveInstruction(ins);
3290         }
3291       }
3292     }
3293 
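    // Constructor fences that covered the original allocation are removed here
    // and fresh fences are emitted in every materialization block feeding a
    // cohort entry, so each materialized instance is still fenced before it
    // can escape.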
3294     void CreateConstructorFences(
3295         const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
3296       if (!constructor_fences.empty()) {
3297         uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
3298         for (auto& [cf, idx] : constructor_fences) {
3299           if (cf->GetInputs().size() == 1) {
3300             cf->GetBlock()->RemoveInstruction(cf);
3301           } else {
3302             cf->RemoveInputAt(idx);
3303           }
3304         }
3305         for (const ExecutionSubgraph::ExcludedCohort& ec :
3306             GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3307           for (HBasicBlock* blk : ec.EntryBlocks()) {
3308             for (HBasicBlock* materializer :
3309                 Filter(MakeIterationRange(blk->GetPredecessors()),
3310                         [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
3311               HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
3312                   GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
3313               materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
3314             }
3315           }
3316         }
3317       }
3318     }
3319 
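    // Rewrite users that execute after a potential escape so they work whether
    // or not the materialization happened: gets become predicated gets with the
    // partial value as default, sets are marked as predicated, and comparisons
    // are combined with an "is the materialization non-null" check.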
3320     void PredicateInstructions(
3321         const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3322       for (auto& [ins, idx] : to_predicate) {
3323         if (UNLIKELY(ins->GetBlock() == nullptr)) {
3324           // Already handled due to obj == obj.
3325           continue;
3326         } else if (ins->IsInstanceFieldGet()) {
3327           // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
3328           HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
3329               ins->AsInstanceFieldGet(),
3330               GetMaterialization(ins->GetBlock()),
3331               helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
3332           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
3333           ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
3334           if (ins->GetType() == DataType::Type::kReference) {
3335             // Reference info is the same
3336             new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
3337           }
3338           // In this phase, substitute instructions are used only for the predicated get
3339           // default values which are used only if the partial singleton did not escape,
3340           // so the out value of the `new_fget` for the relevant cases is the same as
3341           // the default value.
3342           // TODO: Use the default value for materializing default values used by
3343           // other predicated loads to avoid some unnecessary Phis. (This would
3344           // complicate the search for replacement in `ReplacementOrValue()`.)
3345           DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr);
3346           helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget;
3347           ins->ReplaceWith(new_fget);
3348           ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
3349           CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
3350               << "Instruction: " << *ins << " uses: " << ins->GetUses()
3351               << ", env: " << ins->GetEnvUses();
3352           ins->GetBlock()->RemoveInstruction(ins);
3353         } else if (ins->IsInstanceFieldSet()) {
3354           // Any predicated sets shouldn't require movement.
3355           ins->AsInstanceFieldSet()->SetIsPredicatedSet();
3356           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
3357           HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3358           ins->ReplaceInput(merged_inst, idx);
3359         } else {
3360           // Comparisons need to be split into 2.
3361           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
3362           bool this_is_first = idx == 0;
3363           if (ins->InputAt(0) == ins->InputAt(1)) {
3364             // This is an obj == obj or obj != obj.
3365             // No idea why anyone would do this but whatever.
3366             ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
3367             ins->GetBlock()->RemoveInstruction(ins);
3368             continue;
3369           } else {
3370             HInstruction* is_escaped = new (GetGraph()->GetAllocator())
3371                 HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
3372             HInstruction* combine_inst =
3373                 ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
3374                                      DataType::Type::kBool, is_escaped, ins))
3375                                : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
3376                                      DataType::Type::kBool, is_escaped, ins));
3377             ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
3378             ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
3379             ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
3380             ins->ReplaceWith(combine_inst);
3381             combine_inst->ReplaceInput(ins, 1);
3382           }
3383         }
3384       }
3385     }
3386 
3387     // Figure out all the instructions we need to
3388     // fixup/replace/remove/duplicate. Since this requires an iteration of an
3389     // intrusive linked list we want to do it only once and collect all the data
3390     // here.
3391     void CollectReplacements(
3392         ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
3393         ScopedArenaVector<HInstruction*>& to_remove,
3394         ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
3395         ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3396       size_t size = new_instance_->GetUses().SizeSlow();
3397       to_replace.reserve(size);
3398       to_remove.reserve(size);
3399       constructor_fences.reserve(size);
3400       to_predicate.reserve(size);
3401       for (auto& use : new_instance_->GetUses()) {
3402         HBasicBlock* blk =
3403             helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
3404         if (InEscapeCohort(blk)) {
3405           LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
3406                    << *GetMaterialization(blk);
3407           to_replace.push_back({use.GetUser(), use.GetIndex()});
3408         } else if (IsPostEscape(blk)) {
3409           LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
3410           // The fields + cmp are normal uses. A Phi can only be here if it was
3411           // generated by full LSE, so whatever store+load created the phi
3412           // is the escape.
3413           if (use.GetUser()->IsPhi()) {
3414             to_replace.push_back({use.GetUser(), use.GetIndex()});
3415           } else {
3416             DCHECK(use.GetUser()->IsFieldAccess() ||
3417                    use.GetUser()->IsEqual() ||
3418                    use.GetUser()->IsNotEqual())
3419                 << *use.GetUser() << "@" << use.GetIndex();
3420             to_predicate.push_back({use.GetUser(), use.GetIndex()});
3421           }
3422         } else if (use.GetUser()->IsConstructorFence()) {
3423           LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
3424           constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
3425         } else {
3426           LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
3427           to_remove.push_back(use.GetUser());
3428         }
3429       }
3430       DCHECK_EQ(
3431           to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
3432           size);
3433     }
3434 
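    // Merge the predecessor materializations for the given block: if every
    // predecessor agrees we reuse that single value, otherwise we build a
    // reference-typed Phi with one input per predecessor.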
3435     void GenerateMaterializationValueFromPredecessorsDirect(
3436         HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
3437       DCHECK(!pred_vals.empty());
3438       bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
3439         return val == pred_vals.front();
3440       });
3441       if (LIKELY(all_equal)) {
3442         AddMaterialization(blk, pred_vals.front());
3443       } else {
3444         // Make a PHI for the predecessors.
3445         HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
3446             GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
3447         for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
3448           phi->SetRawInputAt(off, ins);
3449         }
3450         blk->AddPhi(phi);
3451         AddMaterialization(blk, phi);
3452       }
3453     }
3454 
3455     HGraph* GetGraph() const {
3456       return helper_->GetGraph();
3457     }
3458 
3459     HNewInstance* new_instance_;
3460     PartialLoadStoreEliminationHelper* helper_;
3461     ArenaBitVector heap_locs_;
3462     ScopedArenaVector<HInstruction*> materializations_;
3463     const HeapLocationCollector& collector_;
3464     const ExecutionSubgraph* subgraph_;
3465   };
3466 
3467   ArrayRef<HeapReferenceData> GetHeapRefs() {
3468     return ArrayRef<HeapReferenceData>(heap_refs_);
3469   }
3470 
3471   bool IsMaterializationBlock(HBasicBlock* blk) const {
3472     return blk->GetBlockId() >= first_materialization_block_id_;
3473   }
3474 
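  // Lazily create the materialization block for the edge
  // `entry->GetPredecessors()[pred_num] -> entry`. The block initially holds
  // only a Goto and is not wired into the edge until
  // InsertMaterializationBlocks() runs.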
3475   HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3476     size_t idx = GetMaterializationBlockIndex(entry, pred_num);
3477     HBasicBlock* blk = materialization_blocks_[idx];
3478     if (blk == nullptr) {
3479       blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
3480       GetGraph()->AddBlock(blk);
3481       LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
3482                << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
3483       blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
3484       materialization_blocks_[idx] = blk;
3485     }
3486     return blk;
3487   }
3488 
3489   HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3490     HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
3491     DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
3492                            << entry->GetPredecessors()[pred_num]->GetBlockId();
3493     return out;
3494   }
3495 
3496   IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
3497     return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
3498                               GetGraph()->GetBlocks().end());
3499   }
3500 
3501   void FixupPartialObjectUsers() {
3502     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3503       // Use the materialized instances to replace the original instance.
3504       ref_data.FixupUses(/*first_pass=*/true);
3505       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3506           << ref_data.OriginalNewInstance()->GetUses() << ", "
3507           << ref_data.OriginalNewInstance()->GetEnvUses();
3508     }
3509     // This can cause new uses to be created due to the creation of phis/pred-get defaults.
3510     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3511       // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
3512       ref_data.FixupUses(/*first_pass=*/false);
3513       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3514           << ref_data.OriginalNewInstance()->GetUses() << ", "
3515           << ref_data.OriginalNewInstance()->GetEnvUses();
3516     }
3517   }
3518 
3519   // Finds the first block that either is, or dominates, the given block and is
3520   // not a materialization block.
3521   HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
3522     if (LIKELY(!IsMaterializationBlock(blk))) {
3523       // Not a materialization block so itself.
3524       return blk;
3525     } else if (blk->GetNumberOfPredecessors() != 0) {
3526       // We're far enough along that the materialization blocks have been
3527       // inserted into the graph so no need to go searching.
3528       return blk->GetSinglePredecessor();
3529     }
3530     // Search through the materialization blocks to find where it will be
3531     // inserted.
3532     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3533       if (mat == blk) {
3534         size_t cur_pred_idx = idx % max_preds_per_block_;
3535         HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3536         return entry->GetPredecessors()[cur_pred_idx];
3537       }
3538     }
3539     LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
3540     return nullptr;
3541   }
3542 
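  // Splice every created materialization block into its recorded
  // (predecessor -> entry) edge now that all materialization values and user
  // fixups are in place.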
3543   void InsertMaterializationBlocks() {
3544     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3545       if (mat == nullptr) {
3546         continue;
3547       }
3548       size_t cur_pred_idx = idx % max_preds_per_block_;
3549       HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3550       HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
3551       mat->InsertBetween(pred, entry);
3552       LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
3553                << pred->GetBlockId() << "->" << entry->GetBlockId();
3554     }
3555   }
3556 
3557   // Replace any env-uses remaining of the partial singletons with the
3558   // appropriate phis and remove the instructions.
3559   void RemoveReplacedInstructions() {
3560     for (HeapReferenceData& ref_data : GetHeapRefs()) {
3561       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3562           << ref_data.OriginalNewInstance()->GetUses() << ", "
3563           << ref_data.OriginalNewInstance()->GetEnvUses()
3564           << " inst is: " << ref_data.OriginalNewInstance();
3565       const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
3566       while (!env_uses.empty()) {
3567         const HUseListNode<HEnvironment*>& use = env_uses.front();
3568         HInstruction* merged_inst =
3569             ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
3570         LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
3571                  << " with " << *merged_inst;
3572         use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
3573       }
3574       ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
3575     }
3576   }
3577 
3578   // We need to make sure any allocations dominate their environment uses.
3579   // Technically we could probably remove the env-uses and be fine but this is easy.
3580   void ReorderMaterializationsForEnvDominance() {
3581     for (HBasicBlock* blk : IterateMaterializationBlocks()) {
3582       ScopedArenaAllocator alloc(alloc_->GetArenaStack());
3583       ArenaBitVector still_unsorted(
3584           &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
3585       // This is guaranteed to be very short (since we will abandon LSE if there
3586       // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the
3587       // absolute maximum size this list can be) so doing a selection sort is
3588       // fine. This avoids the need to do a complicated recursive check to
3589       // ensure transitivity for std::sort.
3590       ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
3591       materializations.reserve(GetHeapRefs().size());
3592       for (HInstruction* ins :
3593            MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
3594         if (ins->IsNewInstance()) {
3595           materializations.push_back(ins->AsNewInstance());
3596           still_unsorted.SetBit(ins->GetId());
3597         }
3598       }
3599       using Iter = ScopedArenaVector<HNewInstance*>::iterator;
3600       Iter unsorted_start = materializations.begin();
3601       Iter unsorted_end = materializations.end();
3602       // Selection sort. Required since the only check we can easily perform is an
3603       // is-before-all-unsorted check.
3604       while (unsorted_start != unsorted_end) {
3605         bool found_instruction = false;
3606         for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
3607           HNewInstance* ni = *candidate;
3608           if (std::none_of(ni->GetAllEnvironments().cbegin(),
3609                            ni->GetAllEnvironments().cend(),
3610                            [&](const HEnvironment* env) {
3611                              return std::any_of(
3612                                  env->GetEnvInputs().cbegin(),
3613                                  env->GetEnvInputs().cend(),
3614                                  [&](const HInstruction* env_element) {
3615                                    return env_element != nullptr &&
3616                                           still_unsorted.IsBitSet(env_element->GetId());
3617                                  });
3618                            })) {
3619             still_unsorted.ClearBit(ni->GetId());
3620             std::swap(*unsorted_start, *candidate);
3621             ++unsorted_start;
3622             found_instruction = true;
3623             break;
3624           }
3625         }
3626         CHECK(found_instruction) << "Unable to select next materialization instruction."
3627                                  << " Environments have a dependency loop!";
3628       }
3629       // Reverse so that, as we prepend them, we end up with the correct order.
3630       auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
3631       for (HNewInstance* ins : reverse_iter) {
3632         if (blk->GetFirstInstruction() != ins) {
3633           // Don't do checks since they would verify that the move is safe WRT
3634           // ins->CanBeMoved(), which for NewInstance is false.
3635           ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
3636         }
3637       }
3638     }
3639   }
3640 
3641  private:
3642   void CollectInterestingHeapRefs() {
3643     // Get all the partials we need to move around.
3644     for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
3645       ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
3646       if (ri->IsPartialSingleton() &&
3647           ri->GetReference()->GetBlock() != nullptr &&
3648           ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
3649         RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
3650       }
3651     }
3652   }
3653 
3654   void RecordHeapRefField(HNewInstance* ni, size_t loc) {
3655     DCHECK(ni != nullptr);
3656     // This is likely to be very short so just do a linear search.
3657     auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
3658       return data.OriginalNewInstance() == ni;
3659     });
3660     HeapReferenceData& cur_ref =
3661         (it == heap_refs_.end())
3662             ? heap_refs_.emplace_back(this,
3663                                       ni,
3664                                       lse_->heap_location_collector_.GetHeapLocation(loc)
3665                                           ->GetReferenceInfo()
3666                                           ->GetNoEscapeSubgraph(),
3667                                       alloc_)
3668             : *it;
3669     cur_ref.AddHeapLocation(loc);
3670   }
3671 
3672 
3673   void NotifyNewMaterialization(HInstruction* ins) {
3674     if (ins->IsPhi()) {
3675       new_ref_phis_.push_back(ins->AsPhi());
3676     }
3677   }
3678 
3679   size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
3680     DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
3681         << "block is a materialization block!";
3682     DCHECK_LT(pred_num, max_preds_per_block_);
3683     return blk->GetBlockId() * max_preds_per_block_ + pred_num;
3684   }
3685 
3686   HGraph* GetGraph() const {
3687     return lse_->GetGraph();
3688   }
3689 
3690   LSEVisitor* lse_;
3691   ScopedArenaAllocator* alloc_;
3692   ScopedArenaVector<HInstruction*> new_ref_phis_;
3693   ScopedArenaVector<HeapReferenceData> heap_refs_;
3694   size_t max_preds_per_block_;
3695   // An array of (# of non-materialization blocks) * max_preds_per_block
3696   // arranged in block-id major order. Since we can only have at most one
3697   // materialization block on each edge this is the maximum possible number of
3698   // materialization blocks.
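  // For example, with max_preds_per_block_ == 4, the materialization block for
  // the edge from predecessor #2 into the block with id 7 is stored at index
  // 7 * 4 + 2 == 30 (see GetMaterializationBlockIndex()).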
3699   ScopedArenaVector<HBasicBlock*> materialization_blocks_;
3700   size_t first_materialization_block_id_;
3701 
3702   friend void LSEVisitor::MovePartialEscapes();
3703 };
3704 
3705 // Work around C++ type-checking annoyances with not being able to forward-declare inner types.
3706 class HeapRefHolder
3707     : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};
3708 
3709 HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
3710                                                       HeapRefHolder&& holder,
3711                                                       size_t pred_idx,
3712                                                       HBasicBlock* entry) {
3713   PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
3714   HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
3715   HInstruction* new_inst = ref_data.OriginalNewInstance();
3716   if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
3717     LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
3718              << " is null!";
3719     return GetGraph()->GetNullConstant();
3720   }
3721   HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
3722   CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
3723   HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
3724   repl_create->SetPartialMaterialization();
3725   bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
3726   repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
3727   MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
3728   LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
3729   ref_data.AddMaterialization(bb, repl_create);
3730   const FieldInfo* info = nullptr;
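  // Recreate the object's observable state at the materialization point: for
  // every tracked heap location, write the value that was live at the end of
  // the old predecessor into the cloned allocation. Default (zero) values and
  // the shadow$_klass_ field are skipped since the new-instance already
  // provides them.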
3731   for (const HeapLocation* loc : ref_data.IterateLocations()) {
3732     size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3733     info = field_infos_[loc_off];
3734     DCHECK(loc->GetIndex() == nullptr);
3735     Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
3736     if (value.NeedsLoopPhi() || value.IsMergedUnknown()) {
3737       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3738       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3739           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3740       if (!repl.IsInvalid()) {
3741         value = repl;
3742       } else {
3743         FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
3744         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3745       }
3746     } else if (value.NeedsNonLoopPhi()) {
3747       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3748       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3749           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3750       if (!repl.IsInvalid()) {
3751         value = repl;
3752       } else {
3753         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
3754         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3755       }
3756     }
3757     DCHECK(value.IsDefault() || value.IsInstruction())
3758         << GetGraph()->PrettyMethod() << ": " << value;
3759 
3760     if (!value.IsDefault() &&
3761         // shadow$_klass_ doesn't need to be manually initialized.
3762         MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
3763       CHECK(info != nullptr);
3764       HInstruction* set_value =
3765           new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
3766                                                              value.GetInstruction(),
3767                                                              field_infos_[loc_off]->GetField(),
3768                                                              loc->GetType(),
3769                                                              MemberOffset(loc->GetOffset()),
3770                                                              false,
3771                                                              field_infos_[loc_off]->GetFieldIndex(),
3772                                                              loc->GetDeclaringClassDefIndex(),
3773                                                              field_infos_[loc_off]->GetDexFile(),
3774                                                              0u);
3775       bb->InsertInstructionAfter(set_value, repl_create);
3776       LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
3777     }
3778   }
3779   return repl_create;
3780 }
3781 
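// Returns the instruction holding the value of `read`'s heap location on the
// paths where the object has not (yet) escaped, materializing a Phi or using
// the type's default value when necessary. This is what predicated gets and
// removed pre-escape reads receive as their replacement value.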
3782 HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
3783   size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
3784   Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
3785   LSE_VLOG << "using " << pred << " as default value for " << *read;
3786   if (pred.IsInstruction()) {
3787     return pred.GetInstruction();
3788   } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
3789     FullyMaterializePhi(pred.GetPhiPlaceholder(),
3790                         heap_location_collector_.GetHeapLocation(loc)->GetType());
3791     HInstruction* res = Replacement(pred).GetInstruction();
3792     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3793     return res;
3794   } else if (pred.IsDefault()) {
3795     HInstruction* res = GetDefaultValue(read->GetType());
3796     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3797     return res;
3798   }
3799   LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
3800              << "! This should be impossible! Value is " << pred;
3801   UNREACHABLE();
3802 }
3803 
3804 void LSEVisitor::MovePartialEscapes() {
3805   if (!ShouldPerformPartialLSE()) {
3806     return;
3807   }
3808 
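  // Overall flow: create a materialization (a clone of the allocation plus
  // field stores) on every edge entering an escape cohort, thread those values
  // through the graph with Phis, rewrite all users of the original allocation,
  // splice the new blocks into the graph, and finally delete the original
  // HNewInstance.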
3809   ScopedArenaAllocator saa(allocator_.GetArenaStack());
3810   PartialLoadStoreEliminationHelper helper(this, &saa);
3811 
3812   // Since for PHIs we will now have more information (since we know the object
3813   // hasn't escaped), we need to clear the old phi-replacements where we weren't
3814   // able to find the value.
3815   PrepareForPartialPhiComputation();
3816 
3817   for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
3818     LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
3819     // Setup entry and exit blocks.
3820     for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3821       // Setup materialization blocks.
3822       for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
3823         // Setup entries.
3824         // TODO Assuming we correctly break critical edges every entry block
3825         // must have only a single predecessor so we could just put all this
3826         // stuff in there. OTOH the simplifier can do it for us and this is simpler
3827         // to implement - giving clean separation between the original graph and
3828         // materialization blocks - so for now we might as well have these new
3829         // blocks.
3830         ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
3831         ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
3832         pred_vals.reserve(entry->GetNumberOfPredecessors());
3833         for (const auto& [pred, pred_idx] :
3834              ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
3835           DCHECK(!helper.IsMaterializationBlock(pred));
3836           if (excluded_cohort.IsEntryBlock(pred)) {
3837             pred_vals.push_back(ref_data.GetMaterialization(pred));
3838             continue;
3839           } else {
3840             pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
3841           }
3842         }
3843         ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
3844       }
3845 
3846       // Setup exit block heap-values for later phi-generation.
3847       for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
3848         // Mark every exit of the cohorts as having a value so we can easily
3849         // materialize the PHIs.
3850         // TODO By setting this we can easily use the normal MaterializeLoopPhis
3851         //      (via FullyMaterializePhis) in order to generate the default-values
3852         //      for predicated-gets. This has the unfortunate side effect of creating
3853         //      somewhat more phis than are really needed (in some cases). We really
3854         //      should try to eventually know that we can lower these PHIs to only
3855         //      the non-escaping value in cases where it is possible. Currently this
3856         //      is done to some extent in instruction_simplifier but we have more
3857         //      information here to do the right thing.
3858         for (const HeapLocation* loc : ref_data.IterateLocations()) {
3859           size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3860           // This Value::Default() is only used to fill in PHIs used as the
3861           // default value for PredicatedInstanceFieldGets. The actual value
3862           // stored there is meaningless since the Predicated-iget will use the
3863           // actual field value instead on these paths.
3864           heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
3865         }
3866       }
3867     }
3868 
3869     // String the materialization through the graph.
3870     // Visit RPO to PHI the materialized object through the cohort.
3871     for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
3872       // NB This doesn't include materialization blocks.
3873       DCHECK(!helper.IsMaterializationBlock(blk))
3874           << "Materialization blocks should not be in RPO yet.";
3875       if (ref_data.HasMaterialization(blk)) {
3876         continue;
3877       } else if (ref_data.BeforeAllEscapes(blk)) {
3878         ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
3879         continue;
3880       } else {
3881         ref_data.GenerateMaterializationValueFromPredecessors(blk);
3882       }
3883     }
3884   }
3885 
3886   // Once we've generated all the materializations we can update the users.
3887   helper.FixupPartialObjectUsers();
3888 
3889   // Actually put materialization blocks into the graph
3890   helper.InsertMaterializationBlocks();
3891 
3892   // Get rid of the original instructions.
3893   helper.RemoveReplacedInstructions();
3894 
3895   // Ensure everything is ordered correctly in the materialization blocks. This
3896   // involves moving every NewInstance to the top and ordering them so that any
3897   // required env-uses are correctly ordered.
3898   helper.ReorderMaterializationsForEnvDominance();
3899 }
3900 
3901 void LSEVisitor::FinishFullLSE() {
3902   // Remove recorded load instructions that should be eliminated.
3903   for (const LoadStoreRecord& record : loads_and_stores_) {
3904     size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
3905     HInstruction* substitute = substitute_instructions_for_loads_[id];
3906     if (substitute == nullptr) {
3907       continue;
3908     }
3909     HInstruction* load = record.load_or_store;
3910     DCHECK(load != nullptr);
3911     DCHECK(IsLoad(load));
3912     DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
3913     // We proactively retrieve the substitute for a removed load, so
3914     // a load that has a substitute should not be observed as a heap
3915     // location value.
3916     DCHECK_EQ(FindSubstitute(substitute), substitute);
3917 
3918     load->ReplaceWith(substitute);
3919     load->GetBlock()->RemoveInstruction(load);
3920   }
3921 
3922   // Remove all the stores we can.
3923   for (const LoadStoreRecord& record : loads_and_stores_) {
3924     bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
3925     DCHECK_EQ(is_store, IsStore(record.load_or_store));
3926     if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
3927       record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
3928     }
3929   }
3930 
3931   // Eliminate singleton-classified instructions:
3932   //   - Constructor fences (they never escape this thread).
3933   //   - Allocations (if they are unused).
3934   for (HInstruction* new_instance : singleton_new_instances_) {
3935     size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
3936     MaybeRecordStat(stats_,
3937                     MethodCompilationStat::kConstructorFenceRemovedLSE,
3938                     removed);
3939 
3940     if (!new_instance->HasNonEnvironmentUses()) {
3941       new_instance->RemoveEnvironmentUsers();
3942       new_instance->GetBlock()->RemoveInstruction(new_instance);
3943       MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
3944     }
3945   }
3946 }
3947 
3948 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
3949 // cannot be directly allocated with an arena allocator, so we need to wrap it.
3950 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
3951  public:
3952   LSEVisitorWrapper(HGraph* graph,
3953                     const HeapLocationCollector& heap_location_collector,
3954                     bool perform_partial_lse,
3955                     OptimizingCompilerStats* stats)
3956       : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {}
3957 
3958   void Run() {
3959     lse_visitor_.Run();
3960   }
3961 
3962  private:
3963   LSEVisitor lse_visitor_;
3964 };
3965 
3966 bool LoadStoreElimination::Run(bool enable_partial_lse) {
3967   if (graph_->IsDebuggable()) {
3968     // Debugger may set heap values or trigger deoptimization of callers.
3969     // Skip this optimization.
3970     return false;
3971   }
3972   // We need to be able to determine reachability. Clear it just to be safe but
3973   // this should initially be empty.
3974   graph_->ClearReachabilityInformation();
3975   // This is O(blocks^3) time complexity. It means we can query reachability in
3976   // O(1) though.
3977   graph_->ComputeReachabilityInformation();
3978   ScopedArenaAllocator allocator(graph_->GetArenaStack());
3979   LoadStoreAnalysis lsa(graph_,
3980                         stats_,
3981                         &allocator,
3982                         enable_partial_lse ? LoadStoreAnalysisType::kFull
3983                                            : LoadStoreAnalysisType::kBasic);
3984   lsa.Run();
3985   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
3986   if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
3987     // No HeapLocation information from LSA, skip this optimization.
3988     return false;
3989   }
3990 
3991   std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
3992       graph_, heap_location_collector, enable_partial_lse, stats_));
3993   lse_visitor->Run();
3994   return true;
3995 }
3996 
3997 #undef LSE_VLOG
3998 
3999 }  // namespace art
4000