1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "load_store_elimination.h"
18 
19 #include <algorithm>
20 #include <optional>
21 #include <sstream>
22 #include <variant>
23 
24 #include "base/arena_allocator.h"
25 #include "base/arena_bit_vector.h"
26 #include "base/array_ref.h"
27 #include "base/bit_vector-inl.h"
28 #include "base/bit_vector.h"
29 #include "base/globals.h"
30 #include "base/indenter.h"
31 #include "base/iteration_range.h"
32 #include "base/scoped_arena_allocator.h"
33 #include "base/scoped_arena_containers.h"
34 #include "base/transform_iterator.h"
35 #include "escape.h"
36 #include "execution_subgraph.h"
37 #include "handle.h"
38 #include "load_store_analysis.h"
39 #include "mirror/class_loader.h"
40 #include "mirror/dex_cache.h"
41 #include "nodes.h"
42 #include "optimizing/execution_subgraph.h"
43 #include "optimizing_compiler_stats.h"
44 #include "reference_type_propagation.h"
45 #include "side_effects_analysis.h"
46 #include "stack_map.h"
47 
48 /**
49  * The general algorithm of load-store elimination (LSE).
50  *
51  * We use load-store analysis to collect a list of heap locations and perform
52  * alias analysis of those heap locations. LSE then keeps track of a list of
53  * heap values corresponding to the heap locations and stores that put those
54  * values in these locations. (A short end-to-end example follows the phase list below.)
55  *  - In phase 1, we visit basic blocks in reverse post order and for each basic
56  *    block, visit instructions sequentially, recording heap values and looking
57  *    for loads and stores to eliminate without relying on loop Phis.
58  *  - In phase 2, we look for loads that can be replaced by creating loop Phis
59  *    or using a loop-invariant value.
60  *  - In phase 3, we determine which stores are dead and can be eliminated and
61  *    based on that information we re-evaluate whether some kept stores are
62  *    storing the same value as the value in the heap location; such stores are
63  *    also marked for elimination.
64  *  - In phase 4, we commit the changes, replacing loads marked for elimination
65  *    in previous processing and removing stores not marked for keeping. We also
66  *    remove allocations that are no longer needed.
67  *  - In phase 5, we move allocations which only escape along some executions
68  *    closer to their escape points and fix up non-escaping paths with their actual
69  *    values, creating PHIs when needed.
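 *
 * As a hedged, end-to-end illustration (Java-like pseudo-code; the class and
 * variable names are made up for this comment and do not come from any test):
 *
 *     Point p = new Point();   // singleton: no aliases, does not escape
 *     p.x = a;                 // store
 *     int b = p.x;             // load of a known value
 *     return b;
 *
 * Here the load is replaced by `a` (phases 1/4), the store becomes unobservable
 * and is removed (phases 3/4), and since `p` never escapes and is not
 * finalizable, the allocation itself is also removed (phase 4).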
70  *
71  * 1. Walk over blocks and their instructions.
72  *
73  * The initial set of heap values for a basic block is
74  *  - For a loop header of an irreducible loop, all heap values are unknown.
75  *  - For a loop header of a normal loop, all values unknown at the end of the
76  *    preheader are initialized to unknown, other heap values are set to Phi
77  *    placeholders as we cannot determine yet whether these values are known on
78  *    all back-edges. We also use Phi placeholders for array heap locations with
79  *    an index defined inside the loop, but this helps only when the value remains
80  *    zero from the array allocation throughout the loop.
81  *  - For catch blocks, we clear all assumptions since we arrived due to an
82  *    instruction throwing.
83  *  - For other basic blocks, we merge incoming values from the end of all
84  *    predecessors. If any incoming value is unknown, the start value for this
85  *    block is also unknown. Otherwise, if all the incoming values are the same
86  *    (including the case of a single predecessor), the incoming value is used.
87  *    Otherwise, we use a Phi placeholder to indicate different incoming values.
88  *    We record whether such Phi placeholder depends on a loop Phi placeholder.
89  *
90  * For each instruction in the block (a small example follows this list):
91  *  - If the instruction is a load from a heap location with a known value not
92  *    dependent on a loop Phi placeholder, the load can be eliminated, either by
93  *    using an existing instruction or by creating new Phi(s) instead. In order
94  *    to maintain the validity of all heap locations during the optimization
95  *    phase, we only record substitutes at this phase and the real elimination
96  *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
97  *    replacement are recorded for processing later. We also keep track of the
98  *    heap-value observed at the load so that later partial-LSE can predicate the
99  *    load.
100  *  - If the instruction is a store, it updates the heap value for the heap
101  *    location with the stored value and records the store itself so that we can
102  *    mark it for keeping if the value becomes observable. Heap values are
103  *    invalidated for heap locations that may alias with the store instruction's
104  *    heap location and their recorded stores are marked for keeping as they are
105  *    now potentially observable. The store instruction can be eliminated unless
106  *    the value stored is later needed e.g. by a load from the same/aliased heap
107  *    location or the heap location persists at method return/deoptimization.
108  *  - A store that stores the same value as the heap value is eliminated.
109  *  - For newly instantiated instances, their heap values are initialized to
110  *    language-defined default values.
111  *  - Finalizable objects are considered as persisting at method
112  *    return/deoptimization.
113  *  - Some instructions such as invokes are treated as loading and invalidating
114  *    all the heap values, depending on the instruction's side effects.
115  *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
116  *    partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore is treated
117  *    as an alias and no load/store is eliminated in such cases.
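 *
 * A small hedged example of this per-instruction bookkeeping (hypothetical
 * names, Java-like pseudo-code):
 *
 *     o.f = v;       // heap value of o.f becomes `v`; the store is recorded
 *     int x = o.f;   // load of a known value; substitute `v` is recorded
 *     foo();         // invoke with side effects: heap values are invalidated
 *                    // and the store to o.f is marked for keeping
 *     int y = o.f;   // load of an unknown value; cannot be eliminated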
118  *
119  * The time complexity of the initial phase has several components. The total
120  * time for the initialization of heap values for all blocks is
121  *    O(heap_locations * edges)
122  * and the time complexity for simple instruction processing is
123  *    O(instructions).
124  * See the description of phase 3 for additional complexity due to matching of
125  * existing Phis for replacing loads.
126  *
127  * 2. Process loads that depend on loop Phi placeholders.
128  *
129  * We go over these loads to determine whether they can be eliminated. We look
130  * for the set of all Phi placeholders that feed the load and depend on a loop
131  * Phi placeholder and, if we find no unknown value, we construct the necessary
132  * Phi(s) or, if all other inputs are identical, i.e. the location does not
133  * change in the loop, just use that input. If we do find an unknown input, this
134  * must be from a loop back-edge and we replace the loop Phi placeholder with
135  * unknown value and re-process loads and stores that previously depended on
136  * loop Phi placeholders. This shall find at least one load of an unknown value
137  * which is now known to be unreplaceable or a new unknown value on a back-edge
138  * and we repeat this process until each load is either marked for replacement
139  * or found to be unreplaceable. As we mark at least one additional loop Phi
140  * placeholder as unreplaceable in each iteration, this process shall terminate.
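 *
 * As a hedged sketch (hypothetical names, Java-like pseudo-code), a load in a
 * loop can be replaced by materializing a loop Phi from the pre-header value
 * and the back-edge value:
 *
 *     o.f = 0;                        // value at the end of the pre-header
 *     for (int i = 0; i < n; ++i) {
 *       int x = o.f;                  // replaced by a loop Phi(0, x + 1)
 *       o.f = x + 1;                  // value on the back-edge
 *     }
 *
 * If the back-edge value were unknown instead (e.g. clobbered by a call), the
 * loop Phi placeholder would be marked unreplaceable and the load kept.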
141  *
142  * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
143  * is limited by the number of Phi placeholders and their dependencies we need
144  * to search with worst-case time complexity
145  *    O(phi_placeholder_dependencies) .
146  * The dependencies are usually just the Phi placeholders' potential inputs,
147  * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
148  * replacement search, there are additional dependencies to consider, see below.
149  *
150  * In the successful case (no unknown inputs found) we use the Floyd-Warshall
151  * algorithm to determine transitive closures for each found Phi placeholder,
152  * and then match or materialize Phis from the smallest transitive closure,
153  * so that we can determine if such a subset has a single other input. This has
154  * time complexity
155  *    O(phi_placeholders_found^3) .
156  * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
157  * contribute to this as such Phi placeholders are replaced immediately.
158  * The total time of all such successful cases has time complexity
159  *    O(phi_placeholders^3)
160  * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
161  * argument applies to the searches used to find all successful cases, so their
162  * total contribution is also just an insignificant
163  *    O(phi_placeholder_dependencies) .
164  * The materialization of Phis has an insignificant total time complexity
165  *    O(phi_placeholders * edges) .
166  *
167  * If we find an unknown input, we re-process heap values and loads with a time
168  * complexity that's the same as the phase 1 in the worst case. Adding this to
169  * the depth-first search time complexity yields
170  *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
171  * for a single iteration. We can ignore the middle term as it's proportional
172  * to the number of Phi placeholder inputs included in the first term. Using
173  * the upper limit of number of such iterations, the total time complexity is
174  *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
175  *
176  * The upper bound of Phi placeholder inputs is
177  *    heap_locations * edges
178  * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
179  * include other heap locations in predecessor blocks with the upper bound of
180  *    heap_locations^2 * edges .
181  * Using the estimate
182  *    edges <= blocks^2
183  * and
184  *    phi_placeholders <= heap_locations * blocks ,
185  * the worst-case time complexity of the
186  *    O(phi_placeholder_dependencies * phi_placeholders)
187  * term from unknown input cases is actually
188  *    O(heap_locations^3 * blocks^3) ,
189  * exactly as the estimate for the Floyd-Warshall parts of successful cases.
190  * Adding the other term from the unknown input cases (to account for the case
191  * with significantly more instructions than blocks and heap locations), the
192  * phase 2 time complexity is
193  *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
194  *
195  * See the description of phase 3 for additional complexity due to matching of
196  * existing Phis for replacing loads.
197  *
198  * 3. Determine which stores to keep and which to eliminate.
199  *
200  * During instruction processing in phase 1 and re-processing in phase 2, we are
201  * keeping a record of the stores and Phi placeholders that become observable
202  * and now propagate the observable Phi placeholders to all actual stores that
203  * feed them. Having determined observable stores, we look for stores that just
204  * overwrite the old value with the same value. Since ignoring non-observable stores
205  * actually changes the old values in heap locations, we need to recalculate
206  * Phi placeholder replacements but we proceed similarly to the previous phase.
207  * We look for the set of all Phis that feed the old value replaced by the store
208  * (but ignoring whether they depend on a loop Phi) and, if we find no unknown
209  * value, we try to match existing Phis (we do not create new Phis anymore) or,
210  * if all other inputs are identical, i.e. the location does not change in the
211  * loop, just use that input. If this succeeds and the old value is identical to
212  * the value we're storing, such store shall be eliminated.
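 *
 * A hedged example of such a redundant store (hypothetical names, Java-like
 * pseudo-code):
 *
 *     o.f = v;
 *     ...            // no intervening write to o.f or any aliased location
 *     o.f = v;       // stores the value already in o.f; marked for elimination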
213  *
214  * The work is similar to phase 2, except that we're not re-processing loads
215  * and stores anymore, so the time complexity of phase 3 is
216  *    O(heap_locations^3 * blocks^3) .
217  *
218  * There is additional complexity in matching existing Phis shared between the
219  * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
220  * time (this could be difficult and slow), so each matching attempt is just
221  * looking at Phis in the block (both old Phis and newly created Phis) and their
222  * inputs. As we create at most `heap_locations` Phis in each block, the upper
223  * bound on the number of Phis we look at is
224  *    heap_locations * (old_phis + heap_locations)
225  * and the worst-case time complexity is
226  *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
227  * The first term is lower than one term in phase 2, so the relevant part is
228  *    O(heap_locations * old_phis * edges) .
229  *
230  * 4. Replace loads and remove unnecessary stores and singleton allocations.
231  *
232  * A special type of object, called a singleton, is instantiated in the method
233  * and has a single name, i.e. no aliases, so it has an exclusive heap
234  * location. Singletons are helpful in narrowing
235  * down the life span of a heap location such that they do not always need to
236  * participate in merging heap values. Allocation of a singleton can be
237  * eliminated if that singleton is not used and does not persist at method
238  * return/deoptimization.
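 *
 * As a hedged contrast to the end-to-end example near the top of this comment
 * (hypothetical names): if the singleton escapes via the return, the
 * allocation and its stores persist, but loads can still be replaced:
 *
 *     Point p = new Point();   // kept: `p` escapes through the return
 *     p.x = a;                 // kept: observable by the caller
 *     int b = p.x;             // still replaced by `a`
 *     return p;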
239  *
240  * The time complexity of this phase is
241  *    O(instructions + instruction_uses) .
242  *
243  * 5. Partial LSE
244  *
245  * Move allocations closer to their escapes and remove/predicate loads and
246  * stores as required.
247  *
248  * Partial singletons are objects which only escape from the function or have
249  * multiple names along certain execution paths. In cases where we recognize
250  * these partial singletons we can move the allocation and initialization
251  * closer to the actual escape(s). We can then perform a simplified version of
252  * LSE step 2 to determine the unescaped value of any reads performed after the
253  * object may have escaped. These values are used to replace the reads with
254  * 'predicated-read' instructions where the value is only read if the object
255  * has actually escaped. We use the existence of the object itself as the
256  * marker of whether escape has occurred.
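 *
 * A hedged sketch of a partial singleton (hypothetical names, Java-like
 * pseudo-code):
 *
 *     Point p = new Point();
 *     p.x = 1;
 *     if (cond) {
 *       escape(p);             // `p` escapes only along this path
 *     }
 *     int a = p.x;             // becomes a predicated read: if `p` was
 *                              // materialized, read p.x, otherwise use 1
 *
 * The allocation and the initializing store are moved into a materialization
 * block on the escaping edge, and the presence of the materialized object
 * serves as the escape marker consulted by the predicated read.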
257  *
258  * There are several steps in this sub-pass
259  *
260  * 5.1 Group references
261  *
262  * Since all heap-locations for a single reference escape at the same time, we
263  * need to group the heap-locations by reference and process them at the same
264  * time.
265  *
266  *    O(heap_locations).
267  *
268  * FIXME: The time complexity above assumes we can bucket the heap-locations in
269  * O(1) which is not true since we just perform a linear-scan of the heap-ref
270  * list. Since there are generally only a small number of heap-references which
271  * are partial-singletons, this is fine and has lower real overhead than a hash map.
272  *
273  * 5.2 Generate materializations
274  *
275  * Once we have the references we add new 'materialization blocks' on the edges
276  * where escape becomes inevitable. This information is calculated by the
277  * execution-subgraphs created during load-store-analysis. We create new
278  * 'materializations' in these blocks and initialize them with the value of
279  * each heap-location ignoring side effects (since the object hasn't escaped
280  * yet). In the worst case this has the same time complexity as step 3 since we may
281  * need to materialize phis.
282  *
283  *    O(heap_locations^2 * materialization_edges)
284  *
285  * 5.3 Propagate materializations
286  *
287  * Since we use the materialization as the marker for escape we need to
288  * propagate it throughout the graph. Since the subgraph analysis considers any
289  * lifetime that escapes a loop (and hence would require a loop-phi) to be
290  * escaping at the loop-header we do not need to create any loop-phis to do
291  * this.
292  *
293  *    O(edges)
294  *
295  * NB: Currently the subgraph analysis considers all objects to have their
296  * lifetimes start at the entry block. This simplifies that analysis enormously
297  * but means that we cannot distinguish between an escape in a loop where the
298  * lifetime does not escape the loop (in which case this pass could optimize)
299  * and one where it does escape the loop (in which case the whole loop is
300  * escaping). This is a shortcoming that would be good to fix at some point.
301  *
302  * 5.4 Propagate partial values
303  *
304  * We need to replace loads and stores to the partial reference with predicated
305  * ones that have default non-escaping values. Again this is the same as step 3.
306  *
307  *   O(heap_locations^2 * edges)
308  *
309  * 5.5 Final fixup
310  *
311  * Now all we need to do is replace uses of the old reference with the
312  * appropriate materialization and remove the old reference.
313  *
314  *   O(instructions + uses)
315  *
316  * FIXME: The time complexities described above assume that the
317  * HeapLocationCollector finds a heap location for an instruction in O(1)
318  * time but it is currently O(heap_locations); this can be fixed by adding
319  * a hash map to the HeapLocationCollector.
320  */
321 
322 namespace art HIDDEN {
323 
324 #define LSE_VLOG \
325   if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
326 
327 class PartialLoadStoreEliminationHelper;
328 class HeapRefHolder;
329 
330 // Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
331 class LSEVisitor final : private HGraphDelegateVisitor {
332  public:
333   LSEVisitor(HGraph* graph,
334              const HeapLocationCollector& heap_location_collector,
335              bool perform_partial_lse,
336              OptimizingCompilerStats* stats);
337 
338   void Run();
339 
340  private:
341   class PhiPlaceholder {
342    public:
343     constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
344     constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
345         : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}
346 
347     constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
348     constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
349     constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
350     constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;
351 
352     constexpr uint32_t GetBlockId() const {
353       return block_id_;
354     }
355 
356     constexpr size_t GetHeapLocation() const {
357       return heap_location_;
358     }
359 
360     constexpr bool Equals(const PhiPlaceholder& p2) const {
361       return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
362     }
363 
364     void Dump(std::ostream& oss) const {
365       oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
366     }
367 
368    private:
369     uint32_t block_id_;
370     uint32_t heap_location_;
371   };
372 
373   struct Marker {};
374 
375   class Value;
376 
377   class PriorValueHolder {
378    public:
379     constexpr explicit PriorValueHolder(Value prior);
380 
381     constexpr bool IsInstruction() const {
382       return std::holds_alternative<HInstruction*>(value_);
383     }
384     constexpr bool IsPhi() const {
385       return std::holds_alternative<PhiPlaceholder>(value_);
386     }
387     constexpr bool IsDefault() const {
388       return std::holds_alternative<Marker>(value_);
389     }
390     constexpr PhiPlaceholder GetPhiPlaceholder() const {
391       DCHECK(IsPhi());
392       return std::get<PhiPlaceholder>(value_);
393     }
394     constexpr HInstruction* GetInstruction() const {
395       DCHECK(IsInstruction());
396       return std::get<HInstruction*>(value_);
397     }
398 
399     Value ToValue() const;
400     void Dump(std::ostream& oss) const;
401 
402     constexpr bool Equals(PriorValueHolder other) const {
403       return value_ == other.value_;
404     }
405 
406    private:
407     std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
408   };
409 
410   friend constexpr bool operator==(const Marker&, const Marker&);
411   friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
412   friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
413   friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);
414 
415   class Value {
416    public:
417     enum class ValuelessType {
418       kInvalid,
419       kPureUnknown,
420       kDefault,
421     };
422     struct MergedUnknownMarker {
423       PhiPlaceholder phi_;
424     };
425     struct NeedsNonLoopPhiMarker {
426       PhiPlaceholder phi_;
427     };
428     struct NeedsLoopPhiMarker {
429       PhiPlaceholder phi_;
430     };
431 
432     static constexpr Value Invalid() {
433       return Value(ValuelessType::kInvalid);
434     }
435 
436     // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
437     // A heap location can be set to an unknown heap value when:
438     // - it is coming from outside the method,
439     // - it is killed due to aliasing, or side effects, or merging with an unknown value.
440     static constexpr Value PureUnknown() {
441       return Value(ValuelessType::kPureUnknown);
442     }
443 
444     static constexpr Value PartialUnknown(Value old_value) {
445       if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
446         return PureUnknown();
447       } else {
448         return Value(PriorValueHolder(old_value));
449       }
450     }
451 
452     static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
453       return Value(MergedUnknownMarker{phi_placeholder});
454     }
455 
456     // Default heap value after an allocation.
457     // A heap location can be set to that value right after an allocation.
458     static constexpr Value Default() {
459       return Value(ValuelessType::kDefault);
460     }
461 
462     static constexpr Value ForInstruction(HInstruction* instruction) {
463       return Value(instruction);
464     }
465 
466     static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
467       return Value(NeedsNonLoopPhiMarker{phi_placeholder});
468     }
469 
470     static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
471       return Value(NeedsLoopPhiMarker{phi_placeholder});
472     }
473 
474     static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
475       return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
476                             : ForNonLoopPhiPlaceholder(phi_placeholder);
477     }
478 
479     constexpr bool IsValid() const {
480       return !IsInvalid();
481     }
482 
483     constexpr bool IsInvalid() const {
484       return std::holds_alternative<ValuelessType>(value_) &&
485              GetValuelessType() == ValuelessType::kInvalid;
486     }
487 
488     bool IsPartialUnknown() const {
489       return std::holds_alternative<PriorValueHolder>(value_);
490     }
491 
492     bool IsMergedUnknown() const {
493       return std::holds_alternative<MergedUnknownMarker>(value_);
494     }
495 
496     bool IsPureUnknown() const {
497       return std::holds_alternative<ValuelessType>(value_) &&
498              GetValuelessType() == ValuelessType::kPureUnknown;
499     }
500 
501     bool IsUnknown() const {
502       return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
503     }
504 
505     bool IsDefault() const {
506       return std::holds_alternative<ValuelessType>(value_) &&
507              GetValuelessType() == ValuelessType::kDefault;
508     }
509 
510     bool IsInstruction() const {
511       return std::holds_alternative<HInstruction*>(value_);
512     }
513 
514     bool NeedsNonLoopPhi() const {
515       return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
516     }
517 
518     bool NeedsLoopPhi() const {
519       return std::holds_alternative<NeedsLoopPhiMarker>(value_);
520     }
521 
522     bool NeedsPhi() const {
523       return NeedsNonLoopPhi() || NeedsLoopPhi();
524     }
525 
526     HInstruction* GetInstruction() const {
527       DCHECK(IsInstruction()) << *this;
528       return std::get<HInstruction*>(value_);
529     }
530 
531     PriorValueHolder GetPriorValue() const {
532       DCHECK(IsPartialUnknown());
533       return std::get<PriorValueHolder>(value_);
534     }
535 
536     PhiPlaceholder GetPhiPlaceholder() const {
537       DCHECK(NeedsPhi() || IsMergedUnknown());
538       if (NeedsNonLoopPhi()) {
539         return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
540       } else if (NeedsLoopPhi()) {
541         return std::get<NeedsLoopPhiMarker>(value_).phi_;
542       } else {
543         return std::get<MergedUnknownMarker>(value_).phi_;
544       }
545     }
546 
547     uint32_t GetMergeBlockId() const {
548       DCHECK(IsMergedUnknown()) << this;
549       return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
550     }
551 
552     HBasicBlock* GetMergeBlock(const HGraph* graph) const {
553       DCHECK(IsMergedUnknown()) << *this;
554       return graph->GetBlocks()[GetMergeBlockId()];
555     }
556 
557     size_t GetHeapLocation() const {
558       DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
559       return GetPhiPlaceholder().GetHeapLocation();
560     }
561 
562     constexpr bool ExactEquals(Value other) const;
563 
564     constexpr bool Equals(Value other) const;
565 
566     constexpr bool Equals(HInstruction* instruction) const {
567       return Equals(ForInstruction(instruction));
568     }
569 
570     std::ostream& Dump(std::ostream& os) const;
571 
572     // Public for use with lists.
573     constexpr Value() : value_(ValuelessType::kInvalid) {}
574 
575    private:
576     using ValueHolder = std::variant<ValuelessType,
577                                      HInstruction*,
578                                      MergedUnknownMarker,
579                                      NeedsNonLoopPhiMarker,
580                                      NeedsLoopPhiMarker,
581                                      PriorValueHolder>;
582     constexpr ValuelessType GetValuelessType() const {
583       return std::get<ValuelessType>(value_);
584     }
585 
586     constexpr explicit Value(ValueHolder v) : value_(v) {}
587 
588     friend std::ostream& operator<<(std::ostream& os, const Value& v);
589 
590     ValueHolder value_;
591 
592     static_assert(std::is_move_assignable<PhiPlaceholder>::value);
593   };
594 
595   friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
596                                    const Value::NeedsLoopPhiMarker& p2);
597   friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
598                                    const Value::NeedsNonLoopPhiMarker& p2);
599   friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
600                                    const Value::MergedUnknownMarker& p2);
601 
602   // Get Phi placeholder index for access to `phi_placeholder_replacements_`
603   // and "visited" bit vectors during depth-first searches.
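  // For illustration (assuming, say, 4 heap locations): the placeholder for
  // (block 3, heap location 2) maps to index 3 * 4 + 2 = 14, i.e. indexes are
  // laid out block-major with one slot per heap location.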
604   size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
605     size_t res =
606         phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
607         phi_placeholder.GetHeapLocation();
608     DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
609         << res << "blks: " << GetGraph()->GetBlocks().size()
610         << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
611     return res;
612   }
613 
614   size_t PhiPlaceholderIndex(Value phi_placeholder) const {
615     return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
616   }
617 
618   bool IsEscapingObject(ReferenceInfo* info, HBasicBlock* block, size_t index) {
619     return !info->IsSingletonAndRemovable() &&
620            !(info->IsPartialSingleton() && IsPartialNoEscape(block, index));
621   }
622 
623   bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
624     auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
625     if (!ri->IsPartialSingleton()) {
626       return false;
627     }
628     ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
629         ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
630     return std::none_of(cohorts.cbegin(),
631                         cohorts.cend(),
632                         [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
633                           // Make sure we haven't yet and never will escape.
634                           return ex.PrecedesBlock(blk) ||
635                                  ex.ContainsBlock(blk) ||
636                                  ex.SucceedsBlock(blk);
637                         });
638   }
639 
640   PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
641     DCHECK_LT(off, num_phi_placeholders_);
642     size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
643     // Technically this should be (off - id) / NumberOfHeapLocations
644     // but due to truncation it's all the same.
645     size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
646     return GetPhiPlaceholder(blk_id, id);
647   }
648 
649   PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
650     DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
651     return PhiPlaceholder(block_id, idx);
652   }
653 
654   Value Replacement(Value value) const {
655     DCHECK(value.NeedsPhi() ||
656            (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
657         << value << " phase: " << current_phase_;
658     Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
659     DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
660     DCHECK(replacement.IsUnknown() ||
661            FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
662     return replacement;
663   }
664 
665   Value ReplacementOrValue(Value value) const {
666     if (current_phase_ == Phase::kPartialElimination) {
667       // In this phase we are materializing the default values which are used
668       // only if the partial singleton did not escape, so we can replace
669       // a partial unknown with the prior value.
670       if (value.IsPartialUnknown()) {
671         value = value.GetPriorValue().ToValue();
672       }
673       if ((value.IsMergedUnknown() || value.NeedsPhi()) &&
674           phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
675         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
676         DCHECK(!value.IsMergedUnknown());
677         DCHECK(!value.NeedsPhi());
678       } else if (value.IsMergedUnknown()) {
679         return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
680       }
681       if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) {
682         DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()),
683                   substitute_instructions_for_loads_.size());
684         HInstruction* substitute =
685             substitute_instructions_for_loads_[value.GetInstruction()->GetId()];
686         if (substitute != nullptr) {
687           DCHECK(substitute->IsPredicatedInstanceFieldGet());
688           return Value::ForInstruction(substitute);
689         }
690       }
691       DCHECK_IMPLIES(value.IsInstruction(),
692                      FindSubstitute(value.GetInstruction()) == value.GetInstruction());
693       return value;
694     }
695     if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
696       return Replacement(value);
697     } else {
698       DCHECK_IMPLIES(value.IsInstruction(),
699                      FindSubstitute(value.GetInstruction()) == value.GetInstruction());
700       return value;
701     }
702   }
703 
704   // The record of a heap value and instruction(s) that feed that value.
705   struct ValueRecord {
706     Value value;
707     Value stored_by;
708   };
709 
710   HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
711                                                       HInstruction* value,
712                                                       DataType::Type expected_type) {
713     // We should never add a type conversion to a boolean value.
714     if (expected_type == DataType::Type::kBool ||
715         DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
716         // TODO: This prevents type conversion of default values but we can still insert
717         // type conversion of other constants and there is no constant folding pass after LSE.
718         IsZeroBitPattern(value)) {
719       return nullptr;
720     }
721 
722     // Check if there is already a suitable TypeConversion we can reuse.
723     for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
724       if (use.GetUser()->IsTypeConversion() &&
725           use.GetUser()->GetType() == expected_type &&
726           // TODO: We could move the TypeConversion to a common dominator
727           // if it does not cross irreducible loop header.
728           use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
729           // Don't share across irreducible loop headers.
730           // TODO: can be more fine-grained than this by testing each dominator.
731           (use.GetUser()->GetBlock() == instruction->GetBlock() ||
732            !GetGraph()->HasIrreducibleLoops())) {
733         if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
734             use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
735           // Move the TypeConversion before the instruction.
736           use.GetUser()->MoveBefore(instruction);
737         }
738         DCHECK(use.GetUser()->StrictlyDominates(instruction));
739         return use.GetUser()->AsTypeConversion();
740       }
741     }
742 
743     // We must create a new TypeConversion instruction.
744     HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
745           expected_type, value, instruction->GetDexPc());
746     instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
747     return type_conversion;
748   }
749 
750   // Find an instruction's substitute if it's a removed load.
751   // Return the same instruction if it should not be removed.
752   HInstruction* FindSubstitute(HInstruction* instruction) const {
753     size_t id = static_cast<size_t>(instruction->GetId());
754     if (id >= substitute_instructions_for_loads_.size()) {
755       // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet.
756       DCHECK_IMPLIES(IsLoad(instruction), instruction->IsPredicatedInstanceFieldGet());
757       return instruction;
758     }
759     HInstruction* substitute = substitute_instructions_for_loads_[id];
760     DCHECK(substitute == nullptr || IsLoad(instruction));
761     return (substitute != nullptr) ? substitute : instruction;
762   }
763 
764   void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
765     DCHECK(IsLoad(load));
766     DCHECK_EQ(FindSubstitute(load), load);
767     DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
768         "Unexpected heap_value that has a substitute " << heap_value->DebugName();
769 
770     // The load expects to load the heap value as type load->GetType().
771     // However the tracked heap value may not be of that type. An explicit
772     // type conversion may be needed.
773     // There are actually three types involved here:
774     // (1) tracked heap value's type (type A)
775     // (2) heap location (field or element)'s type (type B)
776     // (3) load's type (type C)
777     // We guarantee that type A stored as type B and then fetched out as
778     // type C is the same as casting from type A to type C directly, since
779     // type B and type C will have the same size which is guaranteed in
780     // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
781     // So we only need one type conversion from type A to type C.
782     HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
783         load, heap_value, load->GetType());
784 
785     substitute_instructions_for_loads_[load->GetId()] =
786         type_conversion != nullptr ? type_conversion : heap_value;
787   }
788 
789   static bool IsLoad(HInstruction* instruction) {
790     // Unresolved load is not treated as a load.
791     return instruction->IsInstanceFieldGet() ||
792            instruction->IsPredicatedInstanceFieldGet() ||
793            instruction->IsStaticFieldGet() ||
794            instruction->IsVecLoad() ||
795            instruction->IsArrayGet();
796   }
797 
798   static bool IsStore(HInstruction* instruction) {
799     // Unresolved store is not treated as a store.
800     return instruction->IsInstanceFieldSet() ||
801            instruction->IsArraySet() ||
802            instruction->IsVecStore() ||
803            instruction->IsStaticFieldSet();
804   }
805 
806   // Check if it is allowed to use default values or Phis for the specified load.
807   static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
808     DCHECK(IsLoad(instruction));
809     // Using defaults for VecLoads requires creating additional vector operations.
810     // As there are some issues with scheduling vector operations it is better to avoid creating
811     // them.
812     return !instruction->IsVecOperation();
813   }
814 
815   // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
816   // This is necessary if the stored heap value can be observed.
817   void KeepStores(Value value) {
818     if (value.IsPureUnknown() || value.IsPartialUnknown()) {
819       return;
820     }
821     if (value.IsMergedUnknown()) {
822       kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
823       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
824       return;
825     }
826     if (value.NeedsPhi()) {
827       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
828     } else {
829       HInstruction* instruction = value.GetInstruction();
830       DCHECK(IsStore(instruction));
831       kept_stores_.SetBit(instruction->GetId());
832     }
833   }
834 
835   // If a heap location X may alias with heap location at `loc_index`
836   // and heap_values of that heap location X holds a store, keep that store.
837   // It's needed for a dependent load that's not eliminated since any store
838   // that may put value into the load's heap location needs to be kept.
839   void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
840                                      size_t loc_index) {
841     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
842       if (i == loc_index) {
843         // We use this function when reading a location with unknown value and
844         // therefore we cannot know what exact store wrote that unknown value.
845         // But we can have a phi placeholder here marking multiple stores to keep.
846         DCHECK(
847             !heap_values[i].stored_by.IsInstruction() ||
848             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
849         KeepStores(heap_values[i].stored_by);
850         heap_values[i].stored_by = Value::PureUnknown();
851       } else if (heap_location_collector_.MayAlias(i, loc_index)) {
852         KeepStores(heap_values[i].stored_by);
853         heap_values[i].stored_by = Value::PureUnknown();
854       }
855     }
856   }
857 
858   HInstruction* GetDefaultValue(DataType::Type type) {
859     switch (type) {
860       case DataType::Type::kReference:
861         return GetGraph()->GetNullConstant();
862       case DataType::Type::kBool:
863       case DataType::Type::kUint8:
864       case DataType::Type::kInt8:
865       case DataType::Type::kUint16:
866       case DataType::Type::kInt16:
867       case DataType::Type::kInt32:
868         return GetGraph()->GetIntConstant(0);
869       case DataType::Type::kInt64:
870         return GetGraph()->GetLongConstant(0);
871       case DataType::Type::kFloat32:
872         return GetGraph()->GetFloatConstant(0);
873       case DataType::Type::kFloat64:
874         return GetGraph()->GetDoubleConstant(0);
875       default:
876         UNREACHABLE();
877     }
878   }
879 
880   bool CanValueBeKeptIfSameAsNew(Value value,
881                                  HInstruction* new_value,
882                                  HInstruction* new_value_set_instr) {
883     // For field/array set location operations, if the value is the same as the new_value
884     // it can be kept even if aliasing happens. All aliased operations will access the same memory
885     // range.
886     // For vector values, this is not true. For example:
887     //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
888     //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
889     //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
890     //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
891     //                                                     here is not packed_data anymore.
892     //
893     // TODO: to allow such 'same value' optimization on vector data,
894     // LSA needs to report more fine-grain MAY alias information:
895     // (1) May alias due to two vector data partial overlap.
896     //     e.g. a[i..i+3] and a[i+1,..,i+4].
897     // (2) May alias due to two vector data may complete overlap each other.
898     //     e.g. a[i..i+3] and b[i..i+3].
899     // (3) May alias but the exact relationship between two locations is unknown.
900     //     e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
901     // This 'same value' optimization can apply only on case (2).
902     if (new_value_set_instr->IsVecOperation()) {
903       return false;
904     }
905 
906     return value.Equals(new_value);
907   }
908 
909   Value PrepareLoopValue(HBasicBlock* block, size_t idx);
910   Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
911   void PrepareLoopRecords(HBasicBlock* block);
912   Value MergePredecessorValues(HBasicBlock* block, size_t idx);
913   void MergePredecessorRecords(HBasicBlock* block);
914 
915   void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);
916 
917   void VisitGetLocation(HInstruction* instruction, size_t idx);
918   void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
919   void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
920     field_infos_[heap_loc] = info;
921   }
922 
923   void VisitBasicBlock(HBasicBlock* block) override;
924 
925   enum class Phase {
926     kLoadElimination,
927     kStoreElimination,
928     kPartialElimination,
929   };
930 
931   bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;
932 
933   bool TryReplacingLoopPhiPlaceholderWithDefault(
934       PhiPlaceholder phi_placeholder,
935       DataType::Type type,
936       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
937   bool TryReplacingLoopPhiPlaceholderWithSingleInput(
938       PhiPlaceholder phi_placeholder,
939       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
940   std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
941       PhiPlaceholder phi_placeholder,
942       /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
943       DataType::Type type,
944       bool can_use_default_or_phi);
945   bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
946                            DataType::Type type);
947   bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
948   bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
949                            DataType::Type type);
950   bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
951   std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
952                                                          HInstruction* load);
953   void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
954   void ProcessLoadsRequiringLoopPhis();
955 
956   void SearchPhiPlaceholdersForKeptStores();
957   void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
958   void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
959   void FindStoresWritingOldValues();
960   void FinishFullLSE();
961   void PrepareForPartialPhiComputation();
962   // Create materialization block and materialization object for the given predecessor of entry.
963   HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
964                                             HeapRefHolder&& holder,
965                                             size_t pred_idx,
966                                             HBasicBlock* blk);
967   // Returns the value that would be read by the 'read' instruction on
968   // 'orig_new_inst' if 'orig_new_inst' has not escaped.
969   HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
970   void MovePartialEscapes();
971 
972   void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
973     LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
974                << " but LSE should be the only source of predicated-ifield-gets!";
975   }
976 
977   void HandleAcquireLoad(HInstruction* instruction) {
978     DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
979            (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
980            (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
981         << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
982 
983     // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must
984     // invalidate all current values.
985     ScopedArenaVector<ValueRecord>& heap_values =
986         heap_values_for_[instruction->GetBlock()->GetBlockId()];
987     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
988       KeepStores(heap_values[i].stored_by);
989       heap_values[i].stored_by = Value::PureUnknown();
990       heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
991     }
992 
993     // Note that there's no need to record the load as subsequent acquire loads shouldn't be
994     // eliminated either.
995   }
996 
997   void HandleReleaseStore(HInstruction* instruction) {
998     DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
999            (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
1000            (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
1001         << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
1002 
1003     // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but
1004     // they will push the modifications for other threads to see. Therefore, we must keep the
1005     // stores but there's no need to clobber the value.
1006     ScopedArenaVector<ValueRecord>& heap_values =
1007         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1008     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1009       KeepStores(heap_values[i].stored_by);
1010       heap_values[i].stored_by = Value::PureUnknown();
1011     }
1012 
1013     // Note that there's no need to record the store as subsequent release stores shouldn't be
1014     // eliminated either.
1015   }
1016 
1017   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
1018     if (instruction->IsVolatile()) {
1019       HandleAcquireLoad(instruction);
1020       return;
1021     }
1022 
1023     HInstruction* object = instruction->InputAt(0);
1024     const FieldInfo& field = instruction->GetFieldInfo();
1025     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
1026   }
1027 
1028   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
1029     if (instruction->IsVolatile()) {
1030       HandleReleaseStore(instruction);
1031       return;
1032     }
1033 
1034     HInstruction* object = instruction->InputAt(0);
1035     const FieldInfo& field = instruction->GetFieldInfo();
1036     HInstruction* value = instruction->InputAt(1);
1037     size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
1038     VisitSetLocation(instruction, idx, value);
1039   }
1040 
1041   void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
1042     if (instruction->IsVolatile()) {
1043       HandleAcquireLoad(instruction);
1044       return;
1045     }
1046 
1047     HInstruction* cls = instruction->InputAt(0);
1048     const FieldInfo& field = instruction->GetFieldInfo();
1049     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
1050   }
1051 
1052   void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
1053     if (instruction->IsVolatile()) {
1054       HandleReleaseStore(instruction);
1055       return;
1056     }
1057 
1058     HInstruction* cls = instruction->InputAt(0);
1059     const FieldInfo& field = instruction->GetFieldInfo();
1060     HInstruction* value = instruction->InputAt(1);
1061     size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
1062     VisitSetLocation(instruction, idx, value);
1063   }
1064 
1065   void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
1066     if (monitor_op->IsEnter()) {
1067       HandleAcquireLoad(monitor_op);
1068     } else {
1069       HandleReleaseStore(monitor_op);
1070     }
1071   }
1072 
1073   void VisitArrayGet(HArrayGet* instruction) override {
1074     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1075   }
1076 
1077   void VisitArraySet(HArraySet* instruction) override {
1078     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1079     VisitSetLocation(instruction, idx, instruction->GetValue());
1080   }
1081 
1082   void VisitVecLoad(HVecLoad* instruction) override {
1083     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1084   }
1085 
1086   void VisitVecStore(HVecStore* instruction) override {
1087     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1088     VisitSetLocation(instruction, idx, instruction->GetValue());
1089   }
1090 
1091   void VisitDeoptimize(HDeoptimize* instruction) override {
1092     // If we are in a try, even singletons are observable.
1093     const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
1094     HBasicBlock* block = instruction->GetBlock();
1095     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1096     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1097       Value* stored_by = &heap_values[i].stored_by;
1098       if (stored_by->IsUnknown()) {
1099         continue;
1100       }
1101       // Stores are generally observable after deoptimization, except
1102       // for singletons that don't escape in the deoptimization environment.
1103       bool observable = true;
1104       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1105       if (!inside_a_try && info->IsSingleton()) {
1106         HInstruction* reference = info->GetReference();
1107         // Finalizable objects always escape.
1108         const bool finalizable_object =
1109             reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1110         if (!finalizable_object && !IsEscapingObject(info, block, i)) {
1111           // Check whether the reference for a store is used by an environment local of
1112           // the HDeoptimize. If not, the singleton is not observed after deoptimization.
1113           const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
1114           observable = std::any_of(
1115               env_uses.begin(),
1116               env_uses.end(),
1117               [instruction](const HUseListNode<HEnvironment*>& use) {
1118                 return use.GetUser()->GetHolder() == instruction;
1119               });
1120         }
1121       }
1122       if (observable) {
1123         KeepStores(*stored_by);
1124         *stored_by = Value::PureUnknown();
1125       }
1126     }
1127   }
1128 
1129   // Keep necessary stores before exiting a method via return/throw.
1130   void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
1131     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1132     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1133       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1134       if (must_keep_stores || IsEscapingObject(ref_info, block, i)) {
1135         KeepStores(heap_values[i].stored_by);
1136         heap_values[i].stored_by = Value::PureUnknown();
1137       }
1138     }
1139   }
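  // For example, a store to a static field or to an object passed in as an argument must
  // be kept before a return/throw (the object escapes the method), while a store into a
  // non-escaping allocation is dead at the exit unless `must_keep_stores` forces keeping it.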
1140 
1141   void VisitReturn(HReturn* instruction) override {
1142     HandleExit(instruction->GetBlock());
1143   }
1144 
1145   void VisitReturnVoid(HReturnVoid* return_void) override {
1146     HandleExit(return_void->GetBlock());
1147   }
1148 
1149   void HandleThrowingInstruction(HInstruction* instruction) {
1150     DCHECK(instruction->CanThrow());
1151     // If we are inside of a try, singletons can become visible since we may not exit the method.
1152     HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
1153   }
1154 
1155   void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
1156     HandleThrowingInstruction(method_entry);
1157   }
1158 
1159   void VisitMethodExitHook(HMethodExitHook* method_exit) override {
1160     HandleThrowingInstruction(method_exit);
1161   }
1162 
1163   void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
1164     HandleThrowingInstruction(div_zero_check);
1165   }
1166 
1167   void VisitNullCheck(HNullCheck* null_check) override {
1168     HandleThrowingInstruction(null_check);
1169   }
1170 
1171   void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
1172     HandleThrowingInstruction(bounds_check);
1173   }
1174 
1175   void VisitLoadClass(HLoadClass* load_class) override {
1176     if (load_class->CanThrow()) {
1177       HandleThrowingInstruction(load_class);
1178     }
1179   }
1180 
1181   void VisitLoadString(HLoadString* load_string) override {
1182     if (load_string->CanThrow()) {
1183       HandleThrowingInstruction(load_string);
1184     }
1185   }
1186 
1187   void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
1188     HandleThrowingInstruction(load_method_handle);
1189   }
1190 
1191   void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
1192     HandleThrowingInstruction(load_method_type);
1193   }
1194 
1195   void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
1196     HandleThrowingInstruction(sb_append);
1197   }
1198 
1199   void VisitThrow(HThrow* throw_instruction) override {
1200     HandleThrowingInstruction(throw_instruction);
1201   }
1202 
1203   void VisitCheckCast(HCheckCast* check_cast) override {
1204     HandleThrowingInstruction(check_cast);
1205   }
1206 
1207   void HandleInvoke(HInstruction* instruction) {
1208     // If `instruction` can throw we have to presume all stores are visible.
1209     const bool can_throw = instruction->CanThrow();
1210     // If we are in a try, even singletons are observable.
1211     const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
1212     SideEffects side_effects = instruction->GetSideEffects();
1213     ScopedArenaVector<ValueRecord>& heap_values =
1214         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1215     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1216       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1217       HBasicBlock* blk = instruction->GetBlock();
1218       // We don't need to do anything if the reference has not escaped at this point.
1219       // This is true if either we (1) never escape or (2) sometimes escape but
1220       // there is no possible execution where we have done so at this time. NB
1221       // We count being in the excluded cohort as escaping. Technically, this is
1222       // a bit over-conservative (since we can have multiple non-escaping calls
1223       // before a single escaping one) but this simplifies everything greatly.
1224       auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
1225         DCHECK(ref_info->IsPartialSingleton());
1226         if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
1227           return false;
1228         }
1229         ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
1230             ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
1231         return std::none_of(cohorts.begin(),
1232                             cohorts.end(),
1233                             [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
1234                               return cohort.PrecedesBlock(blk);
1235                             });
1236       };
1237       if (!can_throw_inside_a_try &&
1238           (ref_info->IsSingleton() ||
1239            // Partial singleton that is not currently escaping and has not escaped yet.
1240            (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
1241         // Singleton references cannot be seen by the callee.
1242       } else {
1243         if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1244           // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1245           KeepStores(heap_values[i].stored_by);
1246           heap_values[i].stored_by = Value::PureUnknown();
1247         }
1248         if (side_effects.DoesAnyWrite()) {
1249           // The value may be clobbered.
1250           heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1251         }
1252       }
1253     }
1254   }
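  // Illustrative (hypothetical Java) effect of a call on the tracked state:
  //   obj.field = 1;     // kept: the callee may read it if `obj` can escape
  //   foo(obj);          // may also write obj.field
  //   x = obj.field;     // not eliminated: the value may have been clobbered by foo()
  // Only (partial) singletons that provably have not escaped at this point keep their
  // tracked values across the call.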
1255 
1256   void VisitInvoke(HInvoke* invoke) override {
1257     HandleInvoke(invoke);
1258   }
1259 
1260   void VisitClinitCheck(HClinitCheck* clinit) override {
1261     // Class initialization check can result in class initializer calling arbitrary methods.
1262     HandleInvoke(clinit);
1263   }
1264 
1265   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1266     // Conservatively treat it as an invocation.
1267     HandleInvoke(instruction);
1268   }
1269 
1270   void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1271     // Conservatively treat it as an invocation.
1272     HandleInvoke(instruction);
1273   }
1274 
1275   void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1276     // Conservatively treat it as an invocation.
1277     HandleInvoke(instruction);
1278   }
1279 
1280   void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1281     // Conservatively treat it as an invocation.
1282     HandleInvoke(instruction);
1283   }
1284 
1285   void VisitNewInstance(HNewInstance* new_instance) override {
1286     // If we are in a try, even singletons are observable.
1287     const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
1288     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1289     if (ref_info == nullptr) {
1290       // new_instance isn't used for field accesses. No need to process it.
1291       return;
1292     }
1293     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1294       DCHECK(!new_instance->IsFinalizable());
1295       // new_instance can potentially be eliminated.
1296       singleton_new_instances_.push_back(new_instance);
1297     }
1298     HBasicBlock* block = new_instance->GetBlock();
1299     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1300     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1301       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1302       HInstruction* ref = info->GetReference();
1303       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1304       if (ref == new_instance) {
1305         if (offset >= mirror::kObjectHeaderSize ||
1306             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1307           // Instance fields except the header fields are set to default heap values.
1308           // However, the shadow$_monitor_ header field is also set to the default value.
1309           heap_values[i].value = Value::Default();
1310           heap_values[i].stored_by = Value::PureUnknown();
1311         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1312           // The shadow$_klass_ header field is special: its value is the allocation's HLoadClass.
1313           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1314           heap_values[i].stored_by = Value::PureUnknown();
1315         }
1316       } else if (inside_a_try || IsEscapingObject(info, block, i)) {
1317         // Since NewInstance can throw, we presume all previous stores could be visible.
1318         KeepStores(heap_values[i].stored_by);
1319         heap_values[i].stored_by = Value::PureUnknown();
1320       }
1321     }
1322   }
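  // Illustration: immediately after a NewInstance, before any store or call that may
  // write its fields, a load of an instance field can be replaced by the type's default
  // value, and a load of the shadow$_klass_ field by the HLoadClass feeding the allocation.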
1323 
1324   void VisitNewArray(HNewArray* new_array) override {
1325     // If we are in a try, even singletons are observable.
1326     const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
1327     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1328     if (ref_info == nullptr) {
1329       // new_array isn't used for array accesses. No need to process it.
1330       return;
1331     }
1332     if (ref_info->IsSingletonAndRemovable()) {
1333       if (new_array->GetLength()->IsIntConstant() &&
1334           new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1335         // new_array can potentially be eliminated.
1336         singleton_new_instances_.push_back(new_array);
1337       } else {
1338         // new_array may throw NegativeArraySizeException. Keep it.
1339       }
1340     }
1341     HBasicBlock* block = new_array->GetBlock();
1342     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1343     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1344       HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1345       ReferenceInfo* info = location->GetReferenceInfo();
1346       HInstruction* ref = info->GetReference();
1347       if (ref == new_array && location->GetIndex() != nullptr) {
1348         // Array elements are set to default heap values.
1349         heap_values[i].value = Value::Default();
1350         heap_values[i].stored_by = Value::PureUnknown();
1351       } else if (inside_a_try || IsEscapingObject(info, block, i)) {
1352         // Since NewArray can throw, we presume all previous stores could be visible.
1353         KeepStores(heap_values[i].stored_by);
1354         heap_values[i].stored_by = Value::PureUnknown();
1355       }
1356     }
1357   }
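  // Similarly, elements of a freshly allocated array that are read before any (possibly
  // aliasing) store can be replaced by the default value, e.g. 0 for `new int[n]`.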
1358 
1359   void VisitInstruction(HInstruction* instruction) override {
1360     // Throwing instructions must be handled specially.
1361     DCHECK(!instruction->CanThrow());
1362   }
1363 
1364   bool ShouldPerformPartialLSE() const {
1365     return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
1366   }
1367 
1368   bool perform_partial_lse_;
1369 
1370   const HeapLocationCollector& heap_location_collector_;
1371 
1372   // Use local allocator for allocating memory.
1373   ScopedArenaAllocator allocator_;
1374 
1375   // The maximum number of unique Phi placeholders that may be needed.
1376   size_t num_phi_placeholders_;
1377 
1378   // One array of heap value records for each block.
1379   ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1380 
1381   // We record loads and stores for re-processing when we find a loop Phi placeholder
1382   // with unknown value from a predecessor, and also for removing stores that are
1383   // found to be dead, i.e. not marked in `kept_stores_` at the end.
1384   struct LoadStoreRecord {
1385     HInstruction* load_or_store;
1386     size_t heap_location_index;
1387   };
1388   ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1389 
1390   // We record the substitute instructions for loads that should be
1391   // eliminated but may be used by heap locations. They'll be removed
1392   // in the end. These are indexed by the load's id.
1393   ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1394 
1395   // Value at the start of the given instruction for instructions which directly
1396   // read from a heap-location (i.e. FieldGet). The mapping to heap-location is
1397   // implicit through the fact that each instruction can only directly refer to
1398   // a single heap-location.
1399   ScopedArenaHashMap<HInstruction*, Value> intermediate_values_;
1400 
1401   // Record stores to keep in a bit vector indexed by instruction ID.
1402   ArenaBitVector kept_stores_;
1403   // When we need to keep all stores that feed a Phi placeholder, we just record the
1404   // index of that placeholder for processing after graph traversal.
1405   ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1406 
1407   // Loads that would require a loop Phi to replace are recorded for processing
1408   // later as we do not have enough information from back-edges to determine if
1409   // a suitable Phi can be found or created when we visit these loads.
1410   ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;
1411 
1412   // For stores, record the old value records that were replaced and the stored values.
1413   struct StoreRecord {
1414     ValueRecord old_value_record;
1415     HInstruction* stored_value;
1416   };
1417   // Small pre-allocated initial buffer avoids initializing a large one until it's really needed.
1418   static constexpr size_t kStoreRecordsInitialBufferSize = 16;
1419   std::pair<HInstruction*, StoreRecord> store_records_buffer_[kStoreRecordsInitialBufferSize];
1420   ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;
1421 
1422   // Replacements for Phi placeholders.
1423   // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1424   ScopedArenaVector<Value> phi_placeholder_replacements_;
1425 
1426   // Merged-unknowns that must have their predecessor values kept to ensure
1427   // partially escaped values are written
1428   ArenaBitVector kept_merged_unknowns_;
1429 
1430   ScopedArenaVector<HInstruction*> singleton_new_instances_;
1431 
1432   // The field infos for each heap location (if relevant).
1433   ScopedArenaVector<const FieldInfo*> field_infos_;
1434 
1435   Phase current_phase_;
1436 
1437   friend class PartialLoadStoreEliminationHelper;
1438   friend struct ScopedRestoreHeapValues;
1439 
1440   friend std::ostream& operator<<(std::ostream& os, const Value& v);
1441   friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1442   friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1443 
1444   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1445 };
1446 
1447 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1448   p.Dump(oss);
1449   return oss;
1450 }
1451 
1452 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1453   switch (phase) {
1454     case LSEVisitor::Phase::kLoadElimination:
1455       return oss << "kLoadElimination";
1456     case LSEVisitor::Phase::kStoreElimination:
1457       return oss << "kStoreElimination";
1458     case LSEVisitor::Phase::kPartialElimination:
1459       return oss << "kPartialElimination";
1460   }
1461 }
1462 
1463 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1464   if (IsDefault()) {
1465     oss << "Default";
1466   } else if (IsPhi()) {
1467     oss << "Phi: " << GetPhiPlaceholder();
1468   } else {
1469     oss << "Instruction: " << *GetInstruction();
1470   }
1471 }
1472 
1473 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1474     : value_(Marker{}) {
1475   DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1476   if (val.IsPartialUnknown()) {
1477     value_ = val.GetPriorValue().value_;
1478   } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1479     value_ = val.GetPhiPlaceholder();
1480   } else if (val.IsInstruction()) {
1481     value_ = val.GetInstruction();
1482   } else {
1483     DCHECK(val.IsDefault());
1484   }
1485 }
1486 
1487 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1488   return true;
1489 }
1490 
1491 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1492                           const LSEVisitor::PriorValueHolder& p2) {
1493   return p1.Equals(p2);
1494 }
1495 
1496 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1497                           const LSEVisitor::PhiPlaceholder& p2) {
1498   return p1.Equals(p2);
1499 }
1500 
1501 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1502                           const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1503   return p1.phi_ == p2.phi_;
1504 }
1505 
1506 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1507                           const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1508   return p1.phi_ == p2.phi_;
1509 }
1510 
1511 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1512                           const LSEVisitor::Value::MergedUnknownMarker& p2) {
1513   return p1.phi_ == p2.phi_;
1514 }
1515 
1516 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1517   p.Dump(oss);
1518   return oss;
1519 }
1520 
1521 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1522   if (IsDefault()) {
1523     return Value::Default();
1524   } else if (IsPhi()) {
1525     return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1526   } else {
1527     return Value::ForInstruction(GetInstruction());
1528   }
1529 }
1530 
1531 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1532   return value_ == other.value_;
1533 }
1534 
1535 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1536   // Only valid values can be compared.
1537   DCHECK(IsValid());
1538   DCHECK(other.IsValid());
1539   if (value_ == other.value_) {
1540     // Note: Two unknown values are considered different.
1541     return !IsUnknown();
1542   } else {
1543     // Default is considered equal to zero-bit-pattern instructions.
1544     return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1545             (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1546   }
1547 }
1548 
1549 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1550   if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1551     switch (GetValuelessType()) {
1552       case ValuelessType::kDefault:
1553         return os << "Default";
1554       case ValuelessType::kPureUnknown:
1555         return os << "PureUnknown";
1556       case ValuelessType::kInvalid:
1557         return os << "Invalid";
1558     }
1559   } else if (IsPartialUnknown()) {
1560     return os << "PartialUnknown[" << GetPriorValue() << "]";
1561   } else if (IsInstruction()) {
1562     return os << "Instruction[id: " << GetInstruction()->GetId()
1563               << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1564   } else if (IsMergedUnknown()) {
1565     return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1566               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1567 
1568   } else if (NeedsLoopPhi()) {
1569     return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1570               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1571   } else {
1572     return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1573               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1574   }
1575 }
1576 
1577 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1578   return v.Dump(os);
1579 }
1580 
1581 LSEVisitor::LSEVisitor(HGraph* graph,
1582                        const HeapLocationCollector& heap_location_collector,
1583                        bool perform_partial_lse,
1584                        OptimizingCompilerStats* stats)
1585     : HGraphDelegateVisitor(graph, stats),
1586       perform_partial_lse_(perform_partial_lse),
1587       heap_location_collector_(heap_location_collector),
1588       allocator_(graph->GetArenaStack()),
1589       num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1590                             heap_location_collector_.GetNumberOfHeapLocations()),
1591       heap_values_for_(graph->GetBlocks().size(),
1592                        ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1593                        allocator_.Adapter(kArenaAllocLSE)),
1594       loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1595       // We may add new instructions (default values, Phis) but we're not adding loads
1596       // or stores, so we shall not need to resize the following vector and BitVector.
1597       substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
1598                                          nullptr,
1599                                          allocator_.Adapter(kArenaAllocLSE)),
1600       intermediate_values_(allocator_.Adapter(kArenaAllocLSE)),
1601       kept_stores_(&allocator_,
1602                    /*start_bits=*/graph->GetCurrentInstructionId(),
1603                    /*expandable=*/false,
1604                    kArenaAllocLSE),
1605       phi_placeholders_to_search_for_kept_stores_(&allocator_,
1606                                                   num_phi_placeholders_,
1607                                                   /*expandable=*/false,
1608                                                   kArenaAllocLSE),
1609       loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1610       store_records_(store_records_buffer_,
1611                      kStoreRecordsInitialBufferSize,
1612                      allocator_.Adapter(kArenaAllocLSE)),
1613       phi_placeholder_replacements_(num_phi_placeholders_,
1614                                     Value::Invalid(),
1615                                     allocator_.Adapter(kArenaAllocLSE)),
1616       kept_merged_unknowns_(&allocator_,
1617                             /*start_bits=*/num_phi_placeholders_,
1618                             /*expandable=*/false,
1619                             kArenaAllocLSE),
1620       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1621       field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1622                    allocator_.Adapter(kArenaAllocLSE)),
1623       current_phase_(Phase::kLoadElimination) {
1624   // Clear bit vectors.
1625   phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
1626   kept_stores_.ClearAllBits();
1627 }
1628 
1629 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1630   // If the pre-header value is known (which implies that the reference dominates this
1631   // block), use a Phi placeholder for the value in the loop header. If all predecessors
1632   // are later found to have a known value, we can replace loads from this location,
1633   // either with the pre-header value or with a new Phi. For array locations, the index
1634   // may be defined inside the loop but the only known value in that case should be the
1635   // default value or a Phi placeholder that can be replaced only with the default value.
1636   HLoopInformation* loop_info = block->GetLoopInformation();
1637   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1638   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1639   if (pre_header_value.IsUnknown()) {
1640     return pre_header_value;
1641   }
1642   if (kIsDebugBuild) {
1643     // Check that the reference indeed dominates this loop.
1644     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1645     HInstruction* ref = location->GetReferenceInfo()->GetReference();
1646     CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1647         << GetGraph()->PrettyMethod();
1648     // Check that the index, if defined inside the loop, tracks a default value
1649     // or a Phi placeholder requiring a loop Phi.
1650     HInstruction* index = location->GetIndex();
1651     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1652       CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1653           << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1654           << pre_header_value;
1655     }
1656   }
1657   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1658   return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1659 }
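// Illustration (hypothetical snippet): for
//   obj.field = 1;
//   while (cond) { use(obj.field); obj.field = Update(); }
// the loop header records a Phi placeholder for `obj.field`. If all back-edges later turn
// out to provide known values, the load in the loop body can be replaced by a real loop
// Phi of the pre-header value and the back-edge values (phase 2). `Update()` is a
// placeholder name for illustration only.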
1660 
1661 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1662   // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1663   // if the value in the location escapes. This is not applicable to singletons that are
1664   // defined inside the loop as they shall be dead in the loop header.
1665   const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1666   const HInstruction* reference = ref_info->GetReference();
1667   // Finalizable objects always escape.
1668   const bool is_finalizable =
1669       reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1670   if (ref_info->IsSingleton() &&
1671       block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1672       !is_finalizable) {
1673     return Value::PureUnknown();
1674   }
1675   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1676   return Value::ForLoopPhiPlaceholder(phi_placeholder);
1677 }
1678 
1679 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1680   DCHECK(block->IsLoopHeader());
1681   int block_id = block->GetBlockId();
1682   HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1683   ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1684       heap_values_for_[pre_header->GetBlockId()];
1685   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1686   DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1687   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1688   DCHECK(heap_values.empty());
1689 
1690   // Don't eliminate loads in irreducible loops.
1691   if (block->GetLoopInformation()->IsIrreducible()) {
1692     heap_values.resize(num_heap_locations,
1693                        {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1694     // Also keep the stores before the loop header, including in blocks that were not visited yet.
1695     bool is_osr = GetGraph()->IsCompilingOsr();
1696     for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1697       heap_values[idx].value =
1698           is_osr ? Value::PureUnknown()
1699                  : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1700       KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1701     }
1702     return;
1703   }
1704 
1705   // Fill `heap_values` based on values from pre-header.
1706   heap_values.reserve(num_heap_locations);
1707   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1708     heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1709   }
1710 }
1711 
1712 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1713   ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1714   DCHECK(!predecessors.empty());
1715   Value merged_value =
1716       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1717   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1718     Value pred_value =
1719         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1720     if (pred_value.Equals(merged_value)) {
1721       // Value is the same. No need to update our merged value.
1722       continue;
1723     } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1724       // At least one of the values is unknown and the two values differ.
1725       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1726       merged_value = Value::MergedUnknown(phi_placeholder);
1727       // We know that at least one of the merge points is unknown (and both are
1728       // not pure-unknowns since that's captured above). This means that the
1729       // overall value needs to be a MergedUnknown. Just return that.
1730       break;
1731     } else {
1732       // There are conflicting known values. We may still be able to replace loads with a Phi.
1733       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1734       // Propagate the need for a new loop Phi from all predecessors.
1735       bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1736       merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1737     }
1738   }
1739   DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
1740       << merged_value << " in " << GetGraph()->PrettyMethod();
1741   return merged_value;
1742 }
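// Illustration: if one predecessor records `obj.field == c1` and another records
// `obj.field == c2`, the merged value is a Phi placeholder, so a load below the merge may
// later become Phi(c1, c2). If either side is unknown, the merge is a MergedUnknown tied
// to this block and heap location.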
1743 
1744 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1745   if (block->IsExitBlock()) {
1746     // Exit block doesn't really merge values since the control flow ends in
1747     // its predecessors. Each predecessor needs to make sure stores are kept
1748     // if necessary.
1749     return;
1750   }
1751 
1752   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1753   DCHECK(heap_values.empty());
1754   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1755   if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
1756     DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1757     heap_values.resize(num_heap_locations,
1758                        {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1759     return;
1760   }
1761 
1762   heap_values.reserve(num_heap_locations);
1763   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1764     Value merged_value = MergePredecessorValues(block, idx);
1765     if (kIsDebugBuild) {
1766       if (merged_value.NeedsPhi()) {
1767         uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1768         CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1769       } else if (merged_value.IsInstruction()) {
1770         CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1771       }
1772     }
1773     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1774     Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1775     for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1776       uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1777       Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1778       if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1779           !merged_stored_by.Equals(stored_by)) {
1780         // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1781         PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1782         merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1783         break;
1784       }
1785     }
1786     heap_values.push_back({ merged_value, merged_stored_by });
1787   }
1788 }
1789 
1790 static HInstruction* FindOrConstructNonLoopPhi(
1791     HBasicBlock* block,
1792     const ScopedArenaVector<HInstruction*>& phi_inputs,
1793     DataType::Type type) {
1794   for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1795     HInstruction* phi = phi_it.Current();
1796     DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1797     auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1798       return lhs == rhs.GetInstruction();
1799     };
1800     if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1801       return phi;
1802     }
1803   }
1804   ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1805   HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1806   for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1807     DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1808     phi->SetRawInputAt(i, phi_inputs[i]);
1809   }
1810   block->AddPhi(phi);
1811   if (type == DataType::Type::kReference) {
1812     // Update reference type information. Pass invalid handles, these are not used for Phis.
1813     ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1814                                        Handle<mirror::DexCache>(),
1815                                        /* is_first_run= */ false);
1816     rtp_fixup.Visit(phi);
1817   }
1818   return phi;
1819 }
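// Reusing an existing Phi with identical inputs (for example one materialized earlier for
// another heap location merged at the same block, or a Phi already present in the code)
// avoids creating duplicate Phis for the same merged value.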
1820 
1821 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1822   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1823   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1824   size_t idx = phi_placeholder.GetHeapLocation();
1825 
1826   // Use local allocator to reduce peak memory usage.
1827   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1828   // Reuse the same vector for collecting phi inputs.
1829   ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1830 
1831   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1832   work_queue.push_back(phi_placeholder);
1833   while (!work_queue.empty()) {
1834     PhiPlaceholder current_phi_placeholder = work_queue.back();
1835     if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1836       // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1837       // that directly or indirectly depends on it, so it was already processed as part of the
1838       // other Phi placeholder's dependencies before this one got back to the top of the stack.
1839       work_queue.pop_back();
1840       continue;
1841     }
1842     uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1843     HBasicBlock* current_block = blocks[current_block_id];
1844     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1845 
1846     // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1847     // And the only way for such merged value to reach a different heap location is through
1848     // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1849     // seen here are tied to one heap location.
1850     DCHECK(!current_block->IsLoopHeader())
1851         << current_phi_placeholder << " phase: " << current_phase_;
1852     DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1853 
1854     phi_inputs.clear();
1855     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1856       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1857       DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1858                                           << " pred: " << predecessor->GetBlockId();
1859       if (pred_value.NeedsNonLoopPhi() ||
1860           (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
1861         // We need to process the Phi placeholder first.
1862         work_queue.push_back(pred_value.GetPhiPlaceholder());
1863       } else if (pred_value.IsDefault()) {
1864         phi_inputs.push_back(GetDefaultValue(type));
1865       } else {
1866         DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1867                                            << " pred: " << predecessor->GetBlockId();
1868         phi_inputs.push_back(pred_value.GetInstruction());
1869       }
1870     }
1871     if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1872       // All inputs are available. Find or construct the Phi replacement.
1873       phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1874           Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1875       // Remove the block from the queue.
1876       DCHECK_EQ(current_phi_placeholder, work_queue.back());
1877       work_queue.pop_back();
1878     }
1879   }
1880 }
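// Note: `work_queue` is used as a stack. A placeholder is popped only once all the
// placeholders it depends on (pushed on top of it) have been materialized, so the
// replacements are created in dependency order without recursion.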
1881 
1882 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1883   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1884   uint32_t block_id = instruction->GetBlock()->GetBlockId();
1885   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1886   ValueRecord& record = heap_values[idx];
1887   if (instruction->IsFieldAccess()) {
1888     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1889   }
1890   DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1891   // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
1892   // value.
1893   DCHECK(!record.value.IsPureUnknown() ||
1894          heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
1895          !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
1896          << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
1897   intermediate_values_.insert({instruction, record.value});
1898   loads_and_stores_.push_back({ instruction, idx });
1899   if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1900       !IsDefaultOrPhiAllowedForLoad(instruction)) {
1901     record.value = Value::PureUnknown();
1902   }
1903   if (record.value.IsDefault()) {
1904     KeepStores(record.stored_by);
1905     HInstruction* constant = GetDefaultValue(instruction->GetType());
1906     AddRemovedLoad(instruction, constant);
1907     record.value = Value::ForInstruction(constant);
1908   } else if (record.value.IsUnknown()) {
1909     // Load isn't eliminated. Put the load as the value into the HeapLocation.
1910     // This acts like GVN but with better aliasing analysis.
1911     Value old_value = record.value;
1912     record.value = Value::ForInstruction(instruction);
1913     KeepStoresIfAliasedToLocation(heap_values, idx);
1914     KeepStores(old_value);
1915   } else if (record.value.NeedsLoopPhi()) {
1916     // We do not know yet if the value is known for all back edges. Record for future processing.
1917     loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1918   } else {
1919     // This load can be eliminated but we may need to construct non-loop Phis.
1920     if (record.value.NeedsNonLoopPhi()) {
1921       MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1922       record.value = Replacement(record.value);
1923     }
1924     HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1925     AddRemovedLoad(instruction, heap_value);
1926   }
1927 }
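// Illustration: in `x = obj.field; ...; y = obj.field;` with no aliasing store or call in
// between, the second load is replaced by `x`. With a known prior store `obj.field = v;`
// the load is replaced directly by `v`, and right after an allocation by the type's
// default value.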
1928 
1929 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1930   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1931   DCHECK(!IsStore(value)) << value->DebugName();
1932   if (instruction->IsFieldAccess()) {
1933     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1934   }
1935   // value may already have a substitute.
1936   value = FindSubstitute(value);
1937   HBasicBlock* block = instruction->GetBlock();
1938   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1939   ValueRecord& record = heap_values[idx];
1940   DCHECK_IMPLIES(record.value.IsInstruction(),
1941                  FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1942 
1943   if (record.value.Equals(value)) {
1944     // Store into the heap location with the same value.
1945     // This store can be eliminated right away.
1946     block->RemoveInstruction(instruction);
1947     return;
1948   }
1949 
1950   store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1951   loads_and_stores_.push_back({ instruction, idx });
1952 
1953   // If the `record.stored_by` specified a store from this block, it shall be removed
1954   // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1955   // `kept_stores_` anymore after we update the `record.stored_by` below.
1956   DCHECK(!record.stored_by.IsInstruction() ||
1957          record.stored_by.GetInstruction()->GetBlock() != block ||
1958          record.stored_by.GetInstruction()->CanThrow() ||
1959          !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1960 
1961   if (instruction->CanThrow()) {
1962     // Previous stores can become visible.
1963     HandleThrowingInstruction(instruction);
1964     // We cannot remove a possibly throwing store.
1965     // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1966     kept_stores_.SetBit(instruction->GetId());
1967   }
1968 
1969   // Update the record.
1970   auto it = loads_requiring_loop_phi_.find(value);
1971   if (it != loads_requiring_loop_phi_.end()) {
1972     // Propagate the Phi placeholder to the record.
1973     record.value = it->second.value;
1974     DCHECK(record.value.NeedsLoopPhi());
1975   } else {
1976     record.value = Value::ForInstruction(value);
1977   }
1978   // Track the store in the value record. If the value is loaded or needed after
1979   // return/deoptimization later, this store isn't really redundant.
1980   record.stored_by = Value::ForInstruction(instruction);
1981 
1982   // This store may kill values in other heap locations due to aliasing.
1983   for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1984     if (i == idx ||
1985         heap_values[i].value.IsUnknown() ||
1986         CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1987         !heap_location_collector_.MayAlias(i, idx)) {
1988       continue;
1989     }
1990     // Kill heap locations that may alias and keep previous stores to these locations.
1991     KeepStores(heap_values[i].stored_by);
1992     heap_values[i].stored_by = Value::PureUnknown();
1993     heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1994   }
1995 }
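// Illustration: in `obj.field = v; obj.field = w;` the first store becomes dead unless a
// load, call, deoptimization point, exit or aliasing access in between may observe it. A
// store that writes the value already recorded for the location is removed on the spot.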
1996 
1997 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1998   // Populate the heap_values array for this block.
1999   // TODO: try to reuse the heap_values array from one predecessor if possible.
2000   if (block->IsLoopHeader()) {
2001     PrepareLoopRecords(block);
2002   } else {
2003     MergePredecessorRecords(block);
2004   }
2005   // Visit instructions.
2006   HGraphVisitor::VisitBasicBlock(block);
2007 }
2008 
2009 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
2010   DCHECK_NE(idx1, idx2);
2011   DCHECK(loop_header->IsLoopHeader());
2012   if (heap_location_collector_.MayAlias(idx1, idx2)) {
2013     return true;
2014   }
2015   // For array locations with index defined inside the loop, include
2016   // all other locations in the array, even those that LSA declares
2017   // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
2018   // refer to the same locations for different iterations. (LSA's
2019   // `ComputeMayAlias()` does not consider different loop iterations.)
2020   HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
2021   HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
2022   if (loc1->IsArray() &&
2023       loc2->IsArray() &&
2024       HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
2025                                                 loc2->GetReferenceInfo())) {
2026     HLoopInformation* loop_info = loop_header->GetLoopInformation();
2027     if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
2028         loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
2029       // Consider the locations aliasing. Do not optimize the case where both indexes
2030       // are loop invariants defined inside the loop, rely on LICM to pull them out.
2031       return true;
2032     }
2033   }
2034   return false;
2035 }
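// Example: within one iteration `a[i]` and `a[i + 1]` do not alias, but on the next
// iteration `a[i]` may denote the previous iteration's `a[i + 1]`, so when either index is
// defined inside the loop the two locations are treated as aliasing across back-edges.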
2036 
2037 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
2038     PhiPlaceholder phi_placeholder,
2039     DataType::Type type,
2040     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
2041   // Use local allocator to reduce peak memory usage.
2042   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2043   ArenaBitVector visited(&allocator,
2044                          /*start_bits=*/ num_phi_placeholders_,
2045                          /*expandable=*/ false,
2046                          kArenaAllocLSE);
2047   visited.ClearAllBits();
2048   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2049 
2050   // Use depth first search to check if any non-Phi input is unknown.
2051   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2052   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2053   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2054   work_queue.push_back(phi_placeholder);
2055   while (!work_queue.empty()) {
2056     PhiPlaceholder current_phi_placeholder = work_queue.back();
2057     work_queue.pop_back();
2058     HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
2059     DCHECK_GE(block->GetPredecessors().size(), 2u);
2060     size_t idx = current_phi_placeholder.GetHeapLocation();
2061     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2062       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2063       if (value.NeedsPhi()) {
2064         // Visit the predecessor Phi placeholder if it's not visited yet.
2065         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2066           visited.SetBit(PhiPlaceholderIndex(value));
2067           work_queue.push_back(value.GetPhiPlaceholder());
2068         }
2069       } else if (!value.Equals(Value::Default())) {
2070         return false;  // Report failure.
2071       }
2072     }
2073     if (block->IsLoopHeader()) {
2074       // For back-edges we need to check all locations that write to the same array,
2075       // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
2076       // as they may actually refer to the same locations for different iterations.
2077       for (size_t i = 0; i != num_heap_locations; ++i) {
2078         if (i == idx ||
2079             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
2080                 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
2081           continue;
2082         }
2083         for (HBasicBlock* predecessor : block->GetPredecessors()) {
2084           // Check if there were any writes to this location.
2085           // Note: We could simply process the values but due to the vector operation
2086           // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
2087           // the value to change and not be equal to default. To work around this and
2088           // allow replacing the non-vector load of loop-invariant default values
2089           // anyway, skip over paths that do not have any writes.
2090           ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
2091           while (record.stored_by.NeedsLoopPhi() &&
2092                  blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
2093             HLoopInformation* loop_info =
2094                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
2095             record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
2096           }
2097           Value value = ReplacementOrValue(record.value);
2098           if (value.NeedsPhi()) {
2099             // Visit the predecessor Phi placeholder if it's not visited yet.
2100             if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2101               visited.SetBit(PhiPlaceholderIndex(value));
2102               work_queue.push_back(value.GetPhiPlaceholder());
2103             }
2104           } else if (!value.Equals(Value::Default())) {
2105             return false;  // Report failure.
2106           }
2107         }
2108       }
2109     }
2110   }
2111 
2112   // Record replacement and report success.
2113   HInstruction* replacement = GetDefaultValue(type);
2114   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2115     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2116     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
2117     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
2118     // We use both vector and non-vector operations to analyze the information. However, we
2119     // replace only non-vector operations in this code path.
2120     if (!hl->IsVecOp()) {
2121       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2122       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
2123     }
2124   }
2125   return true;
2126 }
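// Illustration: for `int[] a = new int[n]; for (...) { x = a[i]; }` with no write to `a`
// (or to any location of the same array reachable on a back-edge), every non-Phi input of
// the placeholder chain is the default value, so the non-vector loads can be replaced by
// the constant 0.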
2127 
2128 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
2129     PhiPlaceholder phi_placeholder,
2130     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
2131   // Use local allocator to reduce peak memory usage.
2132   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2133   ArenaBitVector visited(&allocator,
2134                          /*start_bits=*/ num_phi_placeholders_,
2135                          /*expandable=*/ false,
2136                          kArenaAllocLSE);
2137   visited.ClearAllBits();
2138   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2139 
2140   // Use depth first search to check if any non-Phi input is unknown.
2141   HInstruction* replacement = nullptr;
2142   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2143   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2144   work_queue.push_back(phi_placeholder);
2145   while (!work_queue.empty()) {
2146     PhiPlaceholder current_phi_placeholder = work_queue.back();
2147     work_queue.pop_back();
2148     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2149     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2150     size_t idx = current_phi_placeholder.GetHeapLocation();
2151     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2152       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2153       if (value.NeedsPhi()) {
2154         // Visit the predecessor Phi placeholder if it's not visited yet.
2155         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2156           visited.SetBit(PhiPlaceholderIndex(value));
2157           work_queue.push_back(value.GetPhiPlaceholder());
2158         }
2159       } else {
2160         if (!value.IsInstruction() ||
2161             (replacement != nullptr && replacement != value.GetInstruction())) {
2162           return false;  // Report failure.
2163         }
2164         replacement = value.GetInstruction();
2165       }
2166     }
2167     // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
2168     // for back-edges, it is not needed here. When looking for a single input
2169     // instruction coming from before the loop, the array index must also be
2170     // defined before the loop and the aliasing analysis done by LSA is sufficient.
2171     // Any writes of a different value with an index that is not loop invariant
2172     // would invalidate the heap location in `VisitSetLocation()`.
2173   }
2174 
2175   // Record replacement and report success.
2176   DCHECK(replacement != nullptr);
2177   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2178     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2179     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
2180     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
2181     // We use both vector and non-vector operations to analyze the information. However, we replace
2182     // only vector operations in this code path.
2183     if (hl->IsVecOp()) {
2184       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2185       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
2186     }
2187   }
2188   return true;
2189 }
2190 
2191 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2192     PhiPlaceholder phi_placeholder,
2193     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2194     DataType::Type type,
2195     bool can_use_default_or_phi) {
2196   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2197 
2198   // Use local allocator to reduce peak memory usage.
2199   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2200   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2201 
2202   // Use depth first search to check if any non-Phi input is unknown.
2203   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2204   phi_placeholders_to_materialize->ClearAllBits();
2205   phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2206   work_queue.push_back(phi_placeholder);
2207   while (!work_queue.empty()) {
2208     PhiPlaceholder current_phi_placeholder = work_queue.back();
2209     work_queue.pop_back();
2210     if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2211       // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2212       DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2213                  Value::Default()));
2214       continue;
2215     }
2216     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2217     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2218     size_t idx = current_phi_placeholder.GetHeapLocation();
2219     if (current_block->IsLoopHeader()) {
2220       // If the index is defined inside the loop, it may reference different elements of the
2221       // array on each iteration. Since we do not track if all elements of an array are set
2222       // to the same value explicitly, the only known value in pre-header can be the default
2223       // value from NewArray or a Phi placeholder depending on a default value from some outer
2224       // loop pre-header. This Phi placeholder can be replaced only by the default value.
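      // Illustrative example (assumed): in
      //   a = new int[n]; loop { sum += a[i]; }
      // each iteration reads a different element `a[i]`, so the only value usable from the
      // pre-header is the default value originating from the NewArray.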
2225       HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2226       if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2227         if (can_use_default_or_phi &&
2228             TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2229                                                       type,
2230                                                       phi_placeholders_to_materialize)) {
2231           continue;
2232         } else {
2233           return current_phi_placeholder;  // Report the loop Phi placeholder.
2234         }
2235       }
2236       // A similar situation arises with the index defined outside the loop if we cannot use
2237       // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2238       // placeholder with a single instruction defined before the loop.
2239       if (!can_use_default_or_phi) {
2240         DCHECK(index != nullptr);  // Vector operations are array operations.
2241         if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2242                                                           phi_placeholders_to_materialize)) {
2243           continue;
2244         } else {
2245           return current_phi_placeholder;  // Report the loop Phi placeholder.
2246         }
2247       }
2248     }
2249     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2250       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2251       Value value = ReplacementOrValue(heap_values[idx].value);
2252       if (value.IsUnknown()) {
2253         // We cannot create a Phi for this loop Phi placeholder.
2254         return current_phi_placeholder;  // Report the loop Phi placeholder.
2255       }
2256       // For arrays, the location may have been clobbered by writes to other locations
2257       // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
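      // For instance (illustrative): in
      //   loop { x = a[i]; a[i + 1] = y; }
      // LSA treats `a[i]` and `a[i + 1]` as distinct within one iteration, but the
      // `a[i + 1]` written in the previous iteration is the `a[i]` read in the next one,
      // so the value flowing over the back-edge may have been clobbered.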
2258       if (current_block->IsLoopHeader() &&
2259           predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2260           heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2261         for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2262           if (i != idx &&
2263               !heap_values[i].stored_by.IsUnknown() &&
2264               MayAliasOnBackEdge(current_block, idx, i)) {
2265             // We cannot create a Phi for this loop Phi placeholder.
2266             return current_phi_placeholder;
2267           }
2268         }
2269       }
2270       if (value.NeedsLoopPhi()) {
2271         // Visit the predecessor Phi placeholder if it's not visited yet.
2272         if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2273           phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2274           work_queue.push_back(value.GetPhiPlaceholder());
2275           LSE_VLOG << "For materialization of " << phi_placeholder
2276                    << " we need to materialize " << value;
2277         }
2278       }
2279     }
2280   }
2281 
2282   // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2283   return std::nullopt;
2284 }
2285 
2286 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2287                                      DataType::Type type) {
2288   return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2289 }
2290 
2291 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2292                                      DataType::Type type) {
2293   // Materialize all predecessor values that do not need a loop Phi and determine if all
2294   // inputs other than loop Phis are the same.
2295   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2296   std::optional<Value> other_value = std::nullopt;
2297   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2298     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2299     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2300     DCHECK_GE(block->GetPredecessors().size(), 2u);
2301     size_t idx = phi_placeholder.GetHeapLocation();
2302     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2303       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2304       if (value.NeedsNonLoopPhi()) {
2305         DCHECK(current_phase_ == Phase::kLoadElimination ||
2306                current_phase_ == Phase::kPartialElimination)
2307             << current_phase_;
2308         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2309         value = Replacement(value);
2310       }
2311       if (!value.NeedsLoopPhi()) {
2312         if (!other_value) {
2313           // The first other value we found.
2314           other_value = value;
2315         } else if (!other_value->IsInvalid()) {
2316           // Check if the current `value` differs from the previous `other_value`.
2317           if (!value.Equals(*other_value)) {
2318             other_value = Value::Invalid();
2319           }
2320         }
2321       }
2322     }
2323   }
2324 
2325   DCHECK(other_value.has_value());
2326   if (!other_value->IsInvalid()) {
2327     HInstruction* replacement =
2328         (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2329     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2330       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2331     }
2332     return true;
2333   }
2334 
2335   // If we're materializing only a single Phi, try to match it with an existing Phi.
2336   // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2337   // This also covers the case when, after replacing a previous set of Phi placeholders,
2338   // we continue with a Phi placeholder that does not really need a loop Phi anymore.
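  // Sketch of the matching below (illustrative): if the pre-header value is `v` and the
  // back-edge value is this very Phi placeholder, an existing
  //   p = Phi(v, p)
  // already has the matching inputs, so we reuse `p` instead of creating a new Phi.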
2339   if (phi_placeholder_indexes.size() == 1u) {
2340     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2341     size_t idx = phi_placeholder.GetHeapLocation();
2342     HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2343     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2344     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2345       HInstruction* phi = phi_it.Current();
2346       DCHECK_EQ(phi->InputCount(), predecessors.size());
2347       ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
2348       auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2349         Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2350         if (value.NeedsPhi()) {
2351           DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2352           return lhs.GetInstruction() == phi;
2353         } else {
2354           DCHECK(value.IsDefault() || value.IsInstruction());
2355           return value.Equals(lhs.GetInstruction());
2356         }
2357       };
2358       if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2359         phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2360         return true;
2361       }
2362     }
2363   }
2364 
2365   if (current_phase_ == Phase::kStoreElimination) {
2366     // We're not creating Phis during the final store elimination phase.
2367     return false;
2368   }
2369 
2370   // There are different inputs to the Phi chain. Create the Phis.
2371   ArenaAllocator* allocator = GetGraph()->GetAllocator();
2372   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2373     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2374     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2375     CHECK_GE(block->GetPredecessors().size(), 2u);
2376     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2377         new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2378   }
2379   // Fill the Phi inputs.
2380   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2381     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2382     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2383     size_t idx = phi_placeholder.GetHeapLocation();
2384     HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2385     DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2386         << "type=" << type << " vs phi-type=" << phi->GetType();
2387     for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2388       HBasicBlock* predecessor = block->GetPredecessors()[i];
2389       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2390       HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2391       DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2392       phi->SetRawInputAt(i, input);
2393       DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2394           << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2395           << " request: " << type;
2396     }
2397   }
2398   // Add the Phis to their blocks.
2399   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2400     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2401     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2402     block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2403   }
2404   if (type == DataType::Type::kReference) {
2405     ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2406     ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2407     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2408       phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2409     }
2410     // Update reference type information. Pass invalid handles; these are not used for Phis.
2411     ReferenceTypePropagation rtp_fixup(GetGraph(),
2412                                        Handle<mirror::DexCache>(),
2413                                        /* is_first_run= */ false);
2414     rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2415   }
2416 
2417   return true;
2418 }
2419 
2420 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2421                                      DataType::Type type) {
2422   // Use local allocator to reduce peak memory usage.
2423   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2424 
2425   // We want to recognize when a subset of these loop Phis that does not depend on
2426   // loop Phis outside the subset, i.e. a transitive closure, has only one other
2427   // instruction as an input, i.e. that instruction can be used instead of each Phi
2428   // in the subset. See for example Main.testLoop{5,6,7,8}() in the test 530-checker-lse.
2429   // To do that, we materialize these loop Phis starting from the smallest transitive closure.
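  // Small illustration (assumed): with placeholders A, B and C where A depends on B and C,
  // and B and C depend only on each other, the transitive closures are {B, C} and
  // {A, B, C}. Materializing the smaller closure {B, C} first may collapse it to a single
  // instruction, which then serves as a plain input when materializing A.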
2430 
2431   // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2432   // assign new indexes to the Phi placeholders, making the matrix dense.
2433   ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2434                                            static_cast<size_t>(-1),  // Invalid.
2435                                            allocator.Adapter(kArenaAllocLSE));
2436   ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2437   size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2438   phi_placeholder_indexes.reserve(num_phi_placeholders);
2439   for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2440     matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2441     phi_placeholder_indexes.push_back(marker_index);
2442   }
2443   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2444   ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2445   dependencies.reserve(num_phi_placeholders);
2446   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2447     static constexpr bool kExpandable = false;
2448     dependencies.push_back(
2449         ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2450     ArenaBitVector* current_dependencies = dependencies.back();
2451     current_dependencies->ClearAllBits();
2452     current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
2453     PhiPlaceholder current_phi_placeholder =
2454         GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2455     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2456     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2457     size_t idx = current_phi_placeholder.GetHeapLocation();
2458     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2459       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2460       if (pred_value.NeedsLoopPhi()) {
2461         size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2462         DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2463         DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2464         current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2465       }
2466     }
2467   }
2468 
2469   // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
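  // Worked illustration (assumed, tiny case): with direct dependencies
  //   0 -> {0, 1},  1 -> {1, 2},  2 -> {2}
  // the triple loop below adds the transitive bit 0 -> 2, yielding
  //   0 -> {0, 1, 2},  1 -> {1, 2},  2 -> {2}.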
2470   for (size_t k = 0; k != num_phi_placeholders; ++k) {
2471     for (size_t i = 0; i != num_phi_placeholders; ++i) {
2472       for (size_t j = 0; j != num_phi_placeholders; ++j) {
2473         if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2474           dependencies[i]->SetBit(j);
2475         }
2476       }
2477     }
2478   }
2479 
2480   // Count the number of transitive dependencies for each replaceable Phi placeholder.
2481   ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2482   num_dependencies.reserve(num_phi_placeholders);
2483   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2484     num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2485   }
2486 
2487   // Pick a Phi placeholder with the smallest number of transitive dependencies and
2488   // materialize it and its dependencies. Repeat until we have materialized all.
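  // Continuing the illustration above (assumed): `num_dependencies` would be {3, 2, 1}, so
  // we materialize the subset for placeholder 2 first, then, after subtracting its
  // dependencies from the others, placeholder 1, and finally placeholder 0.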
2489   ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2490   current_subset.reserve(num_phi_placeholders);
2491   size_t remaining_phi_placeholders = num_phi_placeholders;
2492   while (remaining_phi_placeholders != 0u) {
2493     auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2494     DCHECK_LE(*it, remaining_phi_placeholders);
2495     size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2496     ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2497     size_t current_num_dependencies = num_dependencies[current_matrix_index];
2498     current_subset.clear();
2499     for (uint32_t matrix_index : current_dependencies->Indexes()) {
2500       current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2501     }
2502     if (!MaterializeLoopPhis(current_subset, type)) {
2503       DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2504       // This is the final store elimination phase and we shall not be able to eliminate any
2505       // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2506       for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2507         if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2508           DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2509           phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
2510               Value::PureUnknown();
2511         }
2512       }
2513       return false;
2514     }
2515     for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2516       if (current_dependencies->IsBitSet(matrix_index)) {
2517         // Mark the Phi placeholders of the materialized subset as done by setting their
2518         // `num_dependencies[.]` to the maximum, so that they shall never be the minimum again.
2519         num_dependencies[matrix_index] = num_phi_placeholders;
2520       } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2521         // Remove dependencies from other Phi placeholders.
2522         dependencies[matrix_index]->Subtract(current_dependencies);
2523         num_dependencies[matrix_index] -= current_num_dependencies;
2524       }
2525     }
2526     remaining_phi_placeholders -= current_num_dependencies;
2527   }
2528   return true;
2529 }
2530 
2531 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2532   ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2533   ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2534   auto res =
2535       FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2536   CHECK(!res.has_value()) << *res;
2537   return MaterializeLoopPhis(abv, type);
2538 }
2539 
2540 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2541     PhiPlaceholder phi_placeholder, HInstruction* load) {
2542   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2543 
2544   // Use local allocator to reduce peak memory usage.
2545   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2546 
2547   // Find Phi placeholders to materialize.
2548   ArenaBitVector phi_placeholders_to_materialize(
2549       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2550   phi_placeholders_to_materialize.ClearAllBits();
2551   DataType::Type type = load->GetType();
2552   bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2553   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2554       phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2555   if (loop_phi_with_unknown_input) {
2556     DCHECK_GE(GetGraph()
2557                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2558                   ->GetPredecessors()
2559                   .size(),
2560               2u);
2561     return loop_phi_with_unknown_input;  // Return failure.
2562   }
2563 
2564   DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2565   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2566   DCHECK(success);
2567 
2568   // Report success.
2569   return std::nullopt;
2570 }
2571 
2572 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2573 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2574 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2575 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2576 // heap location that is needed by another loop Phi placeholder.
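// Illustrative scenario (assumed): if a load `x = obj.f` inside the loop cannot become a
// Phi because one of its inputs remains unknown, we keep the stores feeding that Phi
// placeholder, record the load itself as the new heap value for `obj.f`, and later loads
// of `obj.f` may then be eliminated in favor of `x`.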
2577 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2578   size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2579   DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2580   phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2581       Value::MergedUnknown(loop_phi_with_unknown_input);
2582 
2583   uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2584   const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2585   size_t reverse_post_order_index = 0;
2586   size_t reverse_post_order_size = reverse_post_order.size();
2587   size_t loads_and_stores_index = 0u;
2588   size_t loads_and_stores_size = loads_and_stores_.size();
2589 
2590   // Skip blocks and instructions before the block containing the loop phi with unknown input.
2591   DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2592   while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2593     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2594     while (loads_and_stores_index != loads_and_stores_size &&
2595            loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2596       ++loads_and_stores_index;
2597     }
2598     ++reverse_post_order_index;
2599     DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2600   }
2601 
2602   // Use local allocator to reduce peak memory usage.
2603   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2604   // Reuse one temporary vector for all remaining blocks.
2605   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2606   ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2607 
2608   auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2609     Value value;
2610     if (block->IsLoopHeader()) {
2611       if (block->GetLoopInformation()->IsIrreducible()) {
2612         PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2613         value = Value::MergedUnknown(placeholder);
2614       } else {
2615         value = PrepareLoopValue(block, idx);
2616       }
2617     } else {
2618       value = MergePredecessorValues(block, idx);
2619     }
2620     DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2621     return value;
2622   };
2623 
2624   // Process remaining blocks and instructions.
2625   bool found_unreplaceable_load = false;
2626   bool replaced_heap_value_with_unknown = false;
2627   for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2628     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2629     if (block->IsExitBlock()) {
2630       continue;
2631     }
2632 
2633     // We shall reconstruct only the heap values that we need for processing loads and stores.
2634     local_heap_values.clear();
2635     local_heap_values.resize(num_heap_locations, Value::Invalid());
2636 
2637     for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2638       HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2639       size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2640       if (load_or_store->GetBlock() != block) {
2641         break;  // End of instructions from the current block.
2642       }
2643       bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
2644       DCHECK_EQ(is_store, IsStore(load_or_store));
2645       HInstruction* stored_value = nullptr;
2646       if (is_store) {
2647         auto it = store_records_.find(load_or_store);
2648         DCHECK(it != store_records_.end());
2649         stored_value = it->second.stored_value;
2650       }
2651       auto it = loads_requiring_loop_phi_.find(
2652           stored_value != nullptr ? stored_value : load_or_store);
2653       if (it == loads_requiring_loop_phi_.end()) {
2654         continue;  // This load or store never needed a loop Phi.
2655       }
2656       ValueRecord& record = it->second;
2657       if (is_store) {
2658         // Process the store by updating `local_heap_values[idx]`. The last update shall
2659         // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2660         // at the end of the block.
2661         Value replacement = ReplacementOrValue(record.value);
2662         if (replacement.NeedsLoopPhi()) {
2663           // No replacement yet, use the Phi placeholder from the load.
2664           DCHECK(record.value.NeedsLoopPhi());
2665           local_heap_values[idx] = record.value;
2666         } else {
2667           // If the load fetched a known value, use it, otherwise use the load.
2668           local_heap_values[idx] = Value::ForInstruction(
2669               replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2670         }
2671       } else {
2672         // Process the load unless it has previously been marked unreplaceable.
2673         if (record.value.NeedsLoopPhi()) {
2674           if (local_heap_values[idx].IsInvalid()) {
2675             local_heap_values[idx] = get_initial_value(block, idx);
2676           }
2677           if (local_heap_values[idx].IsUnknown()) {
2678             // This load cannot be replaced. Keep stores that feed the Phi placeholder
2679             // (no aliasing since then, otherwise the Phi placeholder would not have been
2680             // propagated as a value to this load) and store the load as the new heap value.
2681             found_unreplaceable_load = true;
2682             KeepStores(record.value);
2683             record.value = Value::MergedUnknown(record.value.GetPhiPlaceholder());
2684             local_heap_values[idx] = Value::ForInstruction(load_or_store);
2685           } else if (local_heap_values[idx].NeedsLoopPhi()) {
2686             // The load may still be replaced with a Phi later.
2687             DCHECK(local_heap_values[idx].Equals(record.value));
2688           } else {
2689             // This load can be eliminated but we may need to construct non-loop Phis.
2690             if (local_heap_values[idx].NeedsNonLoopPhi()) {
2691               MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2692                                      load_or_store->GetType());
2693               local_heap_values[idx] = Replacement(local_heap_values[idx]);
2694             }
2695             record.value = local_heap_values[idx];
2696             HInstruction* heap_value = local_heap_values[idx].GetInstruction();
2697             AddRemovedLoad(load_or_store, heap_value);
2698           }
2699         }
2700       }
2701     }
2702 
2703     // All heap values that previously needed a loop Phi at the end of the block
2704     // need to be updated for processing successors.
2705     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2706     for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2707       if (heap_values[idx].value.NeedsLoopPhi()) {
2708         if (local_heap_values[idx].IsValid()) {
2709           heap_values[idx].value = local_heap_values[idx];
2710         } else {
2711           heap_values[idx].value = get_initial_value(block, idx);
2712         }
2713         if (heap_values[idx].value.IsUnknown()) {
2714           replaced_heap_value_with_unknown = true;
2715         }
2716       }
2717     }
2718   }
2719   DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2720 }
2721 
2722 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2723   // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2724   // make the result of the processing depend on the order in which we process these loads.
2725   // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2726   // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2727   for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2728     auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2729     if (it == loads_requiring_loop_phi_.end()) {
2730       continue;
2731     }
2732     HInstruction* load = it->first;
2733     ValueRecord& record = it->second;
2734     while (record.value.NeedsLoopPhi() &&
2735            phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2736       std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2737           TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2738       DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2739                 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2740       if (loop_phi_with_unknown_input) {
2741         DCHECK_GE(GetGraph()
2742                       ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2743                       ->GetPredecessors()
2744                       .size(),
2745                   2u);
2746         ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2747       }
2748     }
2749     // The load could have been marked as unreplaceable (and stores marked for keeping)
2750     // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2751     DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2752     if (record.value.NeedsLoopPhi()) {
2753       record.value = Replacement(record.value);
2754       HInstruction* heap_value = record.value.GetInstruction();
2755       AddRemovedLoad(load, heap_value);
2756     }
2757   }
2758 }
2759 
2760 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2761   ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2762   size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2763   work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
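  // For example, start_size == 5 reserves (5 * 3 + 1) / 2 == 8 entries.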
2764   for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2765     work_queue.push_back(index);
2766   }
2767   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2768   std::optional<ArenaBitVector> not_kept_stores;
2769   if (stats_) {
2770     not_kept_stores.emplace(GetGraph()->GetAllocator(),
2771                             kept_stores_.GetBitSizeOf(),
2772                             false,
2773                             ArenaAllocKind::kArenaAllocLSE);
2774   }
2775   while (!work_queue.empty()) {
2776     uint32_t cur_phi_idx = work_queue.back();
2777     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2778     // Only writes to partial-escapes need to be specifically kept.
2779     bool is_partial_kept_merged_unknown =
2780         kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
2781         heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation())
2782             ->GetReferenceInfo()
2783             ->IsPartialSingleton();
2784     work_queue.pop_back();
2785     size_t idx = phi_placeholder.GetHeapLocation();
2786     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2787     DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2788                              << " (blocks: " << blocks.size() << ")";
2789     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2790       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2791       // For loop back-edges we must also preserve all stores to locations that
2792       // may alias with the location `idx`.
2793       // TODO: Add test cases around this.
2794       bool is_back_edge =
2795           block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2796       size_t start = is_back_edge ? 0u : idx;
2797       size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2798       for (size_t i = start; i != end; ++i) {
2799         Value stored_by = heap_values[i].stored_by;
2800         if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2801           if (stored_by.NeedsPhi()) {
2802             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2803             if (is_partial_kept_merged_unknown) {
2804               // Propagate merged-unknown keep since otherwise this might look
2805               // like a partial escape we can remove.
2806               kept_merged_unknowns_.SetBit(phi_placeholder_index);
2807             }
2808             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2809               phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2810               work_queue.push_back(phi_placeholder_index);
2811             }
2812           } else {
2813             DCHECK(IsStore(stored_by.GetInstruction()));
2814             ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2815             DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2816                                   << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2817                                   << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2818             if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
2819               if (not_kept_stores) {
2820                 not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
2821               }
2822             } else {
2823               kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2824             }
2825           }
2826         }
2827       }
2828     }
2829   }
2830   if (not_kept_stores) {
2831     // a - b := (a & ~b)
2832     not_kept_stores->Subtract(&kept_stores_);
2833     auto num_removed = not_kept_stores->NumSetBits();
2834     MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
2835   }
2836 }
2837 
2838 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2839   while (value_record->stored_by.IsInstruction() &&
2840          !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2841     auto it = store_records_.find(value_record->stored_by.GetInstruction());
2842     DCHECK(it != store_records_.end());
2843     *value_record = it->second.old_value_record;
2844   }
2845   if (value_record->stored_by.NeedsPhi() &&
2846       !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2847            PhiPlaceholderIndex(value_record->stored_by))) {
2848     // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2849     // Phi placeholder to recalculate the actual value.
2850     value_record->value = value_record->stored_by;
2851   }
2852   value_record->value = ReplacementOrValue(value_record->value);
2853   if (value_record->value.NeedsNonLoopPhi()) {
2854     // Treat all Phi placeholders as requiring loop Phis at this point.
2855     // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2856     value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2857   }
2858 }
2859 
2860 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2861                                                DataType::Type type) {
2862   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2863 
2864   // Use local allocator to reduce peak memory usage.
2865   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2866   ArenaBitVector visited(&allocator,
2867                          /*start_bits=*/ num_phi_placeholders_,
2868                          /*expandable=*/ false,
2869                          kArenaAllocLSE);
2870   visited.ClearAllBits();
2871 
2872   // Find Phi placeholders to try and match against existing Phis or other replacement values.
2873   ArenaBitVector phi_placeholders_to_materialize(
2874       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2875   phi_placeholders_to_materialize.ClearAllBits();
2876   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2877       phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2878   if (loop_phi_with_unknown_input) {
2879     DCHECK_GE(GetGraph()
2880                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2881                   ->GetPredecessors()
2882                   .size(),
2883               2u);
2884     // Mark this placeholder as well as the input Phi placeholder as unreplaceable.
2885     phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
2886     phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2887         Value::PureUnknown();
2888     return;
2889   }
2890 
2891   DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2892   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2893   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2894   DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2895             !success);
2896 }
2897 
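// Helper (RAII): records the original `value` of every ValueRecord that `ForEachRecord()`
// mutates and restores those values in the destructor, so that later partial LSE sees the
// real heap values rather than the store-elimination-time replacements.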
2898 struct ScopedRestoreHeapValues {
2899  public:
2900   ScopedRestoreHeapValues(ArenaStack* alloc,
2901                           size_t num_heap_locs,
2902                           ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
2903       : alloc_(alloc),
2904         updated_values_(alloc_.Adapter(kArenaAllocLSE)),
2905         to_restore_(to_restore) {
2906     updated_values_.reserve(num_heap_locs * to_restore_.size());
2907   }
2908 
2909   ~ScopedRestoreHeapValues() {
2910     for (const auto& rec : updated_values_) {
2911       to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
2912     }
2913   }
2914 
2915   template<typename Func>
2916   void ForEachRecord(Func func) {
2917     for (size_t blk_id : Range(to_restore_.size())) {
2918       for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
2919         LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
2920         LSEVisitor::Value initial = vr->value;
2921         func(vr);
2922         if (!vr->value.ExactEquals(initial)) {
2923           updated_values_.push_back({blk_id, heap_loc, initial});
2924         }
2925       }
2926     }
2927   }
2928 
2929  private:
2930   struct UpdateRecord {
2931     size_t blk_id;
2932     size_t heap_loc;
2933     LSEVisitor::Value val_;
2934   };
2935   ScopedArenaAllocator alloc_;
2936   ScopedArenaVector<UpdateRecord> updated_values_;
2937   ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
2938 
2939   DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
2940 };
2941 
2942 void LSEVisitor::FindStoresWritingOldValues() {
2943   // Partial LSE relies on knowing the real heap values, not the
2944   // store-replacement versions, so we need to restore the map after removing
2945   // stores.
2946   ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
2947                                     heap_location_collector_.GetNumberOfHeapLocations(),
2948                                     heap_values_for_);
2949   // The Phi placeholder replacements have so far been used for eliminating loads,
2950   // tracking values that would be stored if all stores were kept. As we want to
2951   // compare actual old values after removing unmarked stores, prune the Phi
2952   // placeholder replacements that can be fed by values we may not actually store.
2953   // Replacements marked as unknown can be kept as they are fed by some unknown
2954   // value and would end up as unknown again if we recalculated them.
2955   for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2956     if (!phi_placeholder_replacements_[i].IsUnknown() &&
2957         !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2958       phi_placeholder_replacements_[i] = Value::Invalid();
2959     }
2960   }
2961 
2962   // Update heap values at end of blocks.
2963   heap_vals.ForEachRecord([&](ValueRecord* rec) {
2964     UpdateValueRecordForStoreElimination(rec);
2965   });
2966 
2967   if (kIsDebugBuild) {
2968     heap_vals.ForEachRecord([](ValueRecord* rec) {
2969       DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
2970     });
2971   }
2972 
2973   // Use local allocator to reduce peak memory usage.
2974   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2975   // Mark the stores we want to eliminate in a separate bit vector.
2976   ArenaBitVector eliminated_stores(&allocator,
2977                                    /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2978                                    /*expandable=*/ false,
2979                                    kArenaAllocLSE);
2980   eliminated_stores.ClearAllBits();
2981 
2982   for (auto& entry : store_records_) {
2983     HInstruction* store = entry.first;
2984     StoreRecord& store_record = entry.second;
2985     if (!kept_stores_.IsBitSet(store->GetId())) {
2986       continue;  // Ignore stores that are not kept.
2987     }
2988     UpdateValueRecordForStoreElimination(&store_record.old_value_record);
2989     if (store_record.old_value_record.value.NeedsPhi()) {
2990       DataType::Type type = store_record.stored_value->GetType();
2991       FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
2992       store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
2993     }
2994     DCHECK(!store_record.old_value_record.value.NeedsPhi());
2995     HInstruction* stored_value = FindSubstitute(store_record.stored_value);
2996     if (store_record.old_value_record.value.Equals(stored_value)) {
2997       eliminated_stores.SetBit(store->GetId());
2998     }
2999   }
3000 
3001   // Commit the stores to eliminate by removing them from `kept_stores_`.
3002   kept_stores_.Subtract(&eliminated_stores);
3003 }
3004 
3005 void LSEVisitor::Run() {
3006   // 1. Process blocks and instructions in reverse post order.
3007   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
3008     VisitBasicBlock(block);
3009   }
3010 
3011   // 2. Process loads that require loop Phis, trying to find/create replacements.
3012   current_phase_ = Phase::kLoadElimination;
3013   ProcessLoadsRequiringLoopPhis();
3014 
3015   // 3. Determine which stores to keep and which to eliminate.
3016   current_phase_ = Phase::kStoreElimination;
3017   // Finish marking stores for keeping.
3018   SearchPhiPlaceholdersForKeptStores();
3019 
3020   // Find stores that write the same value as is already present in the location.
3021   FindStoresWritingOldValues();
3022 
3023   // 4. Replace loads and remove unnecessary stores and singleton allocations.
3024   FinishFullLSE();
3025 
3026   // 5. Move partial escapes down and fixup with PHIs.
3027   current_phase_ = Phase::kPartialElimination;
3028   MovePartialEscapes();
3029 }
3030 
3031 // Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to
3032 // retry all of them with more information about where they come from.
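// For example (illustrative): replacements recorded as unknown values are reset to Invalid
// below, while Default and concrete instruction replacements are kept.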
3033 void LSEVisitor::PrepareForPartialPhiComputation() {
3034   std::replace_if(
3035       phi_placeholder_replacements_.begin(),
3036       phi_placeholder_replacements_.end(),
3037       [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); },
3038       Value::Invalid());
3039 }
3040 
3041 class PartialLoadStoreEliminationHelper {
3042  public:
3043   PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
3044       : lse_(lse),
3045         alloc_(alloc),
3046         new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
3047         heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
3048         max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
3049                                                 GetGraph()->GetActiveBlocks().end(),
3050                                                 [](HBasicBlock* a, HBasicBlock* b) {
3051                                                   return a->GetNumberOfPredecessors() <
3052                                                          b->GetNumberOfPredecessors();
3053                                                 }))
3054                                  ->GetNumberOfPredecessors()),
3055         materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
3056                                 nullptr,
3057                                 alloc_->Adapter(kArenaAllocLSE)),
3058         first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
3059     size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
3060     heap_refs_.reserve(num_partial_singletons);
3061     new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
3062     CollectInterestingHeapRefs();
3063   }
3064 
3065   ~PartialLoadStoreEliminationHelper() {
3066     if (heap_refs_.empty()) {
3067       return;
3068     }
3069     ReferenceTypePropagation rtp_fixup(GetGraph(),
3070                                        Handle<mirror::DexCache>(),
3071                                        /* is_first_run= */ false);
3072     rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
3073     GetGraph()->ClearLoopInformation();
3074     GetGraph()->ClearDominanceInformation();
3075     GetGraph()->ClearReachabilityInformation();
3076     GetGraph()->BuildDominatorTree();
3077     GetGraph()->ComputeReachabilityInformation();
3078   }
3079 
3080   class IdxToHeapLoc {
3081    public:
3082     explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
3083     HeapLocation* operator()(size_t idx) const {
3084       return collector_->GetHeapLocation(idx);
3085     }
3086 
3087    private:
3088     const HeapLocationCollector* collector_;
3089   };
3090 
3091 
3092   class HeapReferenceData {
3093    public:
3094     using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
3095     HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
3096                       HNewInstance* new_inst,
3097                       const ExecutionSubgraph* subgraph,
3098                       ScopedArenaAllocator* alloc)
3099         : new_instance_(new_inst),
3100           helper_(helper),
3101           heap_locs_(alloc,
3102                      helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
3103                      /* expandable= */ false,
3104                      kArenaAllocLSE),
3105           materializations_(
3106               // We generally won't need to create too many materialization blocks, and we can
3107               // expand this as needed, so just start off with 2x.
3108               2 * helper->lse_->GetGraph()->GetBlocks().size(),
3109               nullptr,
3110               alloc->Adapter(kArenaAllocLSE)),
3111           collector_(helper->lse_->heap_location_collector_),
3112           subgraph_(subgraph) {}
3113 
3114     LocIterator IterateLocations() {
3115       auto idxs = heap_locs_.Indexes();
3116       return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
3117     }
3118 
3119     void AddHeapLocation(size_t idx) {
3120       heap_locs_.SetBit(idx);
3121     }
3122 
3123     const ExecutionSubgraph* GetNoEscapeSubgraph() const {
3124       return subgraph_;
3125     }
3126 
3127     bool IsPostEscape(HBasicBlock* blk) {
3128       return std::any_of(
3129           subgraph_->GetExcludedCohorts().cbegin(),
3130           subgraph_->GetExcludedCohorts().cend(),
3131           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
3132     }
3133 
3134     bool InEscapeCohort(HBasicBlock* blk) {
3135       return std::any_of(
3136           subgraph_->GetExcludedCohorts().cbegin(),
3137           subgraph_->GetExcludedCohorts().cend(),
3138           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
3139     }
3140 
3141     bool BeforeAllEscapes(HBasicBlock* b) {
3142       return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
3143                           subgraph_->GetExcludedCohorts().cend(),
3144                           [&](const ExecutionSubgraph::ExcludedCohort& ec) {
3145                             return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
3146                           });
3147     }
3148 
3149     HNewInstance* OriginalNewInstance() const {
3150       return new_instance_;
3151     }
3152 
3153     // Collect and replace all uses. We need to perform this twice since we will
3154     // generate PHIs and additional uses as we create the default-values for
3155     // pred-gets. These values might be other references that are also being
3156     // partially eliminated. By running just the replacement part again we are
3157     // able to avoid having to keep another whole in-progress partial map
3158     // around. Since we will have already handled all the other uses in the
3159     // first pass the second one will be quite fast.
3160     void FixupUses(bool first_pass) {
3161       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3162       // Replace uses with materialized values.
3163       ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
3164       ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
3165       // Do we need to add a constructor fence?
3166       ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
3167           saa.Adapter(kArenaAllocLSE));
3168       ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
3169 
3170       CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
3171 
3172       if (!first_pass) {
3173         // If another partial creates new references they can only be in Phis or pred-get defaults
3174         // so they must be in the to_replace group.
3175         DCHECK(to_predicate.empty());
3176         DCHECK(constructor_fences.empty());
3177         DCHECK(to_remove.empty());
3178       }
3179 
3180       ReplaceInput(to_replace);
3181       RemoveAndReplaceInputs(to_remove);
3182       CreateConstructorFences(constructor_fences);
3183       PredicateInstructions(to_predicate);
3184 
3185       CHECK(OriginalNewInstance()->GetUses().empty())
3186           << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
3187     }
3188 
3189     void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
3190       if (blk->GetBlockId() >= materializations_.size()) {
3191         // Make sure the materialization array is large enough; give it extra
3192         // space to avoid resizing too many times.
3193         materializations_.resize(blk->GetBlockId() * 2, nullptr);
3194       }
3195       DCHECK(materializations_[blk->GetBlockId()] == nullptr)
3196           << "Already have a materialization in block " << blk->GetBlockId() << ": "
3197           << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
3198           << *ins;
3199       materializations_[blk->GetBlockId()] = ins;
3200       LSE_VLOG << "In block " << blk->GetBlockId() << " materialization is " << *ins;
3201       helper_->NotifyNewMaterialization(ins);
3202     }
3203 
3204     bool HasMaterialization(HBasicBlock* blk) const {
3205       return blk->GetBlockId() < materializations_.size() &&
3206              materializations_[blk->GetBlockId()] != nullptr;
3207     }
3208 
3209     HInstruction* GetMaterialization(HBasicBlock* blk) const {
3210       if (materializations_.size() <= blk->GetBlockId() ||
3211           materializations_[blk->GetBlockId()] == nullptr) {
3212         // This must be a materialization block added after the partial LSE of
3213         // the current reference finished. Since every edge can only have at
3214         // most one materialization block added to it we can just check the
3215         // blocks predecessor.
3216         DCHECK(helper_->IsMaterializationBlock(blk));
3217         blk = helper_->FindDominatingNonMaterializationBlock(blk);
3218         DCHECK(!helper_->IsMaterializationBlock(blk));
3219       }
3220       DCHECK_GT(materializations_.size(), blk->GetBlockId());
3221       DCHECK(materializations_[blk->GetBlockId()] != nullptr);
3222       return materializations_[blk->GetBlockId()];
3223     }
3224 
3225     void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
3226       DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3227                           GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3228                           [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3229                             return cohort.IsEntryBlock(blk);
3230                           }));
3231       DCHECK(!HasMaterialization(blk));
3232       if (blk->IsExitBlock()) {
3233         return;
3234       } else if (blk->IsLoopHeader()) {
3235         // See comment in execution_subgraph.h. Currently we act as though every
3236         // allocation for partial elimination takes place in the entry block.
3237         // This simplifies the analysis by making it so any escape cohort
3238         // expands to contain any loops it is a part of. This is something that
3239         // we should rectify at some point. In either case, however, we can still
3240         // special case the loop-header since (1) currently the loop can't have
3241         // any merges between different cohort entries since the pre-header will
3242         // be the earliest place entry can happen and (2) even if the analysis
3243         // is improved to consider lifetime of the object WRT loops any values
3244         // which would require loop-phis would have to make the whole loop
3245         // escape anyway.
3246         // This all means we can always use the value from the pre-header when the
3247         // block is the loop-header and we didn't already create a
3248         // materialization block. (NB when we do improve the analysis we will
3249         // need to modify the materialization creation code to deal with this
3250         // correctly.)
3251         HInstruction* pre_header_val =
3252             GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
3253         AddMaterialization(blk, pre_header_val);
3254         return;
3255       }
3256       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3257       ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
3258       pred_vals.reserve(blk->GetNumberOfPredecessors());
3259       for (HBasicBlock* pred : blk->GetPredecessors()) {
3260         DCHECK(HasMaterialization(pred));
3261         pred_vals.push_back(GetMaterialization(pred));
3262       }
3263       GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
3264     }
3265 
3266     void GenerateMaterializationValueFromPredecessorsForEntry(
3267         HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
3268       DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3269                          GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3270                          [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3271                            return cohort.IsEntryBlock(entry);
3272                          }));
3273       GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
3274     }
3275 
3276    private:
3277     template <typename InstructionType>
3278     struct InstructionUse {
3279       InstructionType* instruction_;
3280       size_t index_;
3281     };
3282 
3283     void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
3284       for (auto& [ins, idx] : to_replace) {
3285         HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3286         if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
3287           // For Phis we just pass through the appropriate input.
3288           ins->ReplaceInput(merged_inst->InputAt(idx), idx);
3289         } else {
3290           ins->ReplaceInput(merged_inst, idx);
3291         }
3292       }
3293     }
3294 
3295     void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) {
3296       for (HInstruction* ins : to_remove) {
3297         if (ins->GetBlock() == nullptr) {
3298           // Already dealt with.
3299           continue;
3300         }
3301         DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
3302         if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
3303           bool instruction_has_users =
3304               ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty());
3305           if (instruction_has_users) {
3306             // Make sure any remaining users of the read are replaced.
3307             HInstruction* replacement =
3308                 helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins);
3309             // NB ReplaceInput will remove a use from the list so this is
3310             // guaranteed to finish eventually.
3311             while (!ins->GetUses().empty()) {
3312               const HUseListNode<HInstruction*>& use = ins->GetUses().front();
3313               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3314             }
3315             while (!ins->GetEnvUses().empty()) {
3316               const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front();
3317               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3318             }
3319           } else {
3320             DCHECK(ins->GetUses().empty())
3321                 << "Instruction has users!\n"
3322                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses();
3323             DCHECK(ins->GetEnvUses().empty())
3324                 << "Instruction has users!\n"
3325                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses();
3326           }
3327           ins->GetBlock()->RemoveInstruction(ins);
3328         } else {
3329           // Can only be obj == other, obj != other, obj == obj (!?) or obj != obj (!?).
3330           // Since PHIs are escapes as far as LSE is concerned and we are before
3331           // any escapes these are the only 4 options.
3332           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
3333           HInstruction* replacement;
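               // The partial singleton has not escaped at this point, so it cannot be
               // reference-equal to any other value: obj == other folds to false and
               // obj != other folds to true. The degenerate obj == obj / obj != obj
               // comparisons fold the other way.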
3334           if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
3335             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
3336                                          : GetGraph()->GetIntConstant(0);
3337           } else {
3338             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
3339                                          : GetGraph()->GetIntConstant(1);
3340           }
3341           ins->ReplaceWith(replacement);
3342           ins->GetBlock()->RemoveInstruction(ins);
3343         }
3344       }
3345     }
3346 
3347     void CreateConstructorFences(
3348         const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
3349       if (!constructor_fences.empty()) {
3350         uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
3351         for (auto& [cf, idx] : constructor_fences) {
3352           if (cf->GetInputs().size() == 1) {
3353             cf->GetBlock()->RemoveInstruction(cf);
3354           } else {
3355             cf->RemoveInputAt(idx);
3356           }
3357         }
3358         for (const ExecutionSubgraph::ExcludedCohort& ec :
3359             GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3360           for (HBasicBlock* blk : ec.EntryBlocks()) {
3361             for (HBasicBlock* materializer :
3362                 Filter(MakeIterationRange(blk->GetPredecessors()),
3363                         [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
3364               HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
3365                   GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
3366               materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
3367             }
3368           }
3369         }
3370       }
3371     }
3372 
3373     void PredicateInstructions(
3374         const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3375       for (auto& [ins, idx] : to_predicate) {
3376         if (UNLIKELY(ins->GetBlock() == nullptr)) {
3377           // Already handled due to obj == obj.
3378           continue;
3379         } else if (ins->IsInstanceFieldGet()) {
3380           // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
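               // The predicated get reads the field from the materialized object when
               // one exists on this path and otherwise yields the supplied partial
               // value (see the note on default values below).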
3381           HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
3382               ins->AsInstanceFieldGet(),
3383               GetMaterialization(ins->GetBlock()),
3384               helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
3385           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
3386           ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
3387           if (ins->GetType() == DataType::Type::kReference) {
3388             // Reference info is the same
3389             new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo());
3390           }
3391           // In this phase, substitute instructions are used only for the predicated get
3392           // default values which are used only if the partial singleton did not escape,
3393           // so the out value of the `new_fget` for the relevant cases is the same as
3394           // the default value.
3395           // TODO: Use the default value for materializing default values used by
3396           // other predicated loads to avoid some unnecessary Phis. (This shall
3397           // complicate the search for replacement in `ReplacementOrValue()`.)
3398           DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr);
3399           helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget;
3400           ins->ReplaceWith(new_fget);
3401           ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
3402           CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
3403               << "Instruction: " << *ins << " uses: " << ins->GetUses()
3404               << ", env: " << ins->GetEnvUses();
3405           ins->GetBlock()->RemoveInstruction(ins);
3406         } else if (ins->IsInstanceFieldSet()) {
3407           // Any predicated sets shouldn't require movement.
3408           ins->AsInstanceFieldSet()->SetIsPredicatedSet();
3409           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
3410           HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3411           ins->ReplaceInput(merged_inst, idx);
3412         } else {
3413           // comparisons need to be split into 2.
3414           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
3415           bool this_is_first = idx == 0;
3416           if (ins->InputAt(0) == ins->InputAt(1)) {
3417             // This is an obj == obj or obj != obj comparison.
3418             // No idea why anyone would do this but whatever.
3419             ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
3420             ins->GetBlock()->RemoveInstruction(ins);
3421             continue;
3422           } else {
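                 // Rewrite the comparison against the materialized value and guard it
                 // with a null-check of that value (null means the object was never
                 // materialized on this path); the guard and the rewritten comparison
                 // are combined with HAnd for == and HOr for !=.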
3423             HInstruction* is_escaped = new (GetGraph()->GetAllocator())
3424                 HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
3425             HInstruction* combine_inst =
3426                 ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
3427                                      DataType::Type::kBool, is_escaped, ins))
3428                                : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
3429                                      DataType::Type::kBool, is_escaped, ins));
3430             ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
3431             ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
3432             ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
3433             ins->ReplaceWith(combine_inst);
3434             combine_inst->ReplaceInput(ins, 1);
3435           }
3436         }
3437       }
3438     }
3439 
3440     // Figure out all the instructions we need to
3441     // fixup/replace/remove/duplicate. Since this requires an iteration of an
3442     // intrusive linked list we want to do it only once and collect all the data
3443     // here.
3444     void CollectReplacements(
3445         ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
3446         ScopedArenaVector<HInstruction*>& to_remove,
3447         ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
3448         ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3449       size_t size = new_instance_->GetUses().SizeSlow();
3450       to_replace.reserve(size);
3451       to_remove.reserve(size);
3452       constructor_fences.reserve(size);
3453       to_predicate.reserve(size);
3454       for (auto& use : new_instance_->GetUses()) {
3455         HBasicBlock* blk =
3456             helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
3457         if (InEscapeCohort(blk)) {
3458           LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
3459                    << *GetMaterialization(blk);
3460           to_replace.push_back({use.GetUser(), use.GetIndex()});
3461         } else if (IsPostEscape(blk)) {
3462           LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
3463           // The fields + cmp are normal uses. A Phi can only be here if it was
3464           // generated by full LSE, so the store+load that created the phi
3465           // is the escape.
3466           if (use.GetUser()->IsPhi()) {
3467             to_replace.push_back({use.GetUser(), use.GetIndex()});
3468           } else {
3469             DCHECK(use.GetUser()->IsFieldAccess() ||
3470                    use.GetUser()->IsEqual() ||
3471                    use.GetUser()->IsNotEqual())
3472                 << *use.GetUser() << "@" << use.GetIndex();
3473             to_predicate.push_back({use.GetUser(), use.GetIndex()});
3474           }
3475         } else if (use.GetUser()->IsConstructorFence()) {
3476           LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
3477           constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
3478         } else {
3479           LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
3480           to_remove.push_back(use.GetUser());
3481         }
3482       }
3483       DCHECK_EQ(
3484           to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
3485           size);
3486     }
3487 
3488     void GenerateMaterializationValueFromPredecessorsDirect(
3489         HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
3490       DCHECK(!pred_vals.empty());
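           // If every predecessor sees the same materialized value, reuse it directly;
           // otherwise merge the per-predecessor values with a new reference-typed Phi
           // in this block.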
3491       bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
3492         return val == pred_vals.front();
3493       });
3494       if (LIKELY(all_equal)) {
3495         AddMaterialization(blk, pred_vals.front());
3496       } else {
3497         // Make a PHI for the predecessors.
3498         HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
3499             GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
3500         for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
3501           phi->SetRawInputAt(off, ins);
3502         }
3503         blk->AddPhi(phi);
3504         AddMaterialization(blk, phi);
3505       }
3506     }
3507 
3508     HGraph* GetGraph() const {
3509       return helper_->GetGraph();
3510     }
3511 
3512     HNewInstance* new_instance_;
3513     PartialLoadStoreEliminationHelper* helper_;
3514     ArenaBitVector heap_locs_;
3515     ScopedArenaVector<HInstruction*> materializations_;
3516     const HeapLocationCollector& collector_;
3517     const ExecutionSubgraph* subgraph_;
3518   };
3519 
3520   ArrayRef<HeapReferenceData> GetHeapRefs() {
3521     return ArrayRef<HeapReferenceData>(heap_refs_);
3522   }
3523 
3524   bool IsMaterializationBlock(HBasicBlock* blk) const {
3525     return blk->GetBlockId() >= first_materialization_block_id_;
3526   }
3527 
3528   HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3529     size_t idx = GetMaterializationBlockIndex(entry, pred_num);
3530     HBasicBlock* blk = materialization_blocks_[idx];
3531     if (blk == nullptr) {
3532       blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
3533       GetGraph()->AddBlock(blk);
3534       LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
3535                << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
3536       blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
3537       materialization_blocks_[idx] = blk;
3538     }
3539     return blk;
3540   }
3541 
3542   HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3543     HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
3544     DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
3545                            << entry->GetPredecessors()[pred_num]->GetBlockId();
3546     return out;
3547   }
3548 
3549   IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
3550     return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
3551                               GetGraph()->GetBlocks().end());
3552   }
3553 
3554   void FixupPartialObjectUsers() {
3555     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3556       // Use the materialized instances to replace original instance
3557       ref_data.FixupUses(/*first_pass=*/true);
3558       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3559           << ref_data.OriginalNewInstance()->GetUses() << ", "
3560           << ref_data.OriginalNewInstance()->GetEnvUses();
3561     }
3562     // This can cause new uses to be created due to the creation of phis/pred-get defaults
3563     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3564       // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
3565       ref_data.FixupUses(/*first_pass=*/false);
3566       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3567           << ref_data.OriginalNewInstance()->GetUses() << ", "
3568           << ref_data.OriginalNewInstance()->GetEnvUses();
3569     }
3570   }
3571 
3572   // Finds the first non-materialization block: either the given block itself
3573   // or the nearest block that dominates it.
3574   HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
3575     if (LIKELY(!IsMaterializationBlock(blk))) {
3576       // Not a materialization block, so the block itself.
3577       return blk;
3578     } else if (blk->GetNumberOfPredecessors() != 0) {
3579       // We're far enough along that the materialization blocks have been
3580       // inserted into the graph so no need to go searching.
3581       return blk->GetSinglePredecessor();
3582     }
3583     // Search through the materialization blocks to find where it will be
3584     // inserted.
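         // materialization_blocks_ is indexed as block_id * max_preds_per_block_ +
         // pred_num (see GetMaterializationBlockIndex), so the div/mod below recover
         // the entry block and the predecessor edge this materializer belongs to.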
3585     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3586       if (mat == blk) {
3587         size_t cur_pred_idx = idx % max_preds_per_block_;
3588         HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3589         return entry->GetPredecessors()[cur_pred_idx];
3590       }
3591     }
3592     LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
3593     return nullptr;
3594   }
3595 
3596   void InsertMaterializationBlocks() {
3597     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3598       if (mat == nullptr) {
3599         continue;
3600       }
3601       size_t cur_pred_idx = idx % max_preds_per_block_;
3602       HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3603       HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
3604       mat->InsertBetween(pred, entry);
3605       LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
3606                << pred->GetBlockId() << "->" << entry->GetBlockId();
3607     }
3608   }
3609 
3610   // Replace any remaining env-uses of the partial singletons with the
3611   // appropriate phis and remove the instructions.
3612   void RemoveReplacedInstructions() {
3613     for (HeapReferenceData& ref_data : GetHeapRefs()) {
3614       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3615           << ref_data.OriginalNewInstance()->GetUses() << ", "
3616           << ref_data.OriginalNewInstance()->GetEnvUses()
3617           << " inst is: " << ref_data.OriginalNewInstance();
3618       const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
3619       while (!env_uses.empty()) {
3620         const HUseListNode<HEnvironment*>& use = env_uses.front();
3621         HInstruction* merged_inst =
3622             ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
3623         LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
3624                  << " with " << *merged_inst;
3625         use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
3626       }
3627       ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
3628     }
3629   }
3630 
3631   // We need to make sure any allocations dominate their environment uses.
3632   // Technically we could probably remove the env-uses and be fine but this is easy.
3633   void ReorderMaterializationsForEnvDominance() {
3634     for (HBasicBlock* blk : IterateMaterializationBlocks()) {
3635       ScopedArenaAllocator alloc(alloc_->GetArenaStack());
3636       ArenaBitVector still_unsorted(
3637           &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
3638       // This is guaranteed to be very short (since we will abandon LSE if there
3639       // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the
3640       // absolute maximum size this list can be) so doing a selection sort is
3641       // fine. This avoids the need to do a complicated recursive check to
3642       // ensure transitivity for std::sort.
3643       ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
3644       materializations.reserve(GetHeapRefs().size());
3645       for (HInstruction* ins :
3646            MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
3647         if (ins->IsNewInstance()) {
3648           materializations.push_back(ins->AsNewInstance());
3649           still_unsorted.SetBit(ins->GetId());
3650         }
3651       }
3652       using Iter = ScopedArenaVector<HNewInstance*>::iterator;
3653       Iter unsorted_start = materializations.begin();
3654       Iter unsorted_end = materializations.end();
3655       // Selection sort. Required since the only check we can easily perform is
3656       // an is-before-all-unsorted check.
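           // A materialization may be placed next only when none of its environment
           // inputs are materializations that are still unsorted (i.e. whose ids are
           // still set in still_unsorted).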
3657       while (unsorted_start != unsorted_end) {
3658         bool found_instruction = false;
3659         for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
3660           HNewInstance* ni = *candidate;
3661           if (std::none_of(ni->GetAllEnvironments().cbegin(),
3662                            ni->GetAllEnvironments().cend(),
3663                            [&](const HEnvironment* env) {
3664                              return std::any_of(
3665                                  env->GetEnvInputs().cbegin(),
3666                                  env->GetEnvInputs().cend(),
3667                                  [&](const HInstruction* env_element) {
3668                                    return env_element != nullptr &&
3669                                           still_unsorted.IsBitSet(env_element->GetId());
3670                                  });
3671                            })) {
3672             still_unsorted.ClearBit(ni->GetId());
3673             std::swap(*unsorted_start, *candidate);
3674             ++unsorted_start;
3675             found_instruction = true;
3676             break;
3677           }
3678         }
3679         CHECK(found_instruction) << "Unable to select next materialization instruction."
3680                                  << " Environments have a dependency loop!";
3681       }
3682       // Reverse so that, as we prepend them, we end up with the correct order.
3683       auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
3684       for (HNewInstance* ins : reverse_iter) {
3685         if (blk->GetFirstInstruction() != ins) {
3686           // Don't do checks, since those verify the move is safe WRT
3687           // ins->CanBeMoved(), which is false for NewInstance.
3688           ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
3689         }
3690       }
3691     }
3692   }
3693 
3694  private:
3695   void CollectInterestingHeapRefs() {
3696     // Get all the partials we need to move around.
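         // A candidate must be a partial singleton whose allocation is still in the
         // graph and whose allocating block is contained in its no-escape subgraph.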
3697     for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
3698       ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
3699       if (ri->IsPartialSingleton() &&
3700           ri->GetReference()->GetBlock() != nullptr &&
3701           ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
3702         RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
3703       }
3704     }
3705   }
3706 
3707   void RecordHeapRefField(HNewInstance* ni, size_t loc) {
3708     DCHECK(ni != nullptr);
3709     // This is likely to be very short so just do a linear search.
3710     auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
3711       return data.OriginalNewInstance() == ni;
3712     });
3713     HeapReferenceData& cur_ref =
3714         (it == heap_refs_.end())
3715             ? heap_refs_.emplace_back(this,
3716                                       ni,
3717                                       lse_->heap_location_collector_.GetHeapLocation(loc)
3718                                           ->GetReferenceInfo()
3719                                           ->GetNoEscapeSubgraph(),
3720                                       alloc_)
3721             : *it;
3722     cur_ref.AddHeapLocation(loc);
3723   }
3724 
3725 
3726   void NotifyNewMaterialization(HInstruction* ins) {
3727     if (ins->IsPhi()) {
3728       new_ref_phis_.push_back(ins->AsPhi());
3729     }
3730   }
3731 
3732   size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
3733     DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
3734         << "block is a materialization block!";
3735     DCHECK_LT(pred_num, max_preds_per_block_);
3736     return blk->GetBlockId() * max_preds_per_block_ + pred_num;
3737   }
3738 
3739   HGraph* GetGraph() const {
3740     return lse_->GetGraph();
3741   }
3742 
3743   LSEVisitor* lse_;
3744   ScopedArenaAllocator* alloc_;
3745   ScopedArenaVector<HInstruction*> new_ref_phis_;
3746   ScopedArenaVector<HeapReferenceData> heap_refs_;
3747   size_t max_preds_per_block_;
3748   // An array of (# of non-materialization blocks) * max_preds_per_block
3749   // arranged in block-id major order. Since we can only have at most one
3750   // materialization block on each edge this is the maximum possible number of
3751   // materialization blocks.
3752   ScopedArenaVector<HBasicBlock*> materialization_blocks_;
3753   size_t first_materialization_block_id_;
3754 
3755   friend void LSEVisitor::MovePartialEscapes();
3756 };
3757 
3758 // Work around C++ type-checking annoyances with not being able to forward-declare inner types.
3759 class HeapRefHolder
3760     : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};
3761 
3762 HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
3763                                                       HeapRefHolder&& holder,
3764                                                       size_t pred_idx,
3765                                                       HBasicBlock* entry) {
3766   PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
3767   HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
3768   HInstruction* new_inst = ref_data.OriginalNewInstance();
3769   if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
3770     LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
3771              << " is null!";
3772     return GetGraph()->GetNullConstant();
3773   }
3774   HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
3775   CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
3776   HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
3777   repl_create->SetPartialMaterialization();
3778   bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
3779   repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
3780   MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
3781   LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
3782   ref_data.AddMaterialization(bb, repl_create);
3783   const FieldInfo* info = nullptr;
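       // Initialize the freshly materialized instance: for every tracked heap
       // location, take the value known at the end of the old predecessor block
       // (materializing any required Phis first) and store it into the clone with
       // an HInstanceFieldSet. Default values and shadow$_klass_ are skipped.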
3784   for (const HeapLocation* loc : ref_data.IterateLocations()) {
3785     size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3786     info = field_infos_[loc_off];
3787     DCHECK(loc->GetIndex() == nullptr);
3788     Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
3789     if (value.NeedsLoopPhi() || value.IsMergedUnknown()) {
3790       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3791       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3792           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3793       if (!repl.IsInvalid()) {
3794         value = repl;
3795       } else {
3796         FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
3797         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3798       }
3799     } else if (value.NeedsNonLoopPhi()) {
3800       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3801       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3802           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3803       if (!repl.IsInvalid()) {
3804         value = repl;
3805       } else {
3806         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
3807         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3808       }
3809     }
3810     DCHECK(value.IsDefault() || value.IsInstruction())
3811         << GetGraph()->PrettyMethod() << ": " << value;
3812 
3813     if (!value.IsDefault() &&
3814         // shadow$_klass_ doesn't need to be manually initialized.
3815         MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
3816       CHECK(info != nullptr);
3817       HInstruction* set_value =
3818           new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
3819                                                              value.GetInstruction(),
3820                                                              field_infos_[loc_off]->GetField(),
3821                                                              loc->GetType(),
3822                                                              MemberOffset(loc->GetOffset()),
3823                                                              false,
3824                                                              field_infos_[loc_off]->GetFieldIndex(),
3825                                                              loc->GetDeclaringClassDefIndex(),
3826                                                              field_infos_[loc_off]->GetDexFile(),
3827                                                              0u);
3828       bb->InsertInstructionAfter(set_value, repl_create);
3829       LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
3830     }
3831   }
3832   return repl_create;
3833 }
3834 
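     // Returns the value a removed read of `orig_new_inst` would observe on paths
     // where the object has not escaped, materializing Phis or the default value
     // as needed.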
3835 HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
3836   size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
3837   Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
3838   LSE_VLOG << "using " << pred << " as default value for " << *read;
3839   if (pred.IsInstruction()) {
3840     return pred.GetInstruction();
3841   } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
3842     FullyMaterializePhi(pred.GetPhiPlaceholder(),
3843                         heap_location_collector_.GetHeapLocation(loc)->GetType());
3844     HInstruction* res = Replacement(pred).GetInstruction();
3845     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3846     return res;
3847   } else if (pred.IsDefault()) {
3848     HInstruction* res = GetDefaultValue(read->GetType());
3849     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3850     return res;
3851   }
3852   LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
3853              << "! This should be impossible! Value is " << pred;
3854   UNREACHABLE();
3855 }
3856 
3857 void LSEVisitor::MovePartialEscapes() {
3858   if (!ShouldPerformPartialLSE()) {
3859     return;
3860   }
3861 
3862   ScopedArenaAllocator saa(allocator_.GetArenaStack());
3863   PartialLoadStoreEliminationHelper helper(this, &saa);
3864 
3865   // Since we will now have more information for Phis (we know the object
3866   // hasn't escaped), we need to clear the old phi-replacements where we weren't
3867   // able to find the value.
3868   PrepareForPartialPhiComputation();
3869 
3870   for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
3871     LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
3872     // Setup entry and exit blocks.
3873     for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3874       // Setup materialization blocks.
3875       for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
3876         // Setup entries.
3877         // TODO Assuming we correctly break critical edges every entry block
3878         // must have only a single predecessor so we could just put all this
3879         // stuff in there. OTOH simplifier can do it for us and this is simpler
3880         // to implement - giving clean separation between the original graph and
3881         // materialization blocks - so for now we might as well have these new
3882         // blocks.
3883         ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
3884         ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
3885         pred_vals.reserve(entry->GetNumberOfPredecessors());
3886         for (const auto& [pred, pred_idx] :
3887              ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
3888           DCHECK(!helper.IsMaterializationBlock(pred));
3889           if (excluded_cohort.IsEntryBlock(pred)) {
3890             pred_vals.push_back(ref_data.GetMaterialization(pred));
3891             continue;
3892           } else {
3893             pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
3894           }
3895         }
3896         ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
3897       }
3898 
3899       // Setup exit block heap-values for later phi-generation.
3900       for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
3901         // mark every exit of cohorts as having a value so we can easily
3902         // materialize the PHIs.
3903         // TODO By setting this we can easily use the normal MaterializeLoopPhis
3904         //      (via FullyMaterializePhis) in order to generate the default-values
3905         //      for predicated-gets. This has the unfortunate side effect of creating
3906         //      somewhat more phis than are really needed (in some cases). We really
3907         //      should try to eventually know that we can lower these PHIs to only
3908         //      the non-escaping value in cases where it is possible. Currently this
3909         //      is done to some extent in instruction_simplifier but we have more
3910         //      information here to do the right thing.
3911         for (const HeapLocation* loc : ref_data.IterateLocations()) {
3912           size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3913           // This Value::Default() is only used to fill in PHIs used as the
3914           // default value for PredicatedInstanceFieldGets. The actual value
3915           // stored there is meaningless since the Predicated-iget will use the
3916           // actual field value instead on these paths.
3917           heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
3918         }
3919       }
3920     }
3921 
3922     // Thread the materialization through the graph.
3923     // Visit blocks in RPO to Phi the materialized object through the cohort.
3924     for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
3925       // NB This doesn't include materialization blocks.
3926       DCHECK(!helper.IsMaterializationBlock(blk))
3927           << "Materialization blocks should not be in RPO yet.";
3928       if (ref_data.HasMaterialization(blk)) {
3929         continue;
3930       } else if (ref_data.BeforeAllEscapes(blk)) {
3931         ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
3932         continue;
3933       } else {
3934         ref_data.GenerateMaterializationValueFromPredecessors(blk);
3935       }
3936     }
3937   }
3938 
3939   // Once we've generated all the materializations we can update the users.
3940   helper.FixupPartialObjectUsers();
3941 
3942   // Actually put materialization blocks into the graph
3943   helper.InsertMaterializationBlocks();
3944 
3945   // Get rid of the original instructions.
3946   helper.RemoveReplacedInstructions();
3947 
3948   // Ensure everything is ordered correctly in the materialization blocks. This
3949   // involves moving every NewInstance to the top and ordering them so that any
3950   // required env-uses are correctly ordered.
3951   helper.ReorderMaterializationsForEnvDominance();
3952 }
3953 
3954 void LSEVisitor::FinishFullLSE() {
3955   // Remove recorded load instructions that should be eliminated.
3956   for (const LoadStoreRecord& record : loads_and_stores_) {
3957     size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
3958     HInstruction* substitute = substitute_instructions_for_loads_[id];
3959     if (substitute == nullptr) {
3960       continue;
3961     }
3962     HInstruction* load = record.load_or_store;
3963     DCHECK(load != nullptr);
3964     DCHECK(IsLoad(load));
3965     DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
3966     // We proactively retrieve the substitute for a removed load, so
3967     // a load that has a substitute should not be observed as a heap
3968     // location value.
3969     DCHECK_EQ(FindSubstitute(substitute), substitute);
3970 
3971     load->ReplaceWith(substitute);
3972     load->GetBlock()->RemoveInstruction(load);
3973   }
3974 
3975   // Remove all the stores we can.
3976   for (const LoadStoreRecord& record : loads_and_stores_) {
3977     bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
3978     DCHECK_EQ(is_store, IsStore(record.load_or_store));
3979     if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
3980       record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
3981     }
3982   }
3983 
3984   // Eliminate singleton-classified instructions:
3985   //   - Constructor fences (they never escape this thread).
3986   //   - Allocations (if they are unused).
3987   for (HInstruction* new_instance : singleton_new_instances_) {
3988     size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
3989     MaybeRecordStat(stats_,
3990                     MethodCompilationStat::kConstructorFenceRemovedLSE,
3991                     removed);
3992 
3993     if (!new_instance->HasNonEnvironmentUses()) {
3994       new_instance->RemoveEnvironmentUsers();
3995       new_instance->GetBlock()->RemoveInstruction(new_instance);
3996       MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
3997     }
3998   }
3999 }
4000 
4001 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
4002 // cannot be directly allocated with an arena allocator, so we need to wrap it.
4003 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
4004  public:
4005   LSEVisitorWrapper(HGraph* graph,
4006                     const HeapLocationCollector& heap_location_collector,
4007                     bool perform_partial_lse,
4008                     OptimizingCompilerStats* stats)
4009       : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {}
4010 
4011   void Run() {
4012     lse_visitor_.Run();
4013   }
4014 
4015  private:
4016   LSEVisitor lse_visitor_;
4017 };
4018 
4019 bool LoadStoreElimination::Run(bool enable_partial_lse) {
4020   if (graph_->IsDebuggable()) {
4021     // Debugger may set heap values or trigger deoptimization of callers.
4022     // Skip this optimization.
4023     return false;
4024   }
4025   // We need to be able to determine reachability. Clear it just to be safe but
4026   // this should initially be empty.
4027   graph_->ClearReachabilityInformation();
4028   // Computing reachability is O(blocks^3) in time, but it lets us answer
4029   // reachability queries in O(1) afterwards.
4030   graph_->ComputeReachabilityInformation();
4031   ScopedArenaAllocator allocator(graph_->GetArenaStack());
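       // Partial LSE consumes the escape/subgraph information collected by the full
       // analysis (e.g. ReferenceInfo::GetNoEscapeSubgraph); the basic analysis is
       // sufficient otherwise.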
4032   LoadStoreAnalysis lsa(graph_,
4033                         stats_,
4034                         &allocator,
4035                         enable_partial_lse ? LoadStoreAnalysisType::kFull
4036                                            : LoadStoreAnalysisType::kBasic);
4037   lsa.Run();
4038   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
4039   if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
4040     // No HeapLocation information from LSA, skip this optimization.
4041     return false;
4042   }
4043 
4044   std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
4045       graph_, heap_location_collector, enable_partial_lse, stats_));
4046   lse_visitor->Run();
4047   return true;
4048 }
4049 
4050 #undef LSE_VLOG
4051 
4052 }  // namespace art
4053