1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "load_store_elimination.h"
18
19 #include <algorithm>
20 #include <optional>
21 #include <sstream>
22 #include <variant>
23
24 #include "base/arena_allocator.h"
25 #include "base/arena_bit_vector.h"
26 #include "base/array_ref.h"
27 #include "base/bit_vector-inl.h"
28 #include "base/bit_vector.h"
29 #include "base/globals.h"
30 #include "base/indenter.h"
31 #include "base/iteration_range.h"
32 #include "base/scoped_arena_allocator.h"
33 #include "base/scoped_arena_containers.h"
34 #include "base/transform_iterator.h"
35 #include "escape.h"
36 #include "execution_subgraph.h"
37 #include "handle.h"
38 #include "load_store_analysis.h"
39 #include "mirror/class_loader.h"
40 #include "mirror/dex_cache.h"
41 #include "nodes.h"
42 #include "optimizing/execution_subgraph.h"
43 #include "optimizing_compiler_stats.h"
44 #include "reference_type_propagation.h"
45 #include "side_effects_analysis.h"
46 #include "stack_map.h"
47
48 /**
49 * The general algorithm of load-store elimination (LSE).
50 *
51 * We use load-store analysis to collect a list of heap locations and perform
52 * alias analysis of those heap locations. LSE then keeps track of a list of
53 * heap values corresponding to the heap locations and stores that put those
54 * values in these locations.
55 * - In phase 1, we visit basic blocks in reverse post order and for each basic
56 * block, visit instructions sequentially, recording heap values and looking
57 * for loads and stores to eliminate without relying on loop Phis.
58 * - In phase 2, we look for loads that can be replaced by creating loop Phis
59 * or using a loop-invariant value.
60 * - In phase 3, we determine which stores are dead and can be eliminated and
61 * based on that information we re-evaluate whether some kept stores are
62 * storing the same value as the value in the heap location; such stores are
63 * also marked for elimination.
64 * - In phase 4, we commit the changes, replacing loads marked for elimination
65 * in previous processing and removing stores not marked for keeping. We also
66 * remove allocations that are no longer needed.
67 * - In phase 5, we move allocations which only escape along some executions
68 * closer to their escape points and fixup non-escaping paths with their actual
69 * values, creating PHIs when needed.
70 *
71 * 1. Walk over blocks and their instructions.
72 *
73 * The initial set of heap values for a basic block is
74 * - For a loop header of an irreducible loop, all heap values are unknown.
75 * - For a loop header of a normal loop, all values unknown at the end of the
76 * preheader are initialized to unknown, other heap values are set to Phi
77 * placeholders as we cannot determine yet whether these values are known on
78 * all back-edges. We use Phi placeholders also for array heap locations with
79 * index defined inside the loop but this helps only when the value remains
80 * zero from the array allocation throughout the loop.
81 * - For catch blocks, we clear all assumptions since we arrived due to an
82 * instruction throwing.
83 * - For other basic blocks, we merge incoming values from the end of all
84 * predecessors. If any incoming value is unknown, the start value for this
85 * block is also unknown. Otherwise, if all the incoming values are the same
86 * (including the case of a single predecessor), the incoming value is used.
87 * Otherwise, we use a Phi placeholder to indicate different incoming values.
88 * We record whether such Phi placeholder depends on a loop Phi placeholder.
89 *
90 * For each instruction in the block
91 * - If the instruction is a load from a heap location with a known value not
92 * dependent on a loop Phi placeholder, the load can be eliminated, either by
93 * using an existing instruction or by creating new Phi(s) instead. In order
94 * to maintain the validity of all heap locations during the optimization
95 * phase, we only record substitutes at this phase and the real elimination
96 * is delayed till the end of LSE. Loads that require a loop Phi placeholder
97 * replacement are recorded for processing later. We also keep track of the
* heap value at the time of the load so that later partial-LSE can predicate the
99 * load.
100 * - If the instruction is a store, it updates the heap value for the heap
101 * location with the stored value and records the store itself so that we can
102 * mark it for keeping if the value becomes observable. Heap values are
103 * invalidated for heap locations that may alias with the store instruction's
104 * heap location and their recorded stores are marked for keeping as they are
105 * now potentially observable. The store instruction can be eliminated unless
106 * the value stored is later needed e.g. by a load from the same/aliased heap
107 * location or the heap location persists at method return/deoptimization.
108 * - A store that stores the same value as the heap value is eliminated.
109 * - For newly instantiated instances, their heap values are initialized to
110 * language defined default values.
111 * - Finalizable objects are considered as persisting at method
112 * return/deoptimization.
113 * - Some instructions such as invokes are treated as loading and invalidating
114 * all the heap values, depending on the instruction's side effects.
* - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
*   partial overlap between ArrayGet/ArraySet/VecLoad/VecStore accesses is treated
*   as aliasing and no load/store is eliminated in such a case.
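*
* For illustration only (a hypothetical Java snippet; `Point` and its field `x`
* are made up for this example), a case that phase 1 alone can handle:
*
*   int sample(Point p) {
*     p.x = 1;      // kept: `p` comes from the caller, so the store may be observed
*     int a = p.x;  // load eliminated: the heap value for p.x is known to be 1
*     p.x = a;      // eliminated: stores the same value the location already holds
*     return a;     // uses of the eliminated load are rewritten to the constant 1
*   }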
118 *
119 * The time complexity of the initial phase has several components. The total
120 * time for the initialization of heap values for all blocks is
121 * O(heap_locations * edges)
122 * and the time complexity for simple instruction processing is
123 * O(instructions).
124 * See the description of phase 3 for additional complexity due to matching of
125 * existing Phis for replacing loads.
126 *
127 * 2. Process loads that depend on loop Phi placeholders.
128 *
129 * We go over these loads to determine whether they can be eliminated. We look
130 * for the set of all Phi placeholders that feed the load and depend on a loop
131 * Phi placeholder and, if we find no unknown value, we construct the necessary
132 * Phi(s) or, if all other inputs are identical, i.e. the location does not
133 * change in the loop, just use that input. If we do find an unknown input, this
134 * must be from a loop back-edge and we replace the loop Phi placeholder with
135 * unknown value and re-process loads and stores that previously depended on
136 * loop Phi placeholders. This shall find at least one load of an unknown value
137 * which is now known to be unreplaceable or a new unknown value on a back-edge
138 * and we repeat this process until each load is either marked for replacement
139 * or found to be unreplaceable. As we mark at least one additional loop Phi
* placeholder as unreplaceable in each iteration, this process shall terminate.
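*
* As a hypothetical Java example (made-up names) of a load that needs a loop Phi:
*
*   int loopExample(int n) {
*     Point p = new Point();
*     p.x = 0;
*     int s = 0;
*     for (int i = 0; i < n; ++i) {
*       s += p.x;   // sees a loop Phi placeholder for p.x at the loop header
*       p.x = i;    // the value known on the back-edge
*     }
*     return s;     // phase 2 materializes a real Phi(0, i) and replaces the load
*   }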
141 *
142 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
143 * is limited by the number of Phi placeholders and their dependencies we need
144 * to search with worst-case time complexity
145 * O(phi_placeholder_dependencies) .
146 * The dependencies are usually just the Phi placeholders' potential inputs,
147 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
148 * replacement search, there are additional dependencies to consider, see below.
149 *
150 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
151 * algorithm to determine transitive closures for each found Phi placeholder,
152 * and then match or materialize Phis from the smallest transitive closure,
153 * so that we can determine if such subset has a single other input. This has
154 * time complexity
155 * O(phi_placeholders_found^3) .
156 * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
157 * contribute to this as such Phi placeholders are replaced immediately.
158 * The total time of all such successful cases has time complexity
159 * O(phi_placeholders^3)
160 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. Similar
161 * argument applies to the searches used to find all successful cases, so their
162 * total contribution is also just an insignificant
163 * O(phi_placeholder_dependencies) .
164 * The materialization of Phis has an insignificant total time complexity
165 * O(phi_placeholders * edges) .
166 *
167 * If we find an unknown input, we re-process heap values and loads with a time
168 * complexity that's the same as the phase 1 in the worst case. Adding this to
169 * the depth-first search time complexity yields
170 * O(phi_placeholder_dependencies + heap_locations * edges + instructions)
* for a single iteration. We can ignore the middle term as it's proportional
172 * to the number of Phi placeholder inputs included in the first term. Using
173 * the upper limit of number of such iterations, the total time complexity is
174 * O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
175 *
176 * The upper bound of Phi placeholder inputs is
177 * heap_locations * edges
178 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
179 * include other heap locations in predecessor blocks with the upper bound of
180 * heap_locations^2 * edges .
181 * Using the estimate
182 * edges <= blocks^2
183 * and
184 * phi_placeholders <= heap_locations * blocks ,
185 * the worst-case time complexity of the
186 * O(phi_placeholder_dependencies * phi_placeholders)
187 * term from unknown input cases is actually
188 * O(heap_locations^3 * blocks^3) ,
189 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
190 * Adding the other term from the unknown input cases (to account for the case
191 * with significantly more instructions than blocks and heap locations), the
192 * phase 2 time complexity is
193 * O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
194 *
195 * See the description of phase 3 for additional complexity due to matching of
196 * existing Phis for replacing loads.
197 *
198 * 3. Determine which stores to keep and which to eliminate.
199 *
200 * During instruction processing in phase 1 and re-processing in phase 2, we are
201 * keeping a record of the stores and Phi placeholders that become observable
202 * and now propagate the observable Phi placeholders to all actual stores that
203 * feed them. Having determined observable stores, we look for stores that just
* overwrite the old value with the same value. Since ignoring non-observable stores
205 * actually changes the old values in heap locations, we need to recalculate
206 * Phi placeholder replacements but we proceed similarly to the previous phase.
207 * We look for the set of all Phis that feed the old value replaced by the store
208 * (but ignoring whether they depend on a loop Phi) and, if we find no unknown
209 * value, we try to match existing Phis (we do not create new Phis anymore) or,
210 * if all other inputs are identical, i.e. the location does not change in the
211 * loop, just use that input. If this succeeds and the old value is identical to
212 * the value we're storing, such store shall be eliminated.
213 *
214 * The work is similar to the phase 2, except that we're not re-processing loads
215 * and stores anymore, so the time complexity of phase 3 is
216 * O(heap_locations^3 * blocks^3) .
217 *
218 * There is additional complexity in matching existing Phis shared between the
219 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
220 * time (this could be difficult and slow), so each matching attempt is just
221 * looking at Phis in the block (both old Phis and newly created Phis) and their
222 * inputs. As we create at most `heap_locations` Phis in each block, the upper
223 * bound on the number of Phis we look at is
224 * heap_locations * (old_phis + heap_locations)
225 * and the worst-case time complexity is
226 * O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
227 * The first term is lower than one term in phase 2, so the relevant part is
228 * O(heap_locations * old_phis * edges) .
229 *
230 * 4. Replace loads and remove unnecessary stores and singleton allocations.
231 *
* A special type of object called a singleton is instantiated in the method
* and has a single name, i.e. no aliases. Since they have no aliases, singletons
* have exclusive heap locations. Singletons are helpful in narrowing
235 * down the life span of a heap location such that they do not always need to
236 * participate in merging heap values. Allocation of a singleton can be
237 * eliminated if that singleton is not used and does not persist at method
238 * return/deoptimization.
239 *
240 * The time complexity of this phase is
241 * O(instructions + instruction_uses) .
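*
* A hypothetical Java example (made-up names) of a removable singleton:
*
*   int singletonExample() {
*     Point p = new Point();  // singleton: never escapes this method
*     p.x = 42;               // becomes dead once the load below is eliminated
*     return p.x;             // load replaced by the constant 42
*   }                         // the dead store and the allocation itself are removed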
242 *
243 * 5. Partial LSE
244 *
245 * Move allocations closer to their escapes and remove/predicate loads and
246 * stores as required.
247 *
248 * Partial singletons are objects which only escape from the function or have
249 * multiple names along certain execution paths. In cases where we recognize
250 * these partial singletons we can move the allocation and initialization
251 * closer to the actual escape(s). We can then perform a simplified version of
252 * LSE step 2 to determine the unescaped value of any reads performed after the
* object may have escaped. These values are used to replace such reads with
254 * 'predicated-read' instructions where the value is only read if the object
255 * has actually escaped. We use the existence of the object itself as the
256 * marker of whether escape has occurred.
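*
* A hypothetical Java sketch (made-up names; `escape()` stands for any call that
* lets the object escape) of a partial singleton:
*
*   int partialExample(boolean cond) {
*     Point p = new Point();  // escapes only on the `cond` path
*     p.x = 7;
*     if (cond) {
*       escape(p);            // the allocation and stores are materialized on this edge
*     }
*     return p.x;             // becomes a predicated read: 7 when nothing escaped,
*   }                         // otherwise a real load from the materialized object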
257 *
258 * There are several steps in this sub-pass
259 *
260 * 5.1 Group references
261 *
* Since all heap-locations for a single reference escape at the same time, we
* need to group the heap-locations by reference and process them together.
265 *
266 * O(heap_locations).
267 *
268 * FIXME: The time complexity above assumes we can bucket the heap-locations in
269 * O(1) which is not true since we just perform a linear-scan of the heap-ref
270 * list. Since there are generally only a small number of heap-references which
* are partial-singletons, this is fine and has lower real overhead than a hash map.
272 *
273 * 5.2 Generate materializations
274 *
275 * Once we have the references we add new 'materialization blocks' on the edges
276 * where escape becomes inevitable. This information is calculated by the
277 * execution-subgraphs created during load-store-analysis. We create new
278 * 'materialization's in these blocks and initialize them with the value of
279 * each heap-location ignoring side effects (since the object hasn't escaped
280 * yet). Worst case this is the same time-complexity as step 3 since we may
281 * need to materialize phis.
282 *
283 * O(heap_locations^2 * materialization_edges)
284 *
285 * 5.3 Propagate materializations
286 *
287 * Since we use the materialization as the marker for escape we need to
288 * propagate it throughout the graph. Since the subgraph analysis considers any
289 * lifetime that escapes a loop (and hence would require a loop-phi) to be
290 * escaping at the loop-header we do not need to create any loop-phis to do
291 * this.
292 *
293 * O(edges)
294 *
295 * NB: Currently the subgraph analysis considers all objects to have their
296 * lifetimes start at the entry block. This simplifies that analysis enormously
297 * but means that we cannot distinguish between an escape in a loop where the
298 * lifetime does not escape the loop (in which case this pass could optimize)
299 * and one where it does escape the loop (in which case the whole loop is
300 * escaping). This is a shortcoming that would be good to fix at some point.
301 *
302 * 5.4 Propagate partial values
303 *
304 * We need to replace loads and stores to the partial reference with predicated
305 * ones that have default non-escaping values. Again this is the same as step 3.
306 *
307 * O(heap_locations^2 * edges)
308 *
309 * 5.5 Final fixup
310 *
311 * Now all we need to do is replace and remove uses of the old reference with the
312 * appropriate materialization.
313 *
314 * O(instructions + uses)
315 *
* FIXME: The time complexities described above assume that the
317 * HeapLocationCollector finds a heap location for an instruction in O(1)
318 * time but it is currently O(heap_locations); this can be fixed by adding
319 * a hash map to the HeapLocationCollector.
320 */
321
322 namespace art {
323
324 #define LSE_VLOG \
325 if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
326
327 class PartialLoadStoreEliminationHelper;
328 class HeapRefHolder;
329
330 // Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
331 class LSEVisitor final : private HGraphDelegateVisitor {
332 public:
333 LSEVisitor(HGraph* graph,
334 const HeapLocationCollector& heap_location_collector,
335 bool perform_partial_lse,
336 OptimizingCompilerStats* stats);
337
338 void Run();
339
340 private:
341 class PhiPlaceholder {
342 public:
constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
345 : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}
346
347 constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
348 constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
349 constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
350 constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;
351
constexpr uint32_t GetBlockId() const {
353 return block_id_;
354 }
355
constexpr size_t GetHeapLocation() const {
357 return heap_location_;
358 }
359
constexpr bool Equals(const PhiPlaceholder& p2) const {
361 return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
362 }
363
void Dump(std::ostream& oss) const {
365 oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
366 }
367
368 private:
369 uint32_t block_id_;
370 uint32_t heap_location_;
371 };
372
373 struct Marker {};
374
375 class Value;
376
377 class PriorValueHolder {
378 public:
379 constexpr explicit PriorValueHolder(Value prior);
380
constexpr bool IsInstruction() const {
382 return std::holds_alternative<HInstruction*>(value_);
383 }
constexpr bool IsPhi() const {
385 return std::holds_alternative<PhiPlaceholder>(value_);
386 }
constexpr bool IsDefault() const {
388 return std::holds_alternative<Marker>(value_);
389 }
constexpr PhiPlaceholder GetPhiPlaceholder() const {
391 DCHECK(IsPhi());
392 return std::get<PhiPlaceholder>(value_);
393 }
constexpr HInstruction* GetInstruction() const {
395 DCHECK(IsInstruction());
396 return std::get<HInstruction*>(value_);
397 }
398
399 Value ToValue() const;
400 void Dump(std::ostream& oss) const;
401
constexpr bool Equals(PriorValueHolder other) const {
403 return value_ == other.value_;
404 }
405
406 private:
407 std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
408 };
409
410 friend constexpr bool operator==(const Marker&, const Marker&);
411 friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
412 friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
413 friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);
414
415 class Value {
416 public:
417 enum class ValuelessType {
418 kInvalid,
419 kPureUnknown,
420 kDefault,
421 };
422 struct MergedUnknownMarker {
423 PhiPlaceholder phi_;
424 };
425 struct NeedsNonLoopPhiMarker {
426 PhiPlaceholder phi_;
427 };
428 struct NeedsLoopPhiMarker {
429 PhiPlaceholder phi_;
430 };
431
static constexpr Value Invalid() {
433 return Value(ValuelessType::kInvalid);
434 }
435
436 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
437 // A heap location can be set to an unknown heap value when:
438 // - it is coming from outside the method,
439 // - it is killed due to aliasing, or side effects, or merging with an unknown value.
static constexpr Value PureUnknown() {
441 return Value(ValuelessType::kPureUnknown);
442 }
443
static constexpr Value PartialUnknown(Value old_value) {
445 if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
446 return PureUnknown();
447 } else {
448 return Value(PriorValueHolder(old_value));
449 }
450 }
451
static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
453 return Value(MergedUnknownMarker{phi_placeholder});
454 }
455
456 // Default heap value after an allocation.
457 // A heap location can be set to that value right after an allocation.
static constexpr Value Default() {
459 return Value(ValuelessType::kDefault);
460 }
461
static constexpr Value ForInstruction(HInstruction* instruction) {
463 return Value(instruction);
464 }
465
static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
467 return Value(NeedsNonLoopPhiMarker{phi_placeholder});
468 }
469
static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
471 return Value(NeedsLoopPhiMarker{phi_placeholder});
472 }
473
static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
475 return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
476 : ForNonLoopPhiPlaceholder(phi_placeholder);
477 }
478
constexpr bool IsValid() const {
480 return !IsInvalid();
481 }
482
constexpr bool IsInvalid() const {
484 return std::holds_alternative<ValuelessType>(value_) &&
485 GetValuelessType() == ValuelessType::kInvalid;
486 }
487
bool IsPartialUnknown() const {
489 return std::holds_alternative<PriorValueHolder>(value_);
490 }
491
bool IsMergedUnknown() const {
493 return std::holds_alternative<MergedUnknownMarker>(value_);
494 }
495
bool IsPureUnknown() const {
497 return std::holds_alternative<ValuelessType>(value_) &&
498 GetValuelessType() == ValuelessType::kPureUnknown;
499 }
500
bool IsUnknown() const {
502 return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
503 }
504
bool IsDefault() const {
506 return std::holds_alternative<ValuelessType>(value_) &&
507 GetValuelessType() == ValuelessType::kDefault;
508 }
509
bool IsInstruction() const {
511 return std::holds_alternative<HInstruction*>(value_);
512 }
513
bool NeedsNonLoopPhi() const {
515 return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
516 }
517
bool NeedsLoopPhi() const {
519 return std::holds_alternative<NeedsLoopPhiMarker>(value_);
520 }
521
bool NeedsPhi() const {
523 return NeedsNonLoopPhi() || NeedsLoopPhi();
524 }
525
HInstruction* GetInstruction() const {
527 DCHECK(IsInstruction()) << *this;
528 return std::get<HInstruction*>(value_);
529 }
530
PriorValueHolder GetPriorValue() const {
532 DCHECK(IsPartialUnknown());
533 return std::get<PriorValueHolder>(value_);
534 }
535
PhiPlaceholder GetPhiPlaceholder() const {
537 DCHECK(NeedsPhi() || IsMergedUnknown());
538 if (NeedsNonLoopPhi()) {
539 return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
540 } else if (NeedsLoopPhi()) {
541 return std::get<NeedsLoopPhiMarker>(value_).phi_;
542 } else {
543 return std::get<MergedUnknownMarker>(value_).phi_;
544 }
545 }
546
uint32_t GetMergeBlockId() const {
548 DCHECK(IsMergedUnknown()) << this;
549 return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
550 }
551
HBasicBlock* GetMergeBlock(const HGraph* graph) const {
553 DCHECK(IsMergedUnknown()) << *this;
554 return graph->GetBlocks()[GetMergeBlockId()];
555 }
556
size_t GetHeapLocation() const {
558 DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
559 return GetPhiPlaceholder().GetHeapLocation();
560 }
561
562 constexpr bool ExactEquals(Value other) const;
563
564 constexpr bool Equals(Value other) const;
565
constexpr bool Equals(HInstruction* instruction) const {
567 return Equals(ForInstruction(instruction));
568 }
569
570 std::ostream& Dump(std::ostream& os) const;
571
572 // Public for use with lists.
constexpr Value() : value_(ValuelessType::kInvalid) {}
574
575 private:
576 using ValueHolder = std::variant<ValuelessType,
577 HInstruction*,
578 MergedUnknownMarker,
579 NeedsNonLoopPhiMarker,
580 NeedsLoopPhiMarker,
581 PriorValueHolder>;
constexpr ValuelessType GetValuelessType() const {
583 return std::get<ValuelessType>(value_);
584 }
585
constexpr explicit Value(ValueHolder v) : value_(v) {}
587
588 friend std::ostream& operator<<(std::ostream& os, const Value& v);
589
590 ValueHolder value_;
591
592 static_assert(std::is_move_assignable<PhiPlaceholder>::value);
593 };
594
595 friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
596 const Value::NeedsLoopPhiMarker& p2);
597 friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
598 const Value::NeedsNonLoopPhiMarker& p2);
599 friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
600 const Value::MergedUnknownMarker& p2);
601
602 // Get Phi placeholder index for access to `phi_placeholder_replacements_`
603 // and "visited" bit vectors during depth-first searches.
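// For example (hypothetical numbers): with 4 heap locations, the placeholder for
// block 3 / heap location 2 maps to index 3 * 4 + 2 = 14, and GetPhiPlaceholderAt(14)
// decodes it back as block 14 / 4 = 3 and heap location 14 % 4 = 2.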
size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
605 size_t res =
606 phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
607 phi_placeholder.GetHeapLocation();
608 DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
609 << res << "blks: " << GetGraph()->GetBlocks().size()
610 << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
611 return res;
612 }
613
size_t PhiPlaceholderIndex(Value phi_placeholder) const {
615 return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
616 }
617
bool IsEscapingObject(ReferenceInfo* info, HBasicBlock* block, size_t index) {
619 return !info->IsSingletonAndRemovable() &&
620 !(info->IsPartialSingleton() && IsPartialNoEscape(block, index));
621 }
622
bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
624 auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
625 if (!ri->IsPartialSingleton()) {
626 return false;
627 }
628 ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
629 ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
630 return std::none_of(cohorts.cbegin(),
631 cohorts.cend(),
632 [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
633 // Make sure we haven't yet and never will escape.
634 return ex.PrecedesBlock(blk) ||
635 ex.ContainsBlock(blk) ||
636 ex.SucceedsBlock(blk);
637 });
638 }
639
PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
641 DCHECK_LT(off, num_phi_placeholders_);
642 size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
643 // Technically this should be (off - id) / NumberOfHeapLocations
644 // but due to truncation it's all the same.
645 size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
646 return GetPhiPlaceholder(blk_id, id);
647 }
648
PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
650 DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
651 return PhiPlaceholder(block_id, idx);
652 }
653
Value Replacement(Value value) const {
655 DCHECK(value.NeedsPhi() ||
656 (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
657 << value << " phase: " << current_phase_;
658 Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
659 DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
660 DCHECK(replacement.IsUnknown() ||
661 FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
662 return replacement;
663 }
664
Value ReplacementOrValue(Value value) const {
666 if (current_phase_ == Phase::kPartialElimination) {
667 // In this phase we are materializing the default values which are used
668 // only if the partial singleton did not escape, so we can replace
669 // a partial unknown with the prior value.
670 if (value.IsPartialUnknown()) {
671 value = value.GetPriorValue().ToValue();
672 }
673 if ((value.IsMergedUnknown() || value.NeedsPhi()) &&
674 phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
675 value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
676 DCHECK(!value.IsMergedUnknown());
677 DCHECK(!value.NeedsPhi());
678 } else if (value.IsMergedUnknown()) {
679 return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
680 }
681 if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) {
682 DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()),
683 substitute_instructions_for_loads_.size());
684 HInstruction* substitute =
685 substitute_instructions_for_loads_[value.GetInstruction()->GetId()];
686 if (substitute != nullptr) {
687 DCHECK(substitute->IsPredicatedInstanceFieldGet());
688 return Value::ForInstruction(substitute);
689 }
690 }
691 DCHECK_IMPLIES(value.IsInstruction(),
692 FindSubstitute(value.GetInstruction()) == value.GetInstruction());
693 return value;
694 }
695 if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
696 return Replacement(value);
697 } else {
698 DCHECK_IMPLIES(value.IsInstruction(),
699 FindSubstitute(value.GetInstruction()) == value.GetInstruction());
700 return value;
701 }
702 }
703
704 // The record of a heap value and instruction(s) that feed that value.
705 struct ValueRecord {
706 Value value;
707 Value stored_by;
708 };
709
HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
711 HInstruction* value,
712 DataType::Type expected_type) {
713 // Should never add type conversion into boolean value.
714 if (expected_type == DataType::Type::kBool ||
715 DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
716 // TODO: This prevents type conversion of default values but we can still insert
717 // type conversion of other constants and there is no constant folding pass after LSE.
718 IsZeroBitPattern(value)) {
719 return nullptr;
720 }
721
722 // Check if there is already a suitable TypeConversion we can reuse.
723 for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
724 if (use.GetUser()->IsTypeConversion() &&
725 use.GetUser()->GetType() == expected_type &&
726 // TODO: We could move the TypeConversion to a common dominator
727 // if it does not cross irreducible loop header.
728 use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
729 // Don't share across irreducible loop headers.
730 // TODO: can be more fine-grained than this by testing each dominator.
731 (use.GetUser()->GetBlock() == instruction->GetBlock() ||
732 !GetGraph()->HasIrreducibleLoops())) {
733 if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
734 use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
735 // Move the TypeConversion before the instruction.
736 use.GetUser()->MoveBefore(instruction);
737 }
738 DCHECK(use.GetUser()->StrictlyDominates(instruction));
739 return use.GetUser()->AsTypeConversion();
740 }
741 }
742
743 // We must create a new TypeConversion instruction.
744 HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
745 expected_type, value, instruction->GetDexPc());
746 instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
747 return type_conversion;
748 }
749
750 // Find an instruction's substitute if it's a removed load.
751 // Return the same instruction if it should not be removed.
HInstruction* FindSubstitute(HInstruction* instruction) const {
753 size_t id = static_cast<size_t>(instruction->GetId());
754 if (id >= substitute_instructions_for_loads_.size()) {
755 // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet.
756 DCHECK_IMPLIES(IsLoad(instruction), instruction->IsPredicatedInstanceFieldGet());
757 return instruction;
758 }
759 HInstruction* substitute = substitute_instructions_for_loads_[id];
760 DCHECK(substitute == nullptr || IsLoad(instruction));
761 return (substitute != nullptr) ? substitute : instruction;
762 }
763
void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
765 DCHECK(IsLoad(load));
766 DCHECK_EQ(FindSubstitute(load), load);
767 DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
768 "Unexpected heap_value that has a substitute " << heap_value->DebugName();
769
770 // The load expects to load the heap value as type load->GetType().
771 // However the tracked heap value may not be of that type. An explicit
772 // type conversion may be needed.
773 // There are actually three types involved here:
774 // (1) tracked heap value's type (type A)
775 // (2) heap location (field or element)'s type (type B)
776 // (3) load's type (type C)
777 // We guarantee that type A stored as type B and then fetched out as
778 // type C is the same as casting from type A to type C directly, since
779 // type B and type C will have the same size which is guaranteed in
780 // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
781 // So we only need one type conversion from type A to type C.
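// For example (hypothetical types): an int value (type A) stored into a short
// field (type B) and loaded back as short (type C) needs only a single int->short
// conversion of the value; no separate conversion for the store is required
// because type B and type C are guaranteed to have the same size.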
782 HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
783 load, heap_value, load->GetType());
784
785 substitute_instructions_for_loads_[load->GetId()] =
786 type_conversion != nullptr ? type_conversion : heap_value;
787 }
788
static bool IsLoad(HInstruction* instruction) {
790 // Unresolved load is not treated as a load.
791 return instruction->IsInstanceFieldGet() ||
792 instruction->IsPredicatedInstanceFieldGet() ||
793 instruction->IsStaticFieldGet() ||
794 instruction->IsVecLoad() ||
795 instruction->IsArrayGet();
796 }
797
static bool IsStore(HInstruction* instruction) {
799 // Unresolved store is not treated as a store.
800 return instruction->IsInstanceFieldSet() ||
801 instruction->IsArraySet() ||
802 instruction->IsVecStore() ||
803 instruction->IsStaticFieldSet();
804 }
805
806 // Check if it is allowed to use default values or Phis for the specified load.
static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
808 DCHECK(IsLoad(instruction));
// Using defaults for VecLoads requires creating additional vector operations.
810 // As there are some issues with scheduling vector operations it is better to avoid creating
811 // them.
812 return !instruction->IsVecOperation();
813 }
814
815 // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
816 // This is necessary if the stored heap value can be observed.
void KeepStores(Value value) {
818 if (value.IsPureUnknown() || value.IsPartialUnknown()) {
819 return;
820 }
821 if (value.IsMergedUnknown()) {
822 kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
823 phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
824 return;
825 }
826 if (value.NeedsPhi()) {
827 phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
828 } else {
829 HInstruction* instruction = value.GetInstruction();
830 DCHECK(IsStore(instruction));
831 kept_stores_.SetBit(instruction->GetId());
832 }
833 }
834
835 // If a heap location X may alias with heap location at `loc_index`
836 // and heap_values of that heap location X holds a store, keep that store.
837 // It's needed for a dependent load that's not eliminated since any store
838 // that may put value into the load's heap location needs to be kept.
void KeepStoresIfAliasedToLocation(ScopedArenaVector&lt;ValueRecord&gt;&amp; heap_values,
840 size_t loc_index) {
841 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
842 if (i == loc_index) {
843 // We use this function when reading a location with unknown value and
844 // therefore we cannot know what exact store wrote that unknown value.
845 // But we can have a phi placeholder here marking multiple stores to keep.
846 DCHECK(
847 !heap_values[i].stored_by.IsInstruction() ||
848 heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
849 KeepStores(heap_values[i].stored_by);
850 heap_values[i].stored_by = Value::PureUnknown();
851 } else if (heap_location_collector_.MayAlias(i, loc_index)) {
852 KeepStores(heap_values[i].stored_by);
853 heap_values[i].stored_by = Value::PureUnknown();
854 }
855 }
856 }
857
858 // `instruction` is being removed. Try to see if the null check on it
859 // can be removed. This can happen if the same value is set in two branches
860 // but not in dominators. Such as:
861 // int[] a = foo();
862 // if () {
863 // a[0] = 2;
864 // } else {
865 // a[0] = 2;
866 // }
867 // // a[0] can now be replaced with constant 2, and the null check on it can be removed.
void TryRemovingNullCheck(HInstruction* instruction) {
869 HInstruction* prev = instruction->GetPrevious();
870 if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
871 // Previous instruction is a null check for this instruction. Remove the null check.
872 prev->ReplaceWith(prev->InputAt(0));
873 prev->GetBlock()->RemoveInstruction(prev);
874 }
875 }
876
HInstruction* GetDefaultValue(DataType::Type type) {
878 switch (type) {
879 case DataType::Type::kReference:
880 return GetGraph()->GetNullConstant();
881 case DataType::Type::kBool:
882 case DataType::Type::kUint8:
883 case DataType::Type::kInt8:
884 case DataType::Type::kUint16:
885 case DataType::Type::kInt16:
886 case DataType::Type::kInt32:
887 return GetGraph()->GetIntConstant(0);
888 case DataType::Type::kInt64:
889 return GetGraph()->GetLongConstant(0);
890 case DataType::Type::kFloat32:
891 return GetGraph()->GetFloatConstant(0);
892 case DataType::Type::kFloat64:
893 return GetGraph()->GetDoubleConstant(0);
894 default:
895 UNREACHABLE();
896 }
897 }
898
bool CanValueBeKeptIfSameAsNew(Value value,
900 HInstruction* new_value,
901 HInstruction* new_value_set_instr) {
902 // For field/array set location operations, if the value is the same as the new_value
903 // it can be kept even if aliasing happens. All aliased operations will access the same memory
904 // range.
905 // For vector values, this is not true. For example:
906 // packed_data = [0xA, 0xB, 0xC, 0xD]; <-- Different values in each lane.
907 // VecStore array[i ,i+1,i+2,i+3] = packed_data;
908 // VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
909 // VecLoad vx = array[i,i+1,i+2,i+3]; <-- Cannot be eliminated because the value
910 // here is not packed_data anymore.
911 //
912 // TODO: to allow such 'same value' optimization on vector data,
913 // LSA needs to report more fine-grain MAY alias information:
914 // (1) May alias due to two vector data partial overlap.
915 // e.g. a[i..i+3] and a[i+1,..,i+4].
916 // (2) May alias due to two vector data may complete overlap each other.
917 // e.g. a[i..i+3] and b[i..i+3].
918 // (3) May alias but the exact relationship between two locations is unknown.
919 // e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
920 // This 'same value' optimization can apply only on case (2).
921 if (new_value_set_instr->IsVecOperation()) {
922 return false;
923 }
924
925 return value.Equals(new_value);
926 }
927
928 Value PrepareLoopValue(HBasicBlock* block, size_t idx);
929 Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
930 void PrepareLoopRecords(HBasicBlock* block);
931 Value MergePredecessorValues(HBasicBlock* block, size_t idx);
932 void MergePredecessorRecords(HBasicBlock* block);
933
934 void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);
935
936 void VisitGetLocation(HInstruction* instruction, size_t idx);
937 void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
939 field_infos_[heap_loc] = info;
940 }
941
942 void VisitBasicBlock(HBasicBlock* block) override;
943
944 enum class Phase {
945 kLoadElimination,
946 kStoreElimination,
947 kPartialElimination,
948 };
949
950 bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;
951
952 bool TryReplacingLoopPhiPlaceholderWithDefault(
953 PhiPlaceholder phi_placeholder,
954 DataType::Type type,
955 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
956 bool TryReplacingLoopPhiPlaceholderWithSingleInput(
957 PhiPlaceholder phi_placeholder,
958 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
959 std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
960 PhiPlaceholder phi_placeholder,
961 /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
962 DataType::Type type,
963 bool can_use_default_or_phi);
964 bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
965 DataType::Type type);
966 bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
967 bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
968 DataType::Type type);
969 bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
970 std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
971 HInstruction* load);
972 void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
973 void ProcessLoadsRequiringLoopPhis();
974
975 void SearchPhiPlaceholdersForKeptStores();
976 void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
977 void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
978 void FindStoresWritingOldValues();
979 void FinishFullLSE();
980 void PrepareForPartialPhiComputation();
981 // Create materialization block and materialization object for the given predecessor of entry.
982 HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
983 HeapRefHolder&& holder,
984 size_t pred_idx,
985 HBasicBlock* blk);
986 // Returns the value that would be read by the 'read' instruction on
987 // 'orig_new_inst' if 'orig_new_inst' has not escaped.
988 HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
989 void MovePartialEscapes();
990
void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
992 LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
993 << " but LSE should be the only source of predicated-ifield-gets!";
994 }
995
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
997 HInstruction* object = instruction->InputAt(0);
998 const FieldInfo& field = instruction->GetFieldInfo();
999 VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
1000 }
1001
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
1003 HInstruction* object = instruction->InputAt(0);
1004 const FieldInfo& field = instruction->GetFieldInfo();
1005 HInstruction* value = instruction->InputAt(1);
1006 size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
1007 VisitSetLocation(instruction, idx, value);
1008 }
1009
void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
1011 HInstruction* cls = instruction->InputAt(0);
1012 const FieldInfo& field = instruction->GetFieldInfo();
1013 VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
1014 }
1015
void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
1017 HInstruction* cls = instruction->InputAt(0);
1018 const FieldInfo& field = instruction->GetFieldInfo();
1019 HInstruction* value = instruction->InputAt(1);
1020 size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
1021 VisitSetLocation(instruction, idx, value);
1022 }
1023
void VisitArrayGet(HArrayGet* instruction) override {
1025 VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1026 }
1027
void VisitArraySet(HArraySet* instruction) override {
1029 size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1030 VisitSetLocation(instruction, idx, instruction->GetValue());
1031 }
1032
void VisitVecLoad(HVecLoad* instruction) override {
1034 VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1035 }
1036
void VisitVecStore(HVecStore* instruction) override {
1038 size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1039 VisitSetLocation(instruction, idx, instruction->GetValue());
1040 }
1041
void VisitDeoptimize(HDeoptimize* instruction) override {
1043 // If we are in a try catch, even singletons are observable.
1044 const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr;
1045 HBasicBlock* block = instruction->GetBlock();
1046 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1047 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1048 Value* stored_by = &heap_values[i].stored_by;
1049 if (stored_by->IsUnknown()) {
1050 continue;
1051 }
// Stores are generally observable after deoptimization, except
1053 // for singletons that don't escape in the deoptimization environment.
1054 bool observable = true;
1055 ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1056 if (!in_try_catch && info->IsSingleton()) {
1057 HInstruction* reference = info->GetReference();
1058 // Finalizable objects always escape.
1059 const bool finalizable_object =
1060 reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1061 if (!finalizable_object && !IsEscapingObject(info, block, i)) {
1062 // Check whether the reference for a store is used by an environment local of
1063 // the HDeoptimize. If not, the singleton is not observed after deoptimization.
1064 const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
1065 observable = std::any_of(
1066 env_uses.begin(),
1067 env_uses.end(),
1068 [instruction](const HUseListNode<HEnvironment*>& use) {
1069 return use.GetUser()->GetHolder() == instruction;
1070 });
1071 }
1072 }
1073 if (observable) {
1074 KeepStores(*stored_by);
1075 *stored_by = Value::PureUnknown();
1076 }
1077 }
1078 }
1079
1080 // Keep necessary stores before exiting a method via return/throw.
void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
1082 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1083 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1084 ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1085 if (must_keep_stores || IsEscapingObject(ref_info, block, i)) {
1086 KeepStores(heap_values[i].stored_by);
1087 heap_values[i].stored_by = Value::PureUnknown();
1088 }
1089 }
1090 }
1091
void VisitReturn(HReturn* instruction) override {
1093 HandleExit(instruction->GetBlock());
1094 }
1095
void VisitReturnVoid(HReturnVoid* return_void) override {
1097 HandleExit(return_void->GetBlock());
1098 }
1099
void HandleThrowingInstruction(HInstruction* instruction) {
1101 DCHECK(instruction->CanThrow());
1102 // If we are inside of a try catch, singletons can become visible since we may not exit the
1103 // method.
1104 HandleExit(instruction->GetBlock(),
1105 instruction->GetBlock()->GetTryCatchInformation() != nullptr);
1106 }
1107
void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
1109 HandleThrowingInstruction(method_entry);
1110 }
1111
void VisitMethodExitHook(HMethodExitHook* method_exit) override {
1113 HandleThrowingInstruction(method_exit);
1114 }
1115
void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
1117 HandleThrowingInstruction(div_zero_check);
1118 }
1119
void VisitNullCheck(HNullCheck* null_check) override {
1121 HandleThrowingInstruction(null_check);
1122 }
1123
void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
1125 HandleThrowingInstruction(bounds_check);
1126 }
1127
void VisitLoadClass(HLoadClass* load_class) override {
1129 if (load_class->CanThrow()) {
1130 HandleThrowingInstruction(load_class);
1131 }
1132 }
1133
void VisitLoadString(HLoadString* load_string) override {
1135 if (load_string->CanThrow()) {
1136 HandleThrowingInstruction(load_string);
1137 }
1138 }
1139
void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
1141 HandleThrowingInstruction(sb_append);
1142 }
1143
void VisitThrow(HThrow* throw_instruction) override {
1145 HandleThrowingInstruction(throw_instruction);
1146 }
1147
void VisitCheckCast(HCheckCast* check_cast) override {
1149 HandleThrowingInstruction(check_cast);
1150 }
1151
void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
1153 if (monitor_op->CanThrow()) {
1154 HandleThrowingInstruction(monitor_op);
1155 }
1156 }
1157
void HandleInvoke(HInstruction* instruction) {
1159 // If `instruction` can throw we have to presume all stores are visible.
1160 const bool can_throw = instruction->CanThrow();
1161 // If we are in a try catch, even singletons are observable.
1162 const bool can_throw_in_try_catch =
1163 can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr;
1164 SideEffects side_effects = instruction->GetSideEffects();
1165 ScopedArenaVector<ValueRecord>& heap_values =
1166 heap_values_for_[instruction->GetBlock()->GetBlockId()];
1167 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1168 ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1169 HBasicBlock* blk = instruction->GetBlock();
1170 // We don't need to do anything if the reference has not escaped at this point.
1171 // This is true if either we (1) never escape or (2) sometimes escape but
1172 // there is no possible execution where we have done so at this time. NB
1173 // We count being in the excluded cohort as escaping. Technically, this is
1174 // a bit over-conservative (since we can have multiple non-escaping calls
1175 // before a single escaping one) but this simplifies everything greatly.
1176 auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
1177 DCHECK(ref_info->IsPartialSingleton());
1178 if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
1179 return false;
1180 }
1181 ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
1182 ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
1183 return std::none_of(cohorts.begin(),
1184 cohorts.end(),
1185 [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
1186 return cohort.PrecedesBlock(blk);
1187 });
1188 };
1189 if (!can_throw_in_try_catch &&
1190 (ref_info->IsSingleton() ||
1191 // partial and we aren't currently escaping and we haven't escaped yet.
1192 (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
1193 // Singleton references cannot be seen by the callee.
1194 } else {
1195 if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1196 // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1197 KeepStores(heap_values[i].stored_by);
1198 heap_values[i].stored_by = Value::PureUnknown();
1199 }
1200 if (side_effects.DoesAnyWrite()) {
1201 // The value may be clobbered.
1202 heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1203 }
1204 }
1205 }
1206 }
1207
void VisitInvoke(HInvoke* invoke) override {
1209 HandleInvoke(invoke);
1210 }
1211
void VisitClinitCheck(HClinitCheck* clinit) override {
1213 // Class initialization check can result in class initializer calling arbitrary methods.
1214 HandleInvoke(clinit);
1215 }
1216
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1218 // Conservatively treat it as an invocation.
1219 HandleInvoke(instruction);
1220 }
1221
void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1223 // Conservatively treat it as an invocation.
1224 HandleInvoke(instruction);
1225 }
1226
void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1228 // Conservatively treat it as an invocation.
1229 HandleInvoke(instruction);
1230 }
1231
void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1233 // Conservatively treat it as an invocation.
1234 HandleInvoke(instruction);
1235 }
1236
1237   void VisitNewInstance(HNewInstance* new_instance) override {
1238 // If we are in a try catch, even singletons are observable.
1239 const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr;
1240 ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1241 if (ref_info == nullptr) {
1242 // new_instance isn't used for field accesses. No need to process it.
1243 return;
1244 }
1245 if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1246 DCHECK(!new_instance->IsFinalizable());
1247 // new_instance can potentially be eliminated.
1248 singleton_new_instances_.push_back(new_instance);
1249 }
1250 HBasicBlock* block = new_instance->GetBlock();
1251 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1252 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1253 ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1254 HInstruction* ref = info->GetReference();
1255 size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1256 if (ref == new_instance) {
1257 if (offset >= mirror::kObjectHeaderSize ||
1258 MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1259           // Instance fields other than the object header fields are set to default heap values.
1260           // The shadow$_monitor_ header field is also set to the default value.
1261 heap_values[i].value = Value::Default();
1262 heap_values[i].stored_by = Value::PureUnknown();
1263 } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1264           // The shadow$_klass_ header field is special: it holds the class from new_instance's HLoadClass input.
1265 heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1266 heap_values[i].stored_by = Value::PureUnknown();
1267 }
1268 } else if (in_try_catch || IsEscapingObject(info, block, i)) {
1269 // Since NewInstance can throw, we presume all previous stores could be visible.
1270 KeepStores(heap_values[i].stored_by);
1271 heap_values[i].stored_by = Value::PureUnknown();
1272 }
1273 }
1274 }
1275
1276   void VisitNewArray(HNewArray* new_array) override {
1277 // If we are in a try catch, even singletons are observable.
1278 const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr;
1279 ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1280 if (ref_info == nullptr) {
1281 // new_array isn't used for array accesses. No need to process it.
1282 return;
1283 }
1284 if (ref_info->IsSingletonAndRemovable()) {
1285 if (new_array->GetLength()->IsIntConstant() &&
1286 new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1287 // new_array can potentially be eliminated.
1288 singleton_new_instances_.push_back(new_array);
1289 } else {
1290 // new_array may throw NegativeArraySizeException. Keep it.
1291 }
1292 }
1293 HBasicBlock* block = new_array->GetBlock();
1294 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1295 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1296 HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1297 ReferenceInfo* info = location->GetReferenceInfo();
1298 HInstruction* ref = info->GetReference();
1299 if (ref == new_array && location->GetIndex() != nullptr) {
1300 // Array elements are set to default heap values.
1301 heap_values[i].value = Value::Default();
1302 heap_values[i].stored_by = Value::PureUnknown();
1303 } else if (in_try_catch || IsEscapingObject(info, block, i)) {
1304 // Since NewArray can throw, we presume all previous stores could be visible.
1305 KeepStores(heap_values[i].stored_by);
1306 heap_values[i].stored_by = Value::PureUnknown();
1307 }
1308 }
1309 }
1310
1311   void VisitInstruction(HInstruction* instruction) override {
1312 // Throwing instructions must be handled specially.
1313 DCHECK(!instruction->CanThrow());
1314 }
1315
1316   bool ShouldPerformPartialLSE() const {
1317 return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
1318 }
1319
1320 bool perform_partial_lse_;
1321
1322 const HeapLocationCollector& heap_location_collector_;
1323
1324 // Use local allocator for allocating memory.
1325 ScopedArenaAllocator allocator_;
1326
1327   // The number of unique Phi placeholders that can possibly exist (one per block per heap location).
1328 size_t num_phi_placeholders_;
1329
1330 // One array of heap value records for each block.
1331 ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1332
1333 // We record loads and stores for re-processing when we find a loop Phi placeholder
1334 // with unknown value from a predecessor, and also for removing stores that are
1335 // found to be dead, i.e. not marked in `kept_stores_` at the end.
1336 struct LoadStoreRecord {
1337 HInstruction* load_or_store;
1338 size_t heap_location_index;
1339 };
1340 ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1341
1342 // We record the substitute instructions for loads that should be
1343 // eliminated but may be used by heap locations. They'll be removed
1344 // in the end. These are indexed by the load's id.
1345 ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1346
1347 // Value at the start of the given instruction for instructions which directly
1348 // read from a heap-location (i.e. FieldGet). The mapping to heap-location is
1349 // implicit through the fact that each instruction can only directly refer to
1350 // a single heap-location.
1351 ScopedArenaHashMap<HInstruction*, Value> intermediate_values_;
1352
1353 // Record stores to keep in a bit vector indexed by instruction ID.
1354 ArenaBitVector kept_stores_;
1355 // When we need to keep all stores that feed a Phi placeholder, we just record the
1356 // index of that placeholder for processing after graph traversal.
1357 ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1358
1359 // Loads that would require a loop Phi to replace are recorded for processing
1360 // later as we do not have enough information from back-edges to determine if
1361 // a suitable Phi can be found or created when we visit these loads.
1362 ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;
1363
1364 // For stores, record the old value records that were replaced and the stored values.
1365 struct StoreRecord {
1366 ValueRecord old_value_record;
1367 HInstruction* stored_value;
1368 };
1369 // Small pre-allocated initial buffer avoids initializing a large one until it's really needed.
1370 static constexpr size_t kStoreRecordsInitialBufferSize = 16;
1371 std::pair<HInstruction*, StoreRecord> store_records_buffer_[kStoreRecordsInitialBufferSize];
1372 ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;
1373
1374 // Replacements for Phi placeholders.
1375 // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1376 ScopedArenaVector<Value> phi_placeholder_replacements_;
1377
1378 // Merged-unknowns that must have their predecessor values kept to ensure
1379   // partially escaped values are written.
1380 ArenaBitVector kept_merged_unknowns_;
1381
1382 ScopedArenaVector<HInstruction*> singleton_new_instances_;
1383
1384 // The field infos for each heap location (if relevant).
1385 ScopedArenaVector<const FieldInfo*> field_infos_;
1386
1387 Phase current_phase_;
1388
1389 friend class PartialLoadStoreEliminationHelper;
1390 friend struct ScopedRestoreHeapValues;
1391
1392 friend std::ostream& operator<<(std::ostream& os, const Value& v);
1393 friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1394 friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1395
1396 DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1397 };
1398
1399 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1400 p.Dump(oss);
1401 return oss;
1402 }
1403
1404 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1405 switch (phase) {
1406 case LSEVisitor::Phase::kLoadElimination:
1407 return oss << "kLoadElimination";
1408 case LSEVisitor::Phase::kStoreElimination:
1409 return oss << "kStoreElimination";
1410 case LSEVisitor::Phase::kPartialElimination:
1411 return oss << "kPartialElimination";
1412 }
1413 }
1414
1415 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1416 if (IsDefault()) {
1417 oss << "Default";
1418 } else if (IsPhi()) {
1419 oss << "Phi: " << GetPhiPlaceholder();
1420 } else {
1421 oss << "Instruction: " << *GetInstruction();
1422 }
1423 }
1424
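// Note: as visible in the constructor and ToValue() below, a PriorValueHolder collapses a
// Value into one of three representations: the Default marker, a Phi placeholder, or a
// concrete instruction. A PartialUnknown value contributes its prior value instead.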
1425 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1426 : value_(Marker{}) {
1427 DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1428 if (val.IsPartialUnknown()) {
1429 value_ = val.GetPriorValue().value_;
1430 } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1431 value_ = val.GetPhiPlaceholder();
1432 } else if (val.IsInstruction()) {
1433 value_ = val.GetInstruction();
1434 } else {
1435 DCHECK(val.IsDefault());
1436 }
1437 }
1438
1439 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1440 return true;
1441 }
1442
1443 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1444 const LSEVisitor::PriorValueHolder& p2) {
1445 return p1.Equals(p2);
1446 }
1447
1448 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1449 const LSEVisitor::PhiPlaceholder& p2) {
1450 return p1.Equals(p2);
1451 }
1452
1453 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1454 const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1455 return p1.phi_ == p2.phi_;
1456 }
1457
1458 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1459 const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1460 return p1.phi_ == p2.phi_;
1461 }
1462
1463 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1464 const LSEVisitor::Value::MergedUnknownMarker& p2) {
1465 return p1.phi_ == p2.phi_;
1466 }
1467
1468 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1469 p.Dump(oss);
1470 return oss;
1471 }
1472
1473 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1474 if (IsDefault()) {
1475 return Value::Default();
1476 } else if (IsPhi()) {
1477 return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1478 } else {
1479 return Value::ForInstruction(GetInstruction());
1480 }
1481 }
1482
1483 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1484 return value_ == other.value_;
1485 }
1486
1487 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1488 // Only valid values can be compared.
1489 DCHECK(IsValid());
1490 DCHECK(other.IsValid());
1491 if (value_ == other.value_) {
1492 // Note: Two unknown values are considered different.
1493 return !IsUnknown();
1494 } else {
1495 // Default is considered equal to zero-bit-pattern instructions.
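    // (E.g. Default equals a stored IntConstant 0 or a null constant, since those are
    // zero bit patterns.)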
1496 return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1497 (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1498 }
1499 }
1500
1501 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1502 if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1503 switch (GetValuelessType()) {
1504 case ValuelessType::kDefault:
1505 return os << "Default";
1506 case ValuelessType::kPureUnknown:
1507 return os << "PureUnknown";
1508 case ValuelessType::kInvalid:
1509 return os << "Invalid";
1510 }
1511 } else if (IsPartialUnknown()) {
1512 return os << "PartialUnknown[" << GetPriorValue() << "]";
1513 } else if (IsInstruction()) {
1514 return os << "Instruction[id: " << GetInstruction()->GetId()
1515 << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1516 } else if (IsMergedUnknown()) {
1517 return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1518 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1519
1520 } else if (NeedsLoopPhi()) {
1521 return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1522 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1523 } else {
1524 return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1525 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1526 }
1527 }
1528
1529 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1530 return v.Dump(os);
1531 }
1532
1533 LSEVisitor::LSEVisitor(HGraph* graph,
1534 const HeapLocationCollector& heap_location_collector,
1535 bool perform_partial_lse,
1536 OptimizingCompilerStats* stats)
1537 : HGraphDelegateVisitor(graph, stats),
1538 perform_partial_lse_(perform_partial_lse),
1539 heap_location_collector_(heap_location_collector),
1540 allocator_(graph->GetArenaStack()),
1541 num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1542 heap_location_collector_.GetNumberOfHeapLocations()),
1543 heap_values_for_(graph->GetBlocks().size(),
1544 ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1545 allocator_.Adapter(kArenaAllocLSE)),
1546 loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1547 // We may add new instructions (default values, Phis) but we're not adding loads
1548       // or stores, so we shall not need to resize the following vector and BitVector.
1549 substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
1550 nullptr,
1551 allocator_.Adapter(kArenaAllocLSE)),
1552 intermediate_values_(allocator_.Adapter(kArenaAllocLSE)),
1553 kept_stores_(&allocator_,
1554 /*start_bits=*/graph->GetCurrentInstructionId(),
1555 /*expandable=*/false,
1556 kArenaAllocLSE),
1557 phi_placeholders_to_search_for_kept_stores_(&allocator_,
1558 num_phi_placeholders_,
1559 /*expandable=*/false,
1560 kArenaAllocLSE),
1561 loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1562 store_records_(store_records_buffer_,
1563 kStoreRecordsInitialBufferSize,
1564 allocator_.Adapter(kArenaAllocLSE)),
1565 phi_placeholder_replacements_(num_phi_placeholders_,
1566 Value::Invalid(),
1567 allocator_.Adapter(kArenaAllocLSE)),
1568 kept_merged_unknowns_(&allocator_,
1569 /*start_bits=*/num_phi_placeholders_,
1570 /*expandable=*/false,
1571 kArenaAllocLSE),
1572 singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1573 field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1574 allocator_.Adapter(kArenaAllocLSE)),
1575 current_phase_(Phase::kLoadElimination) {
1576 // Clear bit vectors.
1577 phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
1578 kept_stores_.ClearAllBits();
1579 }
1580
1581 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1582 // If the pre-header value is known (which implies that the reference dominates this
1583 // block), use a Phi placeholder for the value in the loop header. If all predecessors
1584 // are later found to have a known value, we can replace loads from this location,
1585 // either with the pre-header value or with a new Phi. For array locations, the index
1586 // may be defined inside the loop but the only known value in that case should be the
1587 // default value or a Phi placeholder that can be replaced only with the default value.
1588 HLoopInformation* loop_info = block->GetLoopInformation();
1589 uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1590 Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1591 if (pre_header_value.IsUnknown()) {
1592 return pre_header_value;
1593 }
1594 if (kIsDebugBuild) {
1595 // Check that the reference indeed dominates this loop.
1596 HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1597 HInstruction* ref = location->GetReferenceInfo()->GetReference();
1598 CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1599 << GetGraph()->PrettyMethod();
1600 // Check that the index, if defined inside the loop, tracks a default value
1601 // or a Phi placeholder requiring a loop Phi.
1602 HInstruction* index = location->GetIndex();
1603 if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1604 CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1605 << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1606 << pre_header_value;
1607 }
1608 }
1609 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1610 return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1611 }
1612
1613 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1614 // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1615 // if the value in the location escapes. This is not applicable to singletons that are
1616 // defined inside the loop as they shall be dead in the loop header.
1617 const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1618 const HInstruction* reference = ref_info->GetReference();
1619 // Finalizable objects always escape.
1620 const bool is_finalizable =
1621 reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1622 if (ref_info->IsSingleton() &&
1623 block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1624 !is_finalizable) {
1625 return Value::PureUnknown();
1626 }
1627 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1628 return Value::ForLoopPhiPlaceholder(phi_placeholder);
1629 }
1630
1631 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1632 DCHECK(block->IsLoopHeader());
1633 int block_id = block->GetBlockId();
1634 HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1635 ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1636 heap_values_for_[pre_header->GetBlockId()];
1637 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1638 DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1639 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1640 DCHECK(heap_values.empty());
1641
1642 // Don't eliminate loads in irreducible loops.
1643 if (block->GetLoopInformation()->IsIrreducible()) {
1644 heap_values.resize(num_heap_locations,
1645 {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1646 // Also keep the stores before the loop header, including in blocks that were not visited yet.
1647 bool is_osr = GetGraph()->IsCompilingOsr();
1648 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1649 heap_values[idx].value =
1650 is_osr ? Value::PureUnknown()
1651 : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1652 KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1653 }
1654 return;
1655 }
1656
1657 // Fill `heap_values` based on values from pre-header.
1658 heap_values.reserve(num_heap_locations);
1659 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1660 heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1661 }
1662 }
1663
1664 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
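  // Merge rules, as implemented below: identical predecessor values merge to themselves;
  // if any predecessor value is unknown, the merge becomes a MergedUnknown placeholder;
  // otherwise differing known values merge to a Phi placeholder for this block
  // (a loop Phi placeholder if any input needs one).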
1665 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1666 DCHECK(!predecessors.empty());
1667 Value merged_value =
1668 ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1669 for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1670 Value pred_value =
1671 ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1672 if (pred_value.Equals(merged_value)) {
1673 // Value is the same. No need to update our merged value.
1674 continue;
1675 } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1676       // One value is unknown and the other differs, so the merged value must be treated as unknown.
1677 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1678 merged_value = Value::MergedUnknown(phi_placeholder);
1679 // We know that at least one of the merge points is unknown (and both are
1680 // not pure-unknowns since that's captured above). This means that the
1681 // overall value needs to be a MergedUnknown. Just return that.
1682 break;
1683 } else {
1684 // There are conflicting known values. We may still be able to replace loads with a Phi.
1685 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1686 // Propagate the need for a new loop Phi from all predecessors.
1687 bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1688 merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1689 }
1690 }
1691 DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
1692 << merged_value << " in " << GetGraph()->PrettyMethod();
1693 return merged_value;
1694 }
1695
1696 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1697 if (block->IsExitBlock()) {
1698 // Exit block doesn't really merge values since the control flow ends in
1699 // its predecessors. Each predecessor needs to make sure stores are kept
1700 // if necessary.
1701 return;
1702 }
1703
1704 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1705 DCHECK(heap_values.empty());
1706 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1707 if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr &&
1708 block->GetTryCatchInformation()->IsCatchBlock())) {
1709 DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1710 heap_values.resize(num_heap_locations,
1711 {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1712 return;
1713 }
1714
1715 heap_values.reserve(num_heap_locations);
1716 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1717 Value merged_value = MergePredecessorValues(block, idx);
1718 if (kIsDebugBuild) {
1719 if (merged_value.NeedsPhi()) {
1720 uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1721 CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1722 } else if (merged_value.IsInstruction()) {
1723 CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1724 }
1725 }
1726 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1727 Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1728 for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1729 uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1730 Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1731 if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1732 !merged_stored_by.Equals(stored_by)) {
1733 // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1734 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1735 merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1736 break;
1737 }
1738 }
1739 heap_values.push_back({ merged_value, merged_stored_by });
1740 }
1741 }
1742
1743 static HInstruction* FindOrConstructNonLoopPhi(
1744 HBasicBlock* block,
1745 const ScopedArenaVector<HInstruction*>& phi_inputs,
1746 DataType::Type type) {
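  // First try to reuse an existing Phi in this block with exactly these inputs;
  // only create (and add) a new Phi if no match is found.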
1747 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1748 HInstruction* phi = phi_it.Current();
1749 DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1750 auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1751 return lhs == rhs.GetInstruction();
1752 };
1753 if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1754 return phi;
1755 }
1756 }
1757 ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1758 HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1759 for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1760 DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1761 phi->SetRawInputAt(i, phi_inputs[i]);
1762 }
1763 block->AddPhi(phi);
1764 if (type == DataType::Type::kReference) {
1765 // Update reference type information. Pass invalid handles, these are not used for Phis.
1766 ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1767 Handle<mirror::ClassLoader>(),
1768 Handle<mirror::DexCache>(),
1769 /* is_first_run= */ false);
1770 rtp_fixup.Visit(phi);
1771 }
1772 return phi;
1773 }
1774
1775 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1776 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1777 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1778 size_t idx = phi_placeholder.GetHeapLocation();
1779
1780 // Use local allocator to reduce peak memory usage.
1781 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1782 // Reuse the same vector for collecting phi inputs.
1783 ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1784
1785 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1786 work_queue.push_back(phi_placeholder);
1787 while (!work_queue.empty()) {
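    // The queue is used as a stack: an entry is popped only once all of its predecessor
    // placeholders have been materialized, or if it was already materialized while
    // processing another entry's dependencies.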
1788 PhiPlaceholder current_phi_placeholder = work_queue.back();
1789 if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1790 // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1791 // that directly or indirectly depends on it, so it was already processed as part of the
1792 // other Phi placeholder's dependencies before this one got back to the top of the stack.
1793 work_queue.pop_back();
1794 continue;
1795 }
1796 uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1797 HBasicBlock* current_block = blocks[current_block_id];
1798 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1799
1800 // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1801 // And the only way for such merged value to reach a different heap location is through
1802 // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1803 // seen here are tied to one heap location.
1804 DCHECK(!current_block->IsLoopHeader())
1805 << current_phi_placeholder << " phase: " << current_phase_;
1806 DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1807
1808 phi_inputs.clear();
1809 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1810 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1811 DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1812 << " pred: " << predecessor->GetBlockId();
1813 if (pred_value.NeedsNonLoopPhi() ||
1814 (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
1815 // We need to process the Phi placeholder first.
1816 work_queue.push_back(pred_value.GetPhiPlaceholder());
1817 } else if (pred_value.IsDefault()) {
1818 phi_inputs.push_back(GetDefaultValue(type));
1819 } else {
1820 DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1821 << " pred: " << predecessor->GetBlockId();
1822 phi_inputs.push_back(pred_value.GetInstruction());
1823 }
1824 }
1825 if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1826 // All inputs are available. Find or construct the Phi replacement.
1827 phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1828 Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1829 // Remove the block from the queue.
1830 DCHECK_EQ(current_phi_placeholder, work_queue.back());
1831 work_queue.pop_back();
1832 }
1833 }
1834 }
1835
1836 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
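  // Depending on the tracked value, the load is (1) replaced with the default value,
  // (2) kept and recorded as the new tracked value (GVN-like), (3) deferred until loop
  // Phis can be decided, or (4) replaced after materializing non-loop Phis.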
1837 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1838 uint32_t block_id = instruction->GetBlock()->GetBlockId();
1839 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1840 ValueRecord& record = heap_values[idx];
1841 if (instruction->IsFieldAccess()) {
1842 RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1843 }
1844 DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1845 // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
1846 // value.
1847 DCHECK(!record.value.IsPureUnknown() ||
1848 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
1849 !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
1850 << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
1851 intermediate_values_.insert({instruction, record.value});
1852 loads_and_stores_.push_back({ instruction, idx });
1853 if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1854 !IsDefaultOrPhiAllowedForLoad(instruction)) {
1855 record.value = Value::PureUnknown();
1856 }
1857 if (record.value.IsDefault()) {
1858 KeepStores(record.stored_by);
1859 HInstruction* constant = GetDefaultValue(instruction->GetType());
1860 AddRemovedLoad(instruction, constant);
1861 record.value = Value::ForInstruction(constant);
1862 } else if (record.value.IsUnknown()) {
1863 // Load isn't eliminated. Put the load as the value into the HeapLocation.
1864 // This acts like GVN but with better aliasing analysis.
1865 Value old_value = record.value;
1866 record.value = Value::ForInstruction(instruction);
1867 KeepStoresIfAliasedToLocation(heap_values, idx);
1868 KeepStores(old_value);
1869 } else if (record.value.NeedsLoopPhi()) {
1870 // We do not know yet if the value is known for all back edges. Record for future processing.
1871 loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1872 } else {
1873 // This load can be eliminated but we may need to construct non-loop Phis.
1874 if (record.value.NeedsNonLoopPhi()) {
1875 MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1876 record.value = Replacement(record.value);
1877 }
1878 HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1879 AddRemovedLoad(instruction, heap_value);
1880 TryRemovingNullCheck(instruction);
1881 }
1882 }
1883
1884 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
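  // A store of the value that is already tracked for the location is removed right away;
  // otherwise the store is recorded so that later phases can decide whether it is dead.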
1885 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1886 DCHECK(!IsStore(value)) << value->DebugName();
1887 if (instruction->IsFieldAccess()) {
1888 RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1889 }
1890 // value may already have a substitute.
1891 value = FindSubstitute(value);
1892 HBasicBlock* block = instruction->GetBlock();
1893 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1894 ValueRecord& record = heap_values[idx];
1895 DCHECK_IMPLIES(record.value.IsInstruction(),
1896 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1897
1898 if (record.value.Equals(value)) {
1899 // Store into the heap location with the same value.
1900 // This store can be eliminated right away.
1901 block->RemoveInstruction(instruction);
1902 return;
1903 }
1904
1905 store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1906 loads_and_stores_.push_back({ instruction, idx });
1907
1908 // If the `record.stored_by` specified a store from this block, it shall be removed
1909 // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1910 // `kept_stores_` anymore after we update the `record.stored_by` below.
1911 DCHECK(!record.stored_by.IsInstruction() ||
1912 record.stored_by.GetInstruction()->GetBlock() != block ||
1913 record.stored_by.GetInstruction()->CanThrow() ||
1914 !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1915
1916 if (instruction->CanThrow()) {
1917 // Previous stores can become visible.
1918 HandleThrowingInstruction(instruction);
1919 // We cannot remove a possibly throwing store.
1920 // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1921 kept_stores_.SetBit(instruction->GetId());
1922 }
1923
1924 // Update the record.
1925 auto it = loads_requiring_loop_phi_.find(value);
1926 if (it != loads_requiring_loop_phi_.end()) {
1927     // Propagate the Phi placeholder to the record.
1928 record.value = it->second.value;
1929 DCHECK(record.value.NeedsLoopPhi());
1930 } else {
1931 record.value = Value::ForInstruction(value);
1932 }
1933 // Track the store in the value record. If the value is loaded or needed after
1934 // return/deoptimization later, this store isn't really redundant.
1935 record.stored_by = Value::ForInstruction(instruction);
1936
1937 // This store may kill values in other heap locations due to aliasing.
1938 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1939 if (i == idx ||
1940 heap_values[i].value.IsUnknown() ||
1941 CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1942 !heap_location_collector_.MayAlias(i, idx)) {
1943 continue;
1944 }
1945 // Kill heap locations that may alias and keep previous stores to these locations.
1946 KeepStores(heap_values[i].stored_by);
1947 heap_values[i].stored_by = Value::PureUnknown();
1948 heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1949 }
1950 }
1951
1952 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1953 // Populate the heap_values array for this block.
1954 // TODO: try to reuse the heap_values array from one predecessor if possible.
1955 if (block->IsLoopHeader()) {
1956 PrepareLoopRecords(block);
1957 } else {
1958 MergePredecessorRecords(block);
1959 }
1960 // Visit instructions.
1961 HGraphVisitor::VisitBasicBlock(block);
1962 }
1963
1964 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1965 DCHECK_NE(idx1, idx2);
1966 DCHECK(loop_header->IsLoopHeader());
1967 if (heap_location_collector_.MayAlias(idx1, idx2)) {
1968 return true;
1969 }
1970 // For array locations with index defined inside the loop, include
1971 // all other locations in the array, even those that LSA declares
1972 // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1973 // refer to the same locations for different iterations. (LSA's
1974 // `ComputeMayAlias()` does not consider different loop iterations.)
1975 HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1976 HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1977 if (loc1->IsArray() &&
1978 loc2->IsArray() &&
1979 HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1980 loc2->GetReferenceInfo())) {
1981 HLoopInformation* loop_info = loop_header->GetLoopInformation();
1982 if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1983 loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1984 // Consider the locations aliasing. Do not optimize the case where both indexes
1985 // are loop invariants defined inside the loop, rely on LICM to pull them out.
1986 return true;
1987 }
1988 }
1989 return false;
1990 }
1991
1992 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1993 PhiPlaceholder phi_placeholder,
1994 DataType::Type type,
1995 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1996 // Use local allocator to reduce peak memory usage.
1997 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1998 ArenaBitVector visited(&allocator,
1999 /*start_bits=*/ num_phi_placeholders_,
2000 /*expandable=*/ false,
2001 kArenaAllocLSE);
2002 visited.ClearAllBits();
2003 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2004
2005 // Use depth first search to check if any non-Phi input is unknown.
2006 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2007 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2008 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2009 work_queue.push_back(phi_placeholder);
2010 while (!work_queue.empty()) {
2011 PhiPlaceholder current_phi_placeholder = work_queue.back();
2012 work_queue.pop_back();
2013 HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
2014 DCHECK_GE(block->GetPredecessors().size(), 2u);
2015 size_t idx = current_phi_placeholder.GetHeapLocation();
2016 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2017 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2018 if (value.NeedsPhi()) {
2019 // Visit the predecessor Phi placeholder if it's not visited yet.
2020 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2021 visited.SetBit(PhiPlaceholderIndex(value));
2022 work_queue.push_back(value.GetPhiPlaceholder());
2023 }
2024 } else if (!value.Equals(Value::Default())) {
2025 return false; // Report failure.
2026 }
2027 }
2028 if (block->IsLoopHeader()) {
2029 // For back-edges we need to check all locations that write to the same array,
2030 // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
2031 // as they may actually refer to the same locations for different iterations.
2032 for (size_t i = 0; i != num_heap_locations; ++i) {
2033 if (i == idx ||
2034 heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
2035 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
2036 continue;
2037 }
2038 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2039 // Check if there were any writes to this location.
2040 // Note: We could simply process the values but due to the vector operation
2041 // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
2042 // the value to change and not be equal to default. To work around this and
2043 // allow replacing the non-vector load of loop-invariant default values
2044 // anyway, skip over paths that do not have any writes.
2045 ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
2046 while (record.stored_by.NeedsLoopPhi() &&
2047 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
2048 HLoopInformation* loop_info =
2049 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
2050 record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
2051 }
2052 Value value = ReplacementOrValue(record.value);
2053 if (value.NeedsPhi()) {
2054 // Visit the predecessor Phi placeholder if it's not visited yet.
2055 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2056 visited.SetBit(PhiPlaceholderIndex(value));
2057 work_queue.push_back(value.GetPhiPlaceholder());
2058 }
2059 } else if (!value.Equals(Value::Default())) {
2060 return false; // Report failure.
2061 }
2062 }
2063 }
2064 }
2065 }
2066
2067 // Record replacement and report success.
2068 HInstruction* replacement = GetDefaultValue(type);
2069 for (uint32_t phi_placeholder_index : visited.Indexes()) {
2070 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2071 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2072 }
2073 phi_placeholders_to_materialize->Subtract(&visited);
2074 return true;
2075 }
2076
2077 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
2078 PhiPlaceholder phi_placeholder,
2079 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
2080 // Use local allocator to reduce peak memory usage.
2081 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2082 ArenaBitVector visited(&allocator,
2083 /*start_bits=*/ num_phi_placeholders_,
2084 /*expandable=*/ false,
2085 kArenaAllocLSE);
2086 visited.ClearAllBits();
2087 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2088
2089 // Use depth first search to check if any non-Phi input is unknown.
2090 HInstruction* replacement = nullptr;
2091 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2092 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2093 work_queue.push_back(phi_placeholder);
2094 while (!work_queue.empty()) {
2095 PhiPlaceholder current_phi_placeholder = work_queue.back();
2096 work_queue.pop_back();
2097 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2098 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2099 size_t idx = current_phi_placeholder.GetHeapLocation();
2100 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2101 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2102 if (value.NeedsPhi()) {
2103 // Visit the predecessor Phi placeholder if it's not visited yet.
2104 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2105 visited.SetBit(PhiPlaceholderIndex(value));
2106 work_queue.push_back(value.GetPhiPlaceholder());
2107 }
2108 } else {
2109 if (!value.IsInstruction() ||
2110 (replacement != nullptr && replacement != value.GetInstruction())) {
2111 return false; // Report failure.
2112 }
2113 replacement = value.GetInstruction();
2114 }
2115 }
2116 // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
2117 // for back-edges, it is not needed here. When looking for a single input
2118 // instruction coming from before the loop, the array index must also be
2119 // defined before the loop and the aliasing analysis done by LSA is sufficient.
2120 // Any writes of a different value with an index that is not loop invariant
2121 // would invalidate the heap location in `VisitSetLocation()`.
2122 }
2123
2124 // Record replacement and report success.
2125 DCHECK(replacement != nullptr);
2126 for (uint32_t phi_placeholder_index : visited.Indexes()) {
2127 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2128 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2129 }
2130 phi_placeholders_to_materialize->Subtract(&visited);
2131 return true;
2132 }
2133
2134 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2135 PhiPlaceholder phi_placeholder,
2136 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2137 DataType::Type type,
2138 bool can_use_default_or_phi) {
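  // Returns the loop Phi placeholder that prevents materialization, or std::nullopt when
  // Phis can be constructed for the entire closure collected in
  // `phi_placeholders_to_materialize`.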
2139 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2140
2141 // Use local allocator to reduce peak memory usage.
2142 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2143 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2144
2145 // Use depth first search to check if any non-Phi input is unknown.
2146 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2147 phi_placeholders_to_materialize->ClearAllBits();
2148 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2149 work_queue.push_back(phi_placeholder);
2150 while (!work_queue.empty()) {
2151 PhiPlaceholder current_phi_placeholder = work_queue.back();
2152 work_queue.pop_back();
2153 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2154 // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2155 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2156 Value::Default()));
2157 continue;
2158 }
2159 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2160 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2161 size_t idx = current_phi_placeholder.GetHeapLocation();
2162 if (current_block->IsLoopHeader()) {
2163 // If the index is defined inside the loop, it may reference different elements of the
2164 // array on each iteration. Since we do not track if all elements of an array are set
2165 // to the same value explicitly, the only known value in pre-header can be the default
2166 // value from NewArray or a Phi placeholder depending on a default value from some outer
2167 // loop pre-header. This Phi placeholder can be replaced only by the default value.
2168 HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2169 if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2170 if (can_use_default_or_phi &&
2171 TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2172 type,
2173 phi_placeholders_to_materialize)) {
2174 continue;
2175 } else {
2176 return current_phi_placeholder; // Report the loop Phi placeholder.
2177 }
2178 }
2179 // A similar situation arises with the index defined outside the loop if we cannot use
2180 // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2181 // placeholder with a single instruction defined before the loop.
2182 if (!can_use_default_or_phi) {
2183 DCHECK(index != nullptr); // Vector operations are array operations.
2184 if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2185 phi_placeholders_to_materialize)) {
2186 continue;
2187 } else {
2188 return current_phi_placeholder; // Report the loop Phi placeholder.
2189 }
2190 }
2191 }
2192 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2193 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2194 Value value = ReplacementOrValue(heap_values[idx].value);
2195 if (value.IsUnknown()) {
2196 // We cannot create a Phi for this loop Phi placeholder.
2197 return current_phi_placeholder; // Report the loop Phi placeholder.
2198 }
2199 // For arrays, the location may have been clobbered by writes to other locations
2200 // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2201 if (current_block->IsLoopHeader() &&
2202 predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2203 heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2204 for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2205 if (i != idx &&
2206 !heap_values[i].stored_by.IsUnknown() &&
2207 MayAliasOnBackEdge(current_block, idx, i)) {
2208 // We cannot create a Phi for this loop Phi placeholder.
2209 return current_phi_placeholder;
2210 }
2211 }
2212 }
2213 if (value.NeedsLoopPhi()) {
2214 // Visit the predecessor Phi placeholder if it's not visited yet.
2215 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2216 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2217 work_queue.push_back(value.GetPhiPlaceholder());
2218 LSE_VLOG << "For materialization of " << phi_placeholder
2219 << " we need to materialize " << value;
2220 }
2221 }
2222 }
2223 }
2224
2225 // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2226 return std::nullopt;
2227 }
2228
2229 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2230 DataType::Type type) {
2231 return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2232 }
2233
2234 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2235 DataType::Type type) {
2236 // Materialize all predecessors that do not need a loop Phi and determine if all inputs
2237 // other than loop Phis are the same.
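  // If they are, that single value (or the default value) replaces every placeholder in
  // the set and no new Phi needs to be created.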
2238 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2239 std::optional<Value> other_value = std::nullopt;
2240 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2241 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2242 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2243 DCHECK_GE(block->GetPredecessors().size(), 2u);
2244 size_t idx = phi_placeholder.GetHeapLocation();
2245 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2246 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2247 if (value.NeedsNonLoopPhi()) {
2248 DCHECK(current_phase_ == Phase::kLoadElimination ||
2249 current_phase_ == Phase::kPartialElimination)
2250 << current_phase_;
2251 MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2252 value = Replacement(value);
2253 }
2254 if (!value.NeedsLoopPhi()) {
2255 if (!other_value) {
2256 // The first other value we found.
2257 other_value = value;
2258 } else if (!other_value->IsInvalid()) {
2259 // Check if the current `value` differs from the previous `other_value`.
2260 if (!value.Equals(*other_value)) {
2261 other_value = Value::Invalid();
2262 }
2263 }
2264 }
2265 }
2266 }
2267
2268 DCHECK(other_value.has_value());
2269 if (!other_value->IsInvalid()) {
2270 HInstruction* replacement =
2271 (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2272 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2273 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2274 }
2275 return true;
2276 }
2277
2278 // If we're materializing only a single Phi, try to match it with an existing Phi.
2279 // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2280 // This also covers the case when after replacing a previous set of Phi placeholders,
2281 // we continue with a Phi placeholder that does not really need a loop Phi anymore.
2282 if (phi_placeholder_indexes.size() == 1u) {
2283 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2284 size_t idx = phi_placeholder.GetHeapLocation();
2285 HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2286 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2287 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2288 HInstruction* phi = phi_it.Current();
2289 DCHECK_EQ(phi->InputCount(), predecessors.size());
2290 ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
2291 auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2292 Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2293 if (value.NeedsPhi()) {
2294 DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2295 return lhs.GetInstruction() == phi;
2296 } else {
2297 DCHECK(value.IsDefault() || value.IsInstruction());
2298 return value.Equals(lhs.GetInstruction());
2299 }
2300 };
2301 if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2302 phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2303 return true;
2304 }
2305 }
2306 }
2307
2308 if (current_phase_ == Phase::kStoreElimination) {
2309 // We're not creating Phis during the final store elimination phase.
2310 return false;
2311 }
2312
2313 // There are different inputs to the Phi chain. Create the Phis.
2314 ArenaAllocator* allocator = GetGraph()->GetAllocator();
2315 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2316 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2317 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2318 CHECK_GE(block->GetPredecessors().size(), 2u);
2319 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2320 new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2321 }
2322 // Fill the Phi inputs.
2323 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2324 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2325 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2326 size_t idx = phi_placeholder.GetHeapLocation();
2327 HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2328 DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2329 << "type=" << type << " vs phi-type=" << phi->GetType();
2330 for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2331 HBasicBlock* predecessor = block->GetPredecessors()[i];
2332 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2333 HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2334 DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2335 phi->SetRawInputAt(i, input);
2336 DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2337 << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2338 << " request: " << type;
2339 }
2340 }
2341 // Add the Phis to their blocks.
2342 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2343 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2344 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2345 block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2346 }
2347 if (type == DataType::Type::kReference) {
2348 ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2349 ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2350 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2351 phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2352 }
2353 // Update reference type information. Pass invalid handles, these are not used for Phis.
2354 ReferenceTypePropagation rtp_fixup(GetGraph(),
2355 Handle<mirror::ClassLoader>(),
2356 Handle<mirror::DexCache>(),
2357 /* is_first_run= */ false);
2358 rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2359 }
2360
2361 return true;
2362 }
2363
2364 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2365 DataType::Type type) {
2366 // Use local allocator to reduce peak memory usage.
2367 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2368
2369 // We want to recognize when a subset of these loop Phis that do not need other
2370 // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
2371 // i.e. that instruction can be used instead of each Phi in the set. See for example
2372 // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
2373 // materialize these loop Phis from the smallest transitive closure.
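// Illustrative sketch (hypothetical placeholders, not from a specific test): if loop Phi
// placeholders P1 and P2 both see only a stored value `v` besides each other, the closure
// {P1} is materialized first; when it collapses to `v`, P2 can then also use `v` directly
// instead of becoming a real HPhi.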
2374
2375 // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2376 // assign new indexes to the Phi placeholders, making the matrix dense.
2377 ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2378 static_cast<size_t>(-1), // Invalid.
2379 allocator.Adapter(kArenaAllocLSE));
2380 ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2381 size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2382 phi_placeholder_indexes.reserve(num_phi_placeholders);
2383 for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2384 matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2385 phi_placeholder_indexes.push_back(marker_index);
2386 }
2387 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2388 ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2389 dependencies.reserve(num_phi_placeholders);
2390 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2391 static constexpr bool kExpandable = false;
2392 dependencies.push_back(
2393 ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2394 ArenaBitVector* current_dependencies = dependencies.back();
2395 current_dependencies->ClearAllBits();
2396 current_dependencies->SetBit(matrix_index); // Count the Phi placeholder as its own dependency.
2397 PhiPlaceholder current_phi_placeholder =
2398 GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2399 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2400 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2401 size_t idx = current_phi_placeholder.GetHeapLocation();
2402 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2403 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2404 if (pred_value.NeedsLoopPhi()) {
2405 size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2406 DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2407 DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2408 current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2409 }
2410 }
2411 }
2412
2413 // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
2414 for (size_t k = 0; k != num_phi_placeholders; ++k) {
2415 for (size_t i = 0; i != num_phi_placeholders; ++i) {
2416 for (size_t j = 0; j != num_phi_placeholders; ++j) {
2417 if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2418 dependencies[i]->SetBit(j);
2419 }
2420 }
2421 }
2422 }
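// For example (illustrative only): with three placeholders where 0 depends on 1 and
// 1 depends on 2, the k/i/j loops add the missing 0 -> 2 bit, i.e.
//   before: dependencies[0] = {0, 1}, dependencies[1] = {1, 2}, dependencies[2] = {2}
//   after:  dependencies[0] = {0, 1, 2}, dependencies[1] = {1, 2}, dependencies[2] = {2}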
2423
2424 // Count the number of transitive dependencies for each replaceable Phi placeholder.
2425 ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2426 num_dependencies.reserve(num_phi_placeholders);
2427 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2428 num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2429 }
2430
2431 // Pick a Phi placeholder with the smallest number of transitive dependencies and
2432 // materialize it and its dependencies. Repeat until we have materialized all.
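// For example (hypothetical): with transitive closures {0}, {0, 1} and {0, 1, 2} we
// materialize the subset {0} first, then {1} (after subtracting the already materialized
// {0} from the other closures), then {2}.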
2433 ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2434 current_subset.reserve(num_phi_placeholders);
2435 size_t remaining_phi_placeholders = num_phi_placeholders;
2436 while (remaining_phi_placeholders != 0u) {
2437 auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2438 DCHECK_LE(*it, remaining_phi_placeholders);
2439 size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2440 ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2441 size_t current_num_dependencies = num_dependencies[current_matrix_index];
2442 current_subset.clear();
2443 for (uint32_t matrix_index : current_dependencies->Indexes()) {
2444 current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2445 }
2446 if (!MaterializeLoopPhis(current_subset, type)) {
2447 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2448 // This is the final store elimination phase and we shall not be able to eliminate any
2449 // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2450 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2451 if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2452 DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2453 phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
2454 Value::PureUnknown();
2455 }
2456 }
2457 return false;
2458 }
2459 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2460 if (current_dependencies->IsBitSet(matrix_index)) {
2461 // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
2462 // so that they shall never be the minimum again.
2463 num_dependencies[matrix_index] = num_phi_placeholders;
2464 } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2465 // Remove dependencies from other Phi placeholders.
2466 dependencies[matrix_index]->Subtract(current_dependencies);
2467 num_dependencies[matrix_index] -= current_num_dependencies;
2468 }
2469 }
2470 remaining_phi_placeholders -= current_num_dependencies;
2471 }
2472 return true;
2473 }
2474
2475 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2476 ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2477 ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2478 auto res =
2479 FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2480 CHECK(!res.has_value()) << *res;
2481 return MaterializeLoopPhis(abv, type);
2482 }
2483
2484 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2485 PhiPlaceholder phi_placeholder, HInstruction* load) {
2486 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2487
2488 // Use local allocator to reduce peak memory usage.
2489 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2490
2491 // Find Phi placeholders to materialize.
2492 ArenaBitVector phi_placeholders_to_materialize(
2493 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2494 phi_placeholders_to_materialize.ClearAllBits();
2495 DataType::Type type = load->GetType();
2496 bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2497 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2498 phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2499 if (loop_phi_with_unknown_input) {
2500 DCHECK_GE(GetGraph()
2501 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2502 ->GetPredecessors()
2503 .size(),
2504 2u);
2505 return loop_phi_with_unknown_input; // Return failure.
2506 }
2507
2508 DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2509 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2510 DCHECK(success);
2511
2512 // Report success.
2513 return std::nullopt;
2514 }
2515
2516 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2517 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2518 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2519 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2520 // heap location that is needed by another loop Phi placeholder.
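// A minimal sketch of the kind of code that reaches this path (assumed example):
//   int f = obj.field;
//   while (cond) {
//     escape(obj);     // The heap value becomes unknown on the back-edge.
//     f = obj.field;   // This load cannot be replaced by a loop Phi.
//   }
// The re-processing below records such a load as the new heap value for its location.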
2521 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2522 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2523 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2524 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2525 Value::MergedUnknown(loop_phi_with_unknown_input);
2526
2527 uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2528 const ArenaVector<HBasicBlock*> reverse_post_order = GetGraph()->GetReversePostOrder();
2529 size_t reverse_post_order_index = 0;
2530 size_t reverse_post_order_size = reverse_post_order.size();
2531 size_t loads_and_stores_index = 0u;
2532 size_t loads_and_stores_size = loads_and_stores_.size();
2533
2534 // Skip blocks and instructions before the block containing the loop phi with unknown input.
2535 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2536 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2537 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2538 while (loads_and_stores_index != loads_and_stores_size &&
2539 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2540 ++loads_and_stores_index;
2541 }
2542 ++reverse_post_order_index;
2543 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2544 }
2545
2546 // Use local allocator to reduce peak memory usage.
2547 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2548 // Reuse one temporary vector for all remaining blocks.
2549 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2550 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2551
2552 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2553 Value value;
2554 if (block->IsLoopHeader()) {
2555 if (block->GetLoopInformation()->IsIrreducible()) {
2556 PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2557 value = Value::MergedUnknown(placeholder);
2558 } else {
2559 value = PrepareLoopValue(block, idx);
2560 }
2561 } else {
2562 value = MergePredecessorValues(block, idx);
2563 }
2564 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2565 return value;
2566 };
2567
2568 // Process remaining blocks and instructions.
2569 bool found_unreplaceable_load = false;
2570 bool replaced_heap_value_with_unknown = false;
2571 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2572 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2573 if (block->IsExitBlock()) {
2574 continue;
2575 }
2576
2577 // We shall reconstruct only the heap values that we need for processing loads and stores.
2578 local_heap_values.clear();
2579 local_heap_values.resize(num_heap_locations, Value::Invalid());
2580
2581 for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2582 HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2583 size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2584 if (load_or_store->GetBlock() != block) {
2585 break; // End of instructions from the current block.
2586 }
2587 bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
2588 DCHECK_EQ(is_store, IsStore(load_or_store));
2589 HInstruction* stored_value = nullptr;
2590 if (is_store) {
2591 auto it = store_records_.find(load_or_store);
2592 DCHECK(it != store_records_.end());
2593 stored_value = it->second.stored_value;
2594 }
2595 auto it = loads_requiring_loop_phi_.find(
2596 stored_value != nullptr ? stored_value : load_or_store);
2597 if (it == loads_requiring_loop_phi_.end()) {
2598 continue; // This load or store never needed a loop Phi.
2599 }
2600 ValueRecord& record = it->second;
2601 if (is_store) {
2602 // Process the store by updating `local_heap_values[idx]`. The last update shall
2603 // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2604 // at the end of the block.
2605 Value replacement = ReplacementOrValue(record.value);
2606 if (replacement.NeedsLoopPhi()) {
2607 // No replacement yet, use the Phi placeholder from the load.
2608 DCHECK(record.value.NeedsLoopPhi());
2609 local_heap_values[idx] = record.value;
2610 } else {
2611 // If the load fetched a known value, use it, otherwise use the load.
2612 local_heap_values[idx] = Value::ForInstruction(
2613 replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2614 }
2615 } else {
2616 // Process the load unless it has previously been marked unreplaceable.
2617 if (record.value.NeedsLoopPhi()) {
2618 if (local_heap_values[idx].IsInvalid()) {
2619 local_heap_values[idx] = get_initial_value(block, idx);
2620 }
2621 if (local_heap_values[idx].IsUnknown()) {
2622 // This load cannot be replaced. Keep stores that feed the Phi placeholder
2623 // (no aliasing since then, otherwise the Phi placeholder would not have been
2624 // propagated as a value to this load) and store the load as the new heap value.
2625 found_unreplaceable_load = true;
2626 KeepStores(record.value);
2627 record.value = Value::MergedUnknown(record.value.GetPhiPlaceholder());
2628 local_heap_values[idx] = Value::ForInstruction(load_or_store);
2629 } else if (local_heap_values[idx].NeedsLoopPhi()) {
2630 // The load may still be replaced with a Phi later.
2631 DCHECK(local_heap_values[idx].Equals(record.value));
2632 } else {
2633 // This load can be eliminated but we may need to construct non-loop Phis.
2634 if (local_heap_values[idx].NeedsNonLoopPhi()) {
2635 MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2636 load_or_store->GetType());
2637 local_heap_values[idx] = Replacement(local_heap_values[idx]);
2638 }
2639 record.value = local_heap_values[idx];
2640 HInstruction* heap_value = local_heap_values[idx].GetInstruction();
2641 AddRemovedLoad(load_or_store, heap_value);
2642 TryRemovingNullCheck(load_or_store);
2643 }
2644 }
2645 }
2646 }
2647
2648 // All heap values that previously needed a loop Phi at the end of the block
2649 // need to be updated for processing successors.
2650 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2651 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2652 if (heap_values[idx].value.NeedsLoopPhi()) {
2653 if (local_heap_values[idx].IsValid()) {
2654 heap_values[idx].value = local_heap_values[idx];
2655 } else {
2656 heap_values[idx].value = get_initial_value(block, idx);
2657 }
2658 if (heap_values[idx].value.IsUnknown()) {
2659 replaced_heap_value_with_unknown = true;
2660 }
2661 }
2662 }
2663 }
2664 DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2665 }
2666
2667 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2668 // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2669 // make the result of the processing depend on the order in which we process these loads.
2670 // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2671 // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2672 for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2673 auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2674 if (it == loads_requiring_loop_phi_.end()) {
2675 continue;
2676 }
2677 HInstruction* load = it->first;
2678 ValueRecord& record = it->second;
2679 while (record.value.NeedsLoopPhi() &&
2680 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2681 std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2682 TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2683 DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2684 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2685 if (loop_phi_with_unknown_input) {
2686 DCHECK_GE(GetGraph()
2687 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2688 ->GetPredecessors()
2689 .size(),
2690 2u);
2691 ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2692 }
2693 }
2694 // The load could have been marked as unreplaceable (and stores marked for keeping)
2695 // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2696 DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2697 if (record.value.NeedsLoopPhi()) {
2698 record.value = Replacement(record.value);
2699 HInstruction* heap_value = record.value.GetInstruction();
2700 AddRemovedLoad(load, heap_value);
2701 TryRemovingNullCheck(load);
2702 }
2703 }
2704 }
2705
2706 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2707 ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2708 size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2709 work_queue.reserve(((start_size * 3u) + 1u) / 2u); // Reserve 1.5x start size, rounded up.
2710 for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2711 work_queue.push_back(index);
2712 }
2713 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2714 std::optional<ArenaBitVector> not_kept_stores;
2715 if (stats_) {
2716 not_kept_stores.emplace(GetGraph()->GetAllocator(),
2717 kept_stores_.GetBitSizeOf(),
2718 false,
2719 ArenaAllocKind::kArenaAllocLSE);
2720 }
2721 while (!work_queue.empty()) {
2722 uint32_t cur_phi_idx = work_queue.back();
2723 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2724 // Only writes to partial-escapes need to be specifically kept.
2725 bool is_partial_kept_merged_unknown =
2726 kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
2727 heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation())
2728 ->GetReferenceInfo()
2729 ->IsPartialSingleton();
2730 work_queue.pop_back();
2731 size_t idx = phi_placeholder.GetHeapLocation();
2732 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2733 DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2734 << " (blocks: " << blocks.size() << ")";
2735 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2736 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2737 // For loop back-edges we must also preserve all stores to locations that
2738 // may alias with the location `idx`.
2739 // TODO: Add test cases around this.
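// For example (illustrative): on a back-edge predecessor a store to a[j] must be kept
// even when searching for stores feeding location a[i], since i and j may be equal in
// some iteration; for other predecessors only location `idx` is checked.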
2740 bool is_back_edge =
2741 block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2742 size_t start = is_back_edge ? 0u : idx;
2743 size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2744 for (size_t i = start; i != end; ++i) {
2745 Value stored_by = heap_values[i].stored_by;
2746 if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2747 if (stored_by.NeedsPhi()) {
2748 size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2749 if (is_partial_kept_merged_unknown) {
2750 // Propagate merged-unknown keep since otherwise this might look
2751 // like a partial escape we can remove.
2752 kept_merged_unknowns_.SetBit(phi_placeholder_index);
2753 }
2754 if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2755 phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2756 work_queue.push_back(phi_placeholder_index);
2757 }
2758 } else {
2759 DCHECK(IsStore(stored_by.GetInstruction()));
2760 ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2761 DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2762 << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2763 << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2764 if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
2765 if (not_kept_stores) {
2766 not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
2767 }
2768 } else {
2769 kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2770 }
2771 }
2772 }
2773 }
2774 }
2775 }
2776 if (not_kept_stores) {
2777 // a - b := (a & ~b)
2778 not_kept_stores->Subtract(&kept_stores_);
2779 auto num_removed = not_kept_stores->NumSetBits();
2780 MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
2781 }
2782 }
2783
2784 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2785 while (value_record->stored_by.IsInstruction() &&
2786 !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2787 auto it = store_records_.find(value_record->stored_by.GetInstruction());
2788 DCHECK(it != store_records_.end());
2789 *value_record = it->second.old_value_record;
2790 }
2791 if (value_record->stored_by.NeedsPhi() &&
2792 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2793 PhiPlaceholderIndex(value_record->stored_by))) {
2794 // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2795 // Phi placeholder to recalculate the actual value.
2796 value_record->value = value_record->stored_by;
2797 }
2798 value_record->value = ReplacementOrValue(value_record->value);
2799 if (value_record->value.NeedsNonLoopPhi()) {
2800 // Treat all Phi placeholders as requiring loop Phis at this point.
2801 // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2802 value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2803 }
2804 }
2805
2806 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2807 DataType::Type type) {
2808 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2809
2810 // Use local allocator to reduce peak memory usage.
2811 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2812 ArenaBitVector visited(&allocator,
2813 /*start_bits=*/ num_phi_placeholders_,
2814 /*expandable=*/ false,
2815 kArenaAllocLSE);
2816 visited.ClearAllBits();
2817
2818 // Find Phi placeholders to try and match against existing Phis or other replacement values.
2819 ArenaBitVector phi_placeholders_to_materialize(
2820 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2821 phi_placeholders_to_materialize.ClearAllBits();
2822 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2823 phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2824 if (loop_phi_with_unknown_input) {
2825 DCHECK_GE(GetGraph()
2826 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2827 ->GetPredecessors()
2828 .size(),
2829 2u);
2830 // Mark the unreplaceable placeholder as well as the input Phi placeholder as unreplaceable.
2831 phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
2832 phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2833 Value::PureUnknown();
2834 return;
2835 }
2836
2837 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2838 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2839 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2840 DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2841 !success);
2842 }
2843
2844 struct ScopedRestoreHeapValues {
2845 public:
2846 ScopedRestoreHeapValues(ArenaStack* alloc,
2847 size_t num_heap_locs,
2848 ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
2849 : alloc_(alloc),
2850 updated_values_(alloc_.Adapter(kArenaAllocLSE)),
2851 to_restore_(to_restore) {
2852 updated_values_.reserve(num_heap_locs * to_restore_.size());
2853 }
2854
2855 ~ScopedRestoreHeapValues() {
2856 for (const auto& rec : updated_values_) {
2857 to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
2858 }
2859 }
2860
2861 template<typename Func>
2862 void ForEachRecord(Func func) {
2863 for (size_t blk_id : Range(to_restore_.size())) {
2864 for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
2865 LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
2866 LSEVisitor::Value initial = vr->value;
2867 func(vr);
2868 if (!vr->value.ExactEquals(initial)) {
2869 updated_values_.push_back({blk_id, heap_loc, initial});
2870 }
2871 }
2872 }
2873 }
2874
2875 private:
2876 struct UpdateRecord {
2877 size_t blk_id;
2878 size_t heap_loc;
2879 LSEVisitor::Value val_;
2880 };
2881 ScopedArenaAllocator alloc_;
2882 ScopedArenaVector<UpdateRecord> updated_values_;
2883 ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
2884
2885 DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
2886 };
2887
2888 void LSEVisitor::FindStoresWritingOldValues() {
2889 // Partial LSE relies on knowing the real heap-values not the
2890 // store-replacement versions so we need to restore the map after removing
2891 // stores.
2892 ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
2893 heap_location_collector_.GetNumberOfHeapLocations(),
2894 heap_values_for_);
2895 // The Phi placeholder replacements have so far been used for eliminating loads,
2896 // tracking values that would be stored if all stores were kept. As we want to
2897 // compare actual old values after removing unmarked stores, prune the Phi
2898 // placeholder replacements that can be fed by values we may not actually store.
2899 // Replacements marked as unknown can be kept as they are fed by some unknown
2900 // value and would end up as unknown again if we recalculated them.
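// Sketch (hypothetical): if a placeholder was previously replaced by `x` only because a
// store of `x` fed it, but that store is no longer in `kept_stores_`, the replacement is
// invalidated here and recomputed below from the stores that actually remain.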
2901 for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2902 if (!phi_placeholder_replacements_[i].IsUnknown() &&
2903 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2904 phi_placeholder_replacements_[i] = Value::Invalid();
2905 }
2906 }
2907
2908 // Update heap values at end of blocks.
2909 heap_vals.ForEachRecord([&](ValueRecord* rec) {
2910 UpdateValueRecordForStoreElimination(rec);
2911 });
2912
2913 if (kIsDebugBuild) {
2914 heap_vals.ForEachRecord([](ValueRecord* rec) {
2915 DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
2916 });
2917 }
2918
2919 // Use local allocator to reduce peak memory usage.
2920 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2921 // Mark the stores we want to eliminate in a separate bit vector.
2922 ArenaBitVector eliminated_stores(&allocator,
2923 /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2924 /*expandable=*/ false,
2925 kArenaAllocLSE);
2926 eliminated_stores.ClearAllBits();
2927
2928 for (auto& entry : store_records_) {
2929 HInstruction* store = entry.first;
2930 StoreRecord& store_record = entry.second;
2931 if (!kept_stores_.IsBitSet(store->GetId())) {
2932 continue; // Ignore stores that are not kept.
2933 }
2934 UpdateValueRecordForStoreElimination(&store_record.old_value_record);
2935 if (store_record.old_value_record.value.NeedsPhi()) {
2936 DataType::Type type = store_record.stored_value->GetType();
2937 FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
2938 store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
2939 }
2940 DCHECK(!store_record.old_value_record.value.NeedsPhi());
2941 HInstruction* stored_value = FindSubstitute(store_record.stored_value);
2942 if (store_record.old_value_record.value.Equals(stored_value)) {
2943 eliminated_stores.SetBit(store->GetId());
2944 }
2945 }
2946
2947 // Commit the stores to eliminate by removing them from `kept_stores_`.
2948 kept_stores_.Subtract(&eliminated_stores);
2949 }
2950
2951 void LSEVisitor::Run() {
2952 // 1. Process blocks and instructions in reverse post order.
2953 for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2954 VisitBasicBlock(block);
2955 }
2956
2957 // 2. Process loads that require loop Phis, trying to find/create replacements.
2958 current_phase_ = Phase::kLoadElimination;
2959 ProcessLoadsRequiringLoopPhis();
2960
2961 // 3. Determine which stores to keep and which to eliminate.
2962 current_phase_ = Phase::kStoreElimination;
2963 // Finish marking stores for keeping.
2964 SearchPhiPlaceholdersForKeptStores();
2965
2966 // Find stores that write the same value as is already present in the location.
2967 FindStoresWritingOldValues();
2968
2969 // 4. Replace loads and remove unnecessary stores and singleton allocations.
2970 FinishFullLSE();
2971
2972 // 5. Move partial escapes down and fixup with PHIs.
2973 current_phase_ = Phase::kPartialElimination;
2974 MovePartialEscapes();
2975 }
2976
2977 // Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to
2978 // retry all of them with more information about where they come from.
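// For instance, a placeholder previously resolved to `Value::PureUnknown()` is reset to
// `Value::Invalid()` by the predicate below, so partial elimination can re-evaluate it
// with partial-unknown information; default and instruction replacements are kept.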
2979 void LSEVisitor::PrepareForPartialPhiComputation() {
2980 std::replace_if(
2981 phi_placeholder_replacements_.begin(),
2982 phi_placeholder_replacements_.end(),
2983 [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); },
2984 Value::Invalid());
2985 }
2986
2987 class PartialLoadStoreEliminationHelper {
2988 public:
2989 PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
2990 : lse_(lse),
2991 alloc_(alloc),
2992 new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
2993 heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
2994 max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
2995 GetGraph()->GetActiveBlocks().end(),
2996 [](HBasicBlock* a, HBasicBlock* b) {
2997 return a->GetNumberOfPredecessors() <
2998 b->GetNumberOfPredecessors();
2999 }))
3000 ->GetNumberOfPredecessors()),
3001 materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
3002 nullptr,
3003 alloc_->Adapter(kArenaAllocLSE)),
3004 first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
3005 size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
3006 heap_refs_.reserve(num_partial_singletons);
3007 new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
3008 CollectInterestingHeapRefs();
3009 }
3010
3011 ~PartialLoadStoreEliminationHelper() {
3012 if (heap_refs_.empty()) {
3013 return;
3014 }
3015 ReferenceTypePropagation rtp_fixup(GetGraph(),
3016 Handle<mirror::ClassLoader>(),
3017 Handle<mirror::DexCache>(),
3018 /* is_first_run= */ false);
3019 rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
3020 GetGraph()->ClearLoopInformation();
3021 GetGraph()->ClearDominanceInformation();
3022 GetGraph()->ClearReachabilityInformation();
3023 GetGraph()->BuildDominatorTree();
3024 GetGraph()->ComputeReachabilityInformation();
3025 }
3026
3027 class IdxToHeapLoc {
3028 public:
3029 explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
3030 HeapLocation* operator()(size_t idx) const {
3031 return collector_->GetHeapLocation(idx);
3032 }
3033
3034 private:
3035 const HeapLocationCollector* collector_;
3036 };
3037
3038
3039 class HeapReferenceData {
3040 public:
3041 using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
3042 HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
3043 HNewInstance* new_inst,
3044 const ExecutionSubgraph* subgraph,
3045 ScopedArenaAllocator* alloc)
3046 : new_instance_(new_inst),
3047 helper_(helper),
3048 heap_locs_(alloc,
3049 helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
3050 /* expandable= */ false,
3051 kArenaAllocLSE),
3052 materializations_(
3053 // We generally won't need to create too many materialization blocks and we can expand
3054 // this as needed so just start off with 2x.
3055 2 * helper->lse_->GetGraph()->GetBlocks().size(),
3056 nullptr,
3057 alloc->Adapter(kArenaAllocLSE)),
3058 collector_(helper->lse_->heap_location_collector_),
3059 subgraph_(subgraph) {}
3060
3061 LocIterator IterateLocations() {
3062 auto idxs = heap_locs_.Indexes();
3063 return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
3064 }
3065
3066 void AddHeapLocation(size_t idx) {
3067 heap_locs_.SetBit(idx);
3068 }
3069
3070 const ExecutionSubgraph* GetNoEscapeSubgraph() const {
3071 return subgraph_;
3072 }
3073
3074 bool IsPostEscape(HBasicBlock* blk) {
3075 return std::any_of(
3076 subgraph_->GetExcludedCohorts().cbegin(),
3077 subgraph_->GetExcludedCohorts().cend(),
3078 [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
3079 }
3080
3081 bool InEscapeCohort(HBasicBlock* blk) {
3082 return std::any_of(
3083 subgraph_->GetExcludedCohorts().cbegin(),
3084 subgraph_->GetExcludedCohorts().cend(),
3085 [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
3086 }
3087
3088 bool BeforeAllEscapes(HBasicBlock* b) {
3089 return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
3090 subgraph_->GetExcludedCohorts().cend(),
3091 [&](const ExecutionSubgraph::ExcludedCohort& ec) {
3092 return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
3093 });
3094 }
3095
3096 HNewInstance* OriginalNewInstance() const {
3097 return new_instance_;
3098 }
3099
3100 // Collect and replace all uses. We need to perform this twice since we will
3101 // generate PHIs and additional uses as we create the default-values for
3102 // pred-gets. These values might be other references that are also being
3103 // partially eliminated. By running just the replacement part again we are
3104 // able to avoid having to keep another whole in-progress partial map
3105 // around. Since we will have already handled all the other uses in the
3106 // first pass the second one will be quite fast.
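// The two passes are driven by FixupPartialObjectUsers():
//   FixupUses(/*first_pass=*/ true);   // field accesses, fences, comparisons, Phis
//   FixupUses(/*first_pass=*/ false);  // only new Phis / pred-get defaults remain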
3107 void FixupUses(bool first_pass) {
3108 ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3109 // Replace uses with materialized values.
3110 ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
3111 ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
3112 // Do we need to add a constructor fence?
3113 ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
3114 saa.Adapter(kArenaAllocLSE));
3115 ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
3116
3117 CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
3118
3119 if (!first_pass) {
3120 // If another partial creates new references they can only be in Phis or pred-get defaults
3121 // so they must be in the to_replace group.
3122 DCHECK(to_predicate.empty());
3123 DCHECK(constructor_fences.empty());
3124 DCHECK(to_remove.empty());
3125 }
3126
3127 ReplaceInput(to_replace);
3128 RemoveAndReplaceInputs(to_remove);
3129 CreateConstructorFences(constructor_fences);
3130 PredicateInstructions(to_predicate);
3131
3132 CHECK(OriginalNewInstance()->GetUses().empty())
3133 << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
3134 }
3135
3136 void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
3137 if (blk->GetBlockId() >= materializations_.size()) {
3138 // Make sure the materialization array is large enough, try to avoid
3139 // re-sizing too many times by giving extra space.
3140 materializations_.resize(blk->GetBlockId() * 2, nullptr);
3141 }
3142 DCHECK(materializations_[blk->GetBlockId()] == nullptr)
3143 << "Already have a materialization in block " << blk->GetBlockId() << ": "
3144 << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
3145 << *ins;
3146 materializations_[blk->GetBlockId()] = ins;
3147 LSE_VLOG << "In block " << blk->GetBlockId() << " materialization is " << *ins;
3148 helper_->NotifyNewMaterialization(ins);
3149 }
3150
3151 bool HasMaterialization(HBasicBlock* blk) const {
3152 return blk->GetBlockId() < materializations_.size() &&
3153 materializations_[blk->GetBlockId()] != nullptr;
3154 }
3155
3156 HInstruction* GetMaterialization(HBasicBlock* blk) const {
3157 if (materializations_.size() <= blk->GetBlockId() ||
3158 materializations_[blk->GetBlockId()] == nullptr) {
3159 // This must be a materialization block added after the partial LSE of
3160 // the current reference finished. Since every edge can only have at
3161 // most one materialization block added to it we can just check the
3162 // block's predecessor.
3163 DCHECK(helper_->IsMaterializationBlock(blk));
3164 blk = helper_->FindDominatingNonMaterializationBlock(blk);
3165 DCHECK(!helper_->IsMaterializationBlock(blk));
3166 }
3167 DCHECK_GT(materializations_.size(), blk->GetBlockId());
3168 DCHECK(materializations_[blk->GetBlockId()] != nullptr);
3169 return materializations_[blk->GetBlockId()];
3170 }
3171
3172 void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
3173 DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3174 GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3175 [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3176 return cohort.IsEntryBlock(blk);
3177 }));
3178 DCHECK(!HasMaterialization(blk));
3179 if (blk->IsExitBlock()) {
3180 return;
3181 } else if (blk->IsLoopHeader()) {
3182 // See comment in execution_subgraph.h. Currently we act as though every
3183 // allocation for partial elimination takes place in the entry block.
3184 // This simplifies the analysis by making it so any escape cohort
3185 // expands to contain any loops it is a part of. This is something that
3186 // we should rectify at some point. In either case however we can still
3187 // special case the loop-header since (1) currently the loop can't have
3188 // any merges between different cohort entries since the pre-header will
3189 // be the earliest place entry can happen and (2) even if the analysis
3190 // is improved to consider lifetime of the object WRT loops any values
3191 // which would require loop-phis would have to make the whole loop
3192 // escape anyway.
3193 // This all means we can always use value from the pre-header when the
3194 // block is the loop-header and we didn't already create a
3195 // materialization block. (NB when we do improve the analysis we will
3196 // need to modify the materialization creation code to deal with this
3197 // correctly.)
3198 HInstruction* pre_header_val =
3199 GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
3200 AddMaterialization(blk, pre_header_val);
3201 return;
3202 }
3203 ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3204 ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
3205 pred_vals.reserve(blk->GetNumberOfPredecessors());
3206 for (HBasicBlock* pred : blk->GetPredecessors()) {
3207 DCHECK(HasMaterialization(pred));
3208 pred_vals.push_back(GetMaterialization(pred));
3209 }
3210 GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
3211 }
3212
3213 void GenerateMaterializationValueFromPredecessorsForEntry(
3214 HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
3215 DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3216 GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3217 [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3218 return cohort.IsEntryBlock(entry);
3219 }));
3220 GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
3221 }
3222
3223 private:
3224 template <typename InstructionType>
3225 struct InstructionUse {
3226 InstructionType* instruction_;
3227 size_t index_;
3228 };
3229
3230 void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
3231 for (auto& [ins, idx] : to_replace) {
3232 HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3233 if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
3234 // Phis we just pass through the appropriate inputs.
3235 ins->ReplaceInput(merged_inst->InputAt(idx), idx);
3236 } else {
3237 ins->ReplaceInput(merged_inst, idx);
3238 }
3239 }
3240 }
3241
3242 void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) {
3243 for (HInstruction* ins : to_remove) {
3244 if (ins->GetBlock() == nullptr) {
3245 // Already dealt with.
3246 continue;
3247 }
3248 DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
3249 if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
3250 bool instruction_has_users =
3251 ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty());
3252 if (instruction_has_users) {
3253 // Make sure any remaining users of read are replaced.
3254 HInstruction* replacement =
3255 helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins);
3256 // NB ReplaceInput will remove a use from the list so this is
3257 // guaranteed to finish eventually.
3258 while (!ins->GetUses().empty()) {
3259 const HUseListNode<HInstruction*>& use = ins->GetUses().front();
3260 use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3261 }
3262 while (!ins->GetEnvUses().empty()) {
3263 const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front();
3264 use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3265 }
3266 } else {
3267 DCHECK(ins->GetUses().empty())
3268 << "Instruction has users!\n"
3269 << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses();
3270 DCHECK(ins->GetEnvUses().empty())
3271 << "Instruction has users!\n"
3272 << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses();
3273 }
3274 ins->GetBlock()->RemoveInstruction(ins);
3275 } else {
3276 // Can only be obj == other, obj != other, obj == obj (!?), or obj != obj (!?).
3277 // Since PHIs are escapes as far as LSE is concerned and we are before
3278 // any escapes these are the only 4 options.
3279 DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
3280 HInstruction* replacement;
3281 if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
3282 replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
3283 : GetGraph()->GetIntConstant(0);
3284 } else {
3285 replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
3286 : GetGraph()->GetIntConstant(1);
3287 }
3288 ins->ReplaceWith(replacement);
3289 ins->GetBlock()->RemoveInstruction(ins);
3290 }
3291 }
3292 }
3293
3294 void CreateConstructorFences(
3295 const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
3296 if (!constructor_fences.empty()) {
3297 uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
3298 for (auto& [cf, idx] : constructor_fences) {
3299 if (cf->GetInputs().size() == 1) {
3300 cf->GetBlock()->RemoveInstruction(cf);
3301 } else {
3302 cf->RemoveInputAt(idx);
3303 }
3304 }
3305 for (const ExecutionSubgraph::ExcludedCohort& ec :
3306 GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3307 for (HBasicBlock* blk : ec.EntryBlocks()) {
3308 for (HBasicBlock* materializer :
3309 Filter(MakeIterationRange(blk->GetPredecessors()),
3310 [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
3311 HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
3312 GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
3313 materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
3314 }
3315 }
3316 }
3317 }
3318 }
3319
3320 void PredicateInstructions(
3321 const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3322 for (auto& [ins, idx] : to_predicate) {
3323 if (UNLIKELY(ins->GetBlock() == nullptr)) {
3324 // Already handled due to obj == obj;
3325 continue;
3326 } else if (ins->IsInstanceFieldGet()) {
3327 // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
3328 HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
3329 ins->AsInstanceFieldGet(),
3330 GetMaterialization(ins->GetBlock()),
3331 helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
3332 MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
3333 ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
3334 if (ins->GetType() == DataType::Type::kReference) {
3335 // Reference info is the same
3336 new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
3337 }
3338 // In this phase, substitute instructions are used only for the predicated get
3339 // default values which are used only if the partial singleton did not escape,
3340 // so the out value of the `new_fget` for the relevant cases is the same as
3341 // the default value.
3342 // TODO: Use the default value for materializing default values used by
3343 // other predicated loads to avoid some unnecessary Phis. (This shall
3344 // complicate the search for replacement in `ReplacementOrValue()`.)
3345 DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr);
3346 helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget;
3347 ins->ReplaceWith(new_fget);
3348 ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
3349 CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
3350 << "Instruction: " << *ins << " uses: " << ins->GetUses()
3351 << ", env: " << ins->GetEnvUses();
3352 ins->GetBlock()->RemoveInstruction(ins);
3353 } else if (ins->IsInstanceFieldSet()) {
3354 // Any predicated sets shouldn't require movement.
3355 ins->AsInstanceFieldSet()->SetIsPredicatedSet();
3356 MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
3357 HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3358 ins->ReplaceInput(merged_inst, idx);
3359 } else {
3360 // comparisons need to be split into 2.
3361 DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
3362 bool this_is_first = idx == 0;
3363 if (ins->InputAt(0) == ins->InputAt(1)) {
3364 // This is an obj == obj or obj != obj.
3365 // No idea why anyone would do this but whatever.
3366 ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
3367 ins->GetBlock()->RemoveInstruction(ins);
3368 continue;
3369 } else {
3370 HInstruction* is_escaped = new (GetGraph()->GetAllocator())
3371 HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
3372 HInstruction* combine_inst =
3373 ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
3374 DataType::Type::kBool, is_escaped, ins))
3375 : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
3376 DataType::Type::kBool, is_escaped, ins));
3377 ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
3378 ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
3379 ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
3380 ins->ReplaceWith(combine_inst);
3381 combine_inst->ReplaceInput(ins, 1);
3382 }
3383 }
3384 }
3385 }
3386
3387 // Figure out all the instructions we need to
3388 // fixup/replace/remove/duplicate. Since this requires an iteration of an
3389 // intrusive linked list we want to do it only once and collect all the data
3390 // here.
3391 void CollectReplacements(
3392 ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
3393 ScopedArenaVector<HInstruction*>& to_remove,
3394 ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
3395 ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3396 size_t size = new_instance_->GetUses().SizeSlow();
3397 to_replace.reserve(size);
3398 to_remove.reserve(size);
3399 constructor_fences.reserve(size);
3400 to_predicate.reserve(size);
3401 for (auto& use : new_instance_->GetUses()) {
3402 HBasicBlock* blk =
3403 helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
3404 if (InEscapeCohort(blk)) {
3405 LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
3406 << *GetMaterialization(blk);
3407 to_replace.push_back({use.GetUser(), use.GetIndex()});
3408 } else if (IsPostEscape(blk)) {
3409 LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
3410 // The fields + cmp are normal uses. Phi can only be here if it was
3411 // generated by full LSE so whatever store+load that created the phi
3412 // is the escape.
3413 if (use.GetUser()->IsPhi()) {
3414 to_replace.push_back({use.GetUser(), use.GetIndex()});
3415 } else {
3416 DCHECK(use.GetUser()->IsFieldAccess() ||
3417 use.GetUser()->IsEqual() ||
3418 use.GetUser()->IsNotEqual())
3419 << *use.GetUser() << "@" << use.GetIndex();
3420 to_predicate.push_back({use.GetUser(), use.GetIndex()});
3421 }
3422 } else if (use.GetUser()->IsConstructorFence()) {
3423 LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
3424 constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
3425 } else {
3426 LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
3427 to_remove.push_back(use.GetUser());
3428 }
3429 }
3430 DCHECK_EQ(
3431 to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
3432 size);
3433 }
3434
3435 void GenerateMaterializationValueFromPredecessorsDirect(
3436 HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
3437 DCHECK(!pred_vals.empty());
3438 bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
3439 return val == pred_vals.front();
3440 });
3441 if (LIKELY(all_equal)) {
3442 AddMaterialization(blk, pred_vals.front());
3443 } else {
3444 // Make a PHI for the predecessors.
3445 HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
3446 GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
3447 for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
3448 phi->SetRawInputAt(off, ins);
3449 }
3450 blk->AddPhi(phi);
3451 AddMaterialization(blk, phi);
3452 }
3453 }
3454
3455 HGraph* GetGraph() const {
3456 return helper_->GetGraph();
3457 }
3458
3459 HNewInstance* new_instance_;
3460 PartialLoadStoreEliminationHelper* helper_;
3461 ArenaBitVector heap_locs_;
3462 ScopedArenaVector<HInstruction*> materializations_;
3463 const HeapLocationCollector& collector_;
3464 const ExecutionSubgraph* subgraph_;
3465 };
3466
3467 ArrayRef<HeapReferenceData> GetHeapRefs() {
3468 return ArrayRef<HeapReferenceData>(heap_refs_);
3469 }
3470
3471 bool IsMaterializationBlock(HBasicBlock* blk) const {
3472 return blk->GetBlockId() >= first_materialization_block_id_;
3473 }
3474
3475 HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3476 size_t idx = GetMaterializationBlockIndex(entry, pred_num);
3477 HBasicBlock* blk = materialization_blocks_[idx];
3478 if (blk == nullptr) {
3479 blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
3480 GetGraph()->AddBlock(blk);
3481 LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
3482 << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
3483 blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
3484 materialization_blocks_[idx] = blk;
3485 }
3486 return blk;
3487 }
3488
3489 HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3490 HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
3491 DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
3492 << entry->GetPredecessors()[pred_num]->GetBlockId();
3493 return out;
3494 }
3495
3496 IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
3497 return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
3498 GetGraph()->GetBlocks().end());
3499 }
3500
3501 void FixupPartialObjectUsers() {
3502 for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3503 // Use the materialized instances to replace original instance
3504 ref_data.FixupUses(/*first_pass=*/true);
3505 CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3506 << ref_data.OriginalNewInstance()->GetUses() << ", "
3507 << ref_data.OriginalNewInstance()->GetEnvUses();
3508 }
3509 // This can cause new uses to be created due to the creation of phis/pred-get defaults.
3510 for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3511 // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
3512 ref_data.FixupUses(/*first_pass=*/false);
3513 CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3514 << ref_data.OriginalNewInstance()->GetUses() << ", "
3515 << ref_data.OriginalNewInstance()->GetEnvUses();
3516 }
3517 }
3518
3519 // Finds the first block which either is, or dominates, the given block and is
3520 // not a materialization block.
3521 HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
3522 if (LIKELY(!IsMaterializationBlock(blk))) {
3523 // Not a materialization block so itself.
3524 return blk;
3525 } else if (blk->GetNumberOfPredecessors() != 0) {
3526 // We're far enough along that the materialization blocks have been
3527 // inserted into the graph so no need to go searching.
3528 return blk->GetSinglePredecessor();
3529 }
3530 // Search through the materialization blocks to find where it will be
3531 // inserted.
3532 for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3533 if (mat == blk) {
3534 size_t cur_pred_idx = idx % max_preds_per_block_;
3535 HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3536 return entry->GetPredecessors()[cur_pred_idx];
3537 }
3538 }
3539 LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
3540 return nullptr;
3541 }
3542
3543 void InsertMaterializationBlocks() {
3544 for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3545 if (mat == nullptr) {
3546 continue;
3547 }
3548 size_t cur_pred_idx = idx % max_preds_per_block_;
3549 HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3550 HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
3551 mat->InsertBetween(pred, entry);
3552 LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
3553 << pred->GetBlockId() << "->" << entry->GetBlockId();
3554 }
3555 }
3556
3557 // Replace any env-uses remaining of the partial singletons with the
3558 // appropriate phis and remove the instructions.
3559 void RemoveReplacedInstructions() {
3560 for (HeapReferenceData& ref_data : GetHeapRefs()) {
3561 CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3562 << ref_data.OriginalNewInstance()->GetUses() << ", "
3563 << ref_data.OriginalNewInstance()->GetEnvUses()
3564 << " inst is: " << ref_data.OriginalNewInstance();
3565 const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
3566 while (!env_uses.empty()) {
3567 const HUseListNode<HEnvironment*>& use = env_uses.front();
3568 HInstruction* merged_inst =
3569 ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
3570 LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
3571 << " with " << *merged_inst;
3572 use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
3573 }
3574 ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
3575 }
3576 }

  // We need to make sure any allocations dominate their environment uses.
  // Technically we could probably remove the env-uses and be fine, but this is easy.
  void ReorderMaterializationsForEnvDominance() {
    for (HBasicBlock* blk : IterateMaterializationBlocks()) {
      ScopedArenaAllocator alloc(alloc_->GetArenaStack());
      ArenaBitVector still_unsorted(
          &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
      // This is guaranteed to be very short (we will abandon LSE if there are
      // >= kMaxNumberOfHeapLocations (32) heap locations, so that is the
      // absolute maximum size this list can be), so doing a selection sort is
      // fine. This avoids the need to do a complicated recursive check to
      // ensure transitivity for std::sort.
      ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
      materializations.reserve(GetHeapRefs().size());
      for (HInstruction* ins :
           MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
        if (ins->IsNewInstance()) {
          materializations.push_back(ins->AsNewInstance());
          still_unsorted.SetBit(ins->GetId());
        }
      }
      using Iter = ScopedArenaVector<HNewInstance*>::iterator;
      Iter unsorted_start = materializations.begin();
      Iter unsorted_end = materializations.end();
      // Selection sort. Required since the only check we can easily perform is
      // an is-before-all-unsorted check.
      while (unsorted_start != unsorted_end) {
        bool found_instruction = false;
        for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
          HNewInstance* ni = *candidate;
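          // 'ni' can be placed next if none of its environments refer to a
          // materialization in this block that is still unsorted (i.e. not yet
          // placed before it).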
          if (std::none_of(ni->GetAllEnvironments().cbegin(),
                           ni->GetAllEnvironments().cend(),
                           [&](const HEnvironment* env) {
                             return std::any_of(
                                 env->GetEnvInputs().cbegin(),
                                 env->GetEnvInputs().cend(),
                                 [&](const HInstruction* env_element) {
                                   return env_element != nullptr &&
                                          still_unsorted.IsBitSet(env_element->GetId());
                                 });
                           })) {
            still_unsorted.ClearBit(ni->GetId());
            std::swap(*unsorted_start, *candidate);
            ++unsorted_start;
            found_instruction = true;
            break;
          }
        }
        CHECK(found_instruction) << "Unable to select next materialization instruction."
                                 << " Environments have a dependency loop!";
      }
      // Reverse so that as we prepend them we end up with the correct order.
      auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
      for (HNewInstance* ins : reverse_iter) {
        if (blk->GetFirstInstruction() != ins) {
          // Don't do checks since they would verify that the move is safe WRT
          // ins->CanBeMoved(), which is false for HNewInstance.
          ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
        }
      }
    }
  }

 private:
  void CollectInterestingHeapRefs() {
    // Get all the partials we need to move around.
    for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
      ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (ri->IsPartialSingleton() &&
          ri->GetReference()->GetBlock() != nullptr &&
          ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
        RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
      }
    }
  }

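  // Associate heap location 'loc' with the HeapReferenceData for 'ni', creating
  // that record the first time the allocation is seen.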
  void RecordHeapRefField(HNewInstance* ni, size_t loc) {
    DCHECK(ni != nullptr);
    // This is likely to be very short so just do a linear search.
    auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
      return data.OriginalNewInstance() == ni;
    });
    HeapReferenceData& cur_ref =
        (it == heap_refs_.end())
            ? heap_refs_.emplace_back(this,
                                      ni,
                                      lse_->heap_location_collector_.GetHeapLocation(loc)
                                          ->GetReferenceInfo()
                                          ->GetNoEscapeSubgraph(),
                                      alloc_)
            : *it;
    cur_ref.AddHeapLocation(loc);
  }

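  // Keep track of phis created as materialization values (collected in
  // new_ref_phis_); non-phi materializations need no extra bookkeeping here.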
  void NotifyNewMaterialization(HInstruction* ins) {
    if (ins->IsPhi()) {
      new_ref_phis_.push_back(ins->AsPhi());
    }
  }

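  // Flat index into materialization_blocks_: block-id major, predecessor-index
  // minor (see the comment on materialization_blocks_ below).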
  size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
    DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
        << "block is a materialization block!";
    DCHECK_LT(pred_num, max_preds_per_block_);
    return blk->GetBlockId() * max_preds_per_block_ + pred_num;
  }

  HGraph* GetGraph() const {
    return lse_->GetGraph();
  }

  LSEVisitor* lse_;
  ScopedArenaAllocator* alloc_;
  ScopedArenaVector<HInstruction*> new_ref_phis_;
  ScopedArenaVector<HeapReferenceData> heap_refs_;
  size_t max_preds_per_block_;
  // An array of (# of non-materialization blocks) * max_preds_per_block_
  // entries arranged in block-id major order. Since we can have at most one
  // materialization block on each edge, this is the maximum possible number of
  // materialization blocks.
  ScopedArenaVector<HBasicBlock*> materialization_blocks_;
  size_t first_materialization_block_id_;

  friend void LSEVisitor::MovePartialEscapes();
};

// Work around C++ type-checking annoyances with not being able to forward-declare inner types.
class HeapRefHolder
    : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};

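// Creates the materialization of the held reference for the escape edge pred_idx->entry: a
// materialization block is placed on that edge, the original HNewInstance is cloned into it,
// and an HInstanceFieldSet is emitted for every tracked heap location whose value at the end
// of the predecessor is not the default, so the object is fully initialized at the point it
// escapes. Returns the clone, or the null constant if the original allocation does not
// dominate 'entry'.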
HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
                                                      HeapRefHolder&& holder,
                                                      size_t pred_idx,
                                                      HBasicBlock* entry) {
  PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
  HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
  HInstruction* new_inst = ref_data.OriginalNewInstance();
  if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
    LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
             << " is null!";
    return GetGraph()->GetNullConstant();
  }
  HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
  CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
  HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
  repl_create->SetPartialMaterialization();
  bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
  repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
  MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
  LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
  ref_data.AddMaterialization(bb, repl_create);
  const FieldInfo* info = nullptr;
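  // Copy the value tracked at the end of the predecessor into each field of the freshly
  // materialized object, materializing phi placeholders where the value is not yet concrete.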
  for (const HeapLocation* loc : ref_data.IterateLocations()) {
    size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
    info = field_infos_[loc_off];
    DCHECK(loc->GetIndex() == nullptr);
    Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
    if (value.NeedsLoopPhi() || value.IsMergedUnknown()) {
      Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
      DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
          << repl << " from " << value << " pred is " << old_pred->GetBlockId();
      if (!repl.IsInvalid()) {
        value = repl;
      } else {
        FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
        value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
      }
    } else if (value.NeedsNonLoopPhi()) {
      Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
      DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
          << repl << " from " << value << " pred is " << old_pred->GetBlockId();
      if (!repl.IsInvalid()) {
        value = repl;
      } else {
        MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
        value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
      }
    }
    DCHECK(value.IsDefault() || value.IsInstruction())
        << GetGraph()->PrettyMethod() << ": " << value;

    if (!value.IsDefault() &&
        // shadow$_klass_ doesn't need to be manually initialized.
        MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
      CHECK(info != nullptr);
      HInstruction* set_value =
          new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
                                                             value.GetInstruction(),
                                                             field_infos_[loc_off]->GetField(),
                                                             loc->GetType(),
                                                             MemberOffset(loc->GetOffset()),
                                                             false,
                                                             field_infos_[loc_off]->GetFieldIndex(),
                                                             loc->GetDeclaringClassDefIndex(),
                                                             field_infos_[loc_off]->GetDexFile(),
                                                             0u);
      bb->InsertInstructionAfter(set_value, repl_create);
      LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
    }
  }
  return repl_create;
}

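// Returns the instruction holding the value that 'read' observes on the non-escaping paths,
// materializing a phi or the type's default value if needed. This is used as the default value
// for the PredicatedInstanceFieldGet that replaces the read.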
HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
  size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
  Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
  LSE_VLOG << "using " << pred << " as default value for " << *read;
  if (pred.IsInstruction()) {
    return pred.GetInstruction();
  } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
    FullyMaterializePhi(pred.GetPhiPlaceholder(),
                        heap_location_collector_.GetHeapLocation(loc)->GetType());
    HInstruction* res = Replacement(pred).GetInstruction();
    LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
    return res;
  } else if (pred.IsDefault()) {
    HInstruction* res = GetDefaultValue(read->GetType());
    LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
    return res;
  }
  LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
             << "! This should be impossible! Value is " << pred;
  UNREACHABLE();
}

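// Entry point for partial LSE: for each partial singleton, create materializations on the
// escape edges, propagate them through the graph with phis, rewrite the users, splice the
// materialization blocks into the graph, and remove the original allocations.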
void LSEVisitor::MovePartialEscapes() {
  if (!ShouldPerformPartialLSE()) {
    return;
  }

  ScopedArenaAllocator saa(allocator_.GetArenaStack());
  PartialLoadStoreEliminationHelper helper(this, &saa);

  // Since we now have more information for PHIs (we know the object hasn't
  // escaped), we need to clear the old phi-replacements where we weren't
  // able to find the value.
  PrepareForPartialPhiComputation();

  for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
    LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
    // Setup entry and exit blocks.
    for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
      // Setup materialization blocks.
      for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
        // Setup entries.
        // TODO: Assuming we correctly break critical edges, every entry block
        // must have only a single predecessor, so we could just put all this
        // stuff in there. OTOH the simplifier can do it for us and this is
        // simpler to implement - giving clean separation between the original
        // graph and materialization blocks - so for now we might as well have
        // these new blocks.
        ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
        ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
        pred_vals.reserve(entry->GetNumberOfPredecessors());
        for (const auto& [pred, pred_idx] :
             ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
          DCHECK(!helper.IsMaterializationBlock(pred));
          if (excluded_cohort.IsEntryBlock(pred)) {
            pred_vals.push_back(ref_data.GetMaterialization(pred));
            continue;
          } else {
            pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
          }
        }
        ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
      }

      // Setup exit block heap-values for later phi-generation.
      for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
        // Mark every exit of the cohorts as having a value so we can easily
        // materialize the PHIs.
        // TODO: By setting this we can easily use the normal MaterializeLoopPhis
        // (via FullyMaterializePhis) in order to generate the default-values
        // for predicated-gets. This has the unfortunate side effect of creating
        // somewhat more phis than are really needed (in some cases). We really
        // should try to eventually know that we can lower these PHIs to only
        // the non-escaping value in cases where it is possible. Currently this
        // is done to some extent in instruction_simplifier, but we have more
        // information here to do the right thing.
        for (const HeapLocation* loc : ref_data.IterateLocations()) {
          size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
          // This Value::Default() is only used to fill in PHIs used as the
          // default value for PredicatedInstanceFieldGets. The actual value
          // stored there is meaningless since the Predicated-iget will use the
          // actual field value instead on these paths.
          heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
        }
      }
    }

    // String the materialization through the graph.
    // Visit in RPO to PHI the materialized object through the cohort.
    for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
      // NB This doesn't include materialization blocks.
      DCHECK(!helper.IsMaterializationBlock(blk))
          << "Materialization blocks should not be in RPO yet.";
      if (ref_data.HasMaterialization(blk)) {
        continue;
      } else if (ref_data.BeforeAllEscapes(blk)) {
        ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
        continue;
      } else {
        ref_data.GenerateMaterializationValueFromPredecessors(blk);
      }
    }
  }

  // Once we've generated all the materializations we can update the users.
  helper.FixupPartialObjectUsers();

  // Actually put the materialization blocks into the graph.
  helper.InsertMaterializationBlocks();

  // Get rid of the original instructions.
  helper.RemoveReplacedInstructions();

  // Ensure everything is ordered correctly in the materialization blocks. This
  // involves moving every NewInstance to the top and ordering them so that any
  // required env-uses are correctly ordered.
  helper.ReorderMaterializationsForEnvDominance();
}

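// Commits the results of full LSE: replaces each eliminated load with its recorded substitute,
// removes stores that were not marked as kept, and finally removes constructor fences and
// unused singleton allocations.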
void LSEVisitor::FinishFullLSE() {
  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
    DCHECK_EQ(is_store, IsStore(record.load_or_store));
    if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
    }
  }

  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

// The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
// cannot be directly allocated with an arena allocator, so we need to wrap it.
class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
 public:
  LSEVisitorWrapper(HGraph* graph,
                    const HeapLocationCollector& heap_location_collector,
                    bool perform_partial_lse,
                    OptimizingCompilerStats* stats)
      : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {}

  void Run() {
    lse_visitor_.Run();
  }

 private:
  LSEVisitor lse_visitor_;
};

bool LoadStoreElimination::Run(bool enable_partial_lse) {
  if (graph_->IsDebuggable()) {
    // Debugger may set heap values or trigger deoptimization of callers.
    // Skip this optimization.
    return false;
  }
  // We need to be able to determine reachability. Clear it just to be safe,
  // but it should initially be empty.
  graph_->ClearReachabilityInformation();
  // Computing reachability is O(blocks^3) in time, but it lets us answer
  // reachability queries in O(1) afterwards.
  graph_->ComputeReachabilityInformation();
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_,
                        stats_,
                        &allocator,
                        enable_partial_lse ? LoadStoreAnalysisType::kFull
                                           : LoadStoreAnalysisType::kBasic);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA, skip this optimization.
    return false;
  }

  std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
      graph_, heap_location_collector, enable_partial_lse, stats_));
  lse_visitor->Run();
  return true;
}

#undef LSE_VLOG

}  // namespace art