/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_utils_iterator.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"
/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 * - In phase 1, we visit basic blocks in reverse post order and for each basic
 *   block, visit instructions sequentially, recording heap values and looking
 *   for loads and stores to eliminate without relying on loop Phis.
 * - In phase 2, we look for loads that can be replaced by creating loop Phis
 *   or using a loop-invariant value.
 * - In phase 3, we determine which stores are dead and can be eliminated and
 *   based on that information we re-evaluate whether some kept stores are
 *   storing the same value as the value in the heap location; such stores are
 *   also marked for elimination.
 * - In phase 4, we commit the changes, replacing loads marked for elimination
 *   in previous processing and removing stores not marked for keeping. We also
 *   remove allocations that are no longer needed.
 * - In phase 5, we move allocations which only escape along some executions
 *   closer to their escape points and fix up non-escaping paths with their
 *   actual values, creating PHIs when needed.
 *
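 * As an illustrative example (not taken from the code below) of the overall
 * effect, consider a non-escaping allocation:
 *     obj = new Point();   // Allocation removed in phase 4 if never needed.
 *     obj.x = 1;           // Dead store, removed.
 *     obj.x = 2;           // Store eliminated; LSE tracks the value instead.
 *     return obj.x;        // Load replaced by the constant 2.
 *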
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 * - For a loop header of an irreducible loop, all heap values are unknown.
 * - For a loop header of a normal loop, all values unknown at the end of the
 *   preheader are initialized to unknown, other heap values are set to Phi
 *   placeholders as we cannot determine yet whether these values are known on
 *   all back-edges. We use Phi placeholders also for array heap locations with
 *   index defined inside the loop but this helps only when the value remains
 *   zero from the array allocation throughout the loop.
 * - For catch blocks, we clear all assumptions since we arrived due to an
 *   instruction throwing.
 * - For other basic blocks, we merge incoming values from the end of all
 *   predecessors. If any incoming value is unknown, the start value for this
 *   block is also unknown. Otherwise, if all the incoming values are the same
 *   (including the case of a single predecessor), the incoming value is used.
 *   Otherwise, we use a Phi placeholder to indicate different incoming values.
 *   We record whether such Phi placeholder depends on a loop Phi placeholder.
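 *
 * For example (illustrative merge): if one predecessor block ends with `obj.f`
 * holding value 1 and another ends with `obj.f` holding value 2, the successor
 * starts with a Phi placeholder for `obj.f`; if both predecessors end with
 * value 1, the successor simply starts with the value 1.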
 *
 * For each instruction in the block
 * - If the instruction is a load from a heap location with a known value not
 *   dependent on a loop Phi placeholder, the load can be eliminated, either by
 *   using an existing instruction or by creating new Phi(s) instead. In order
 *   to maintain the validity of all heap locations during the optimization
 *   phase, we only record substitutes at this phase and the real elimination
 *   is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *   replacement are recorded for processing later.
 * - If the instruction is a store, it updates the heap value for the heap
 *   location with the stored value and records the store itself so that we can
 *   mark it for keeping if the value becomes observable. Heap values are
 *   invalidated for heap locations that may alias with the store instruction's
 *   heap location and their recorded stores are marked for keeping as they are
 *   now potentially observable. The store instruction can be eliminated unless
 *   the value stored is later needed, e.g. by a load from the same/aliased heap
 *   location, or the heap location persists at method return/deoptimization.
 * - A store that stores the same value as the heap value is eliminated.
 * - For newly instantiated instances, their heap values are initialized to
 *   language defined default values.
 * - Finalizable objects are considered as persisting at method
 *   return/deoptimization.
 * - Some instructions such as invokes are treated as loading and invalidating
 *   all the heap values, depending on the instruction's side effects.
 * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *   partial overlap access among ArrayGet/ArraySet/VecLoad/VecStore is seen as
 *   alias and no load/store is eliminated in such case.
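 *
 * For example, in straight-line code (illustrative):
 *     obj.f = a;    // Store eliminated later: overwritten before being read.
 *     obj.f = b;    // Store kept if the value may be observed.
 *     x = obj.f;    // Load eliminated in this phase and replaced by `b`.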
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *   O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *   O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
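 *
 * For example (illustrative), for a field set to 0 before a loop and to `i`
 * inside the loop, a load after the back-edge can be replaced by materializing
 * a loop Phi(0, i); if the field were instead clobbered by an opaque call
 * inside the loop, the loop Phi placeholder would be replaced by unknown.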
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *   O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such subset has a single other input. This has
 * time complexity
 *   O(phi_placeholders_found^3) .
 * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *   O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *   O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *   O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *   O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit of number of such iterations, the total time complexity is
 *   O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *   heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *   heap_locations^2 * edges .
 * Using the estimate
 *   edges <= blocks^2
 * and
 *   phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *   O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *   O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *   O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
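 *
 * For example (illustrative), a store `obj.f = x` is marked for elimination in
 * this phase when the recalculated old value of `obj.f` at that point is
 * already `x`, say because the same value was stored on every incoming path.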
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *   O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between the
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *   heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *   O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *   O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of objects called singletons are instantiated in the method
 * and have a single name, i.e. no aliases, so they have exclusive heap
 * locations. Singletons are helpful in narrowing down the life span of a heap
 * location such that they do not always need to participate in merging heap
 * values. Allocation of a singleton can be eliminated if that singleton is not
 * used and does not persist at method return/deoptimization.
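 *
 * For example (illustrative), a `new Point()` whose reference never leaves the
 * method and is not needed at any return or deoptimization point is removed
 * together with all stores into it.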
 *
 * The time complexity of this phase is
 *   O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kUnknown,
      kDefault,
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsPlainLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsConvertedLoopPhiMarker {
      HInstruction* load_;  // Load from a narrower location than the loop phi it needs.
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value Unknown() {
      return Value(ValuelessType::kUnknown);
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPlainLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsPlainLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForConvertedLoopPhiPlaceholder(HInstruction* load) {
      return Value(NeedsConvertedLoopPhiMarker{load});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForPlainLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kUnknown;
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsPlainLoopPhi() const {
      return std::holds_alternative<NeedsPlainLoopPhiMarker>(value_);
    }

    bool NeedsConvertedLoopPhi() const {
      return std::holds_alternative<NeedsConvertedLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return NeedsPlainLoopPhi() || NeedsConvertedLoopPhi();
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else {
        DCHECK(NeedsPlainLoopPhi());
        return std::get<NeedsPlainLoopPhiMarker>(value_).phi_;
      }
    }

    size_t GetHeapLocation() const {
      DCHECK(NeedsPhi()) << *this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    HInstruction* GetLoopPhiConversionLoad() const {
      DCHECK(NeedsConvertedLoopPhi());
      return std::get<NeedsConvertedLoopPhiMarker>(value_).load_;
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     NeedsNonLoopPhiMarker,
                                     NeedsPlainLoopPhiMarker,
                                     NeedsConvertedLoopPhiMarker>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsPlainLoopPhiMarker& p1,
                                   const Value::NeedsPlainLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsConvertedLoopPhiMarker& p1,
                                   const Value::NeedsConvertedLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);

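  // A set of type conversion result types, kept as a bit mask indexed by DataType::Type.
  // It is used to check whether all conversions recorded for a value are implicit
  // for a given input.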
  class TypeConversionSet {
   public:
    TypeConversionSet() : type_conversions_(0u) {}

    void Add(DataType::Type result_type) {
      static_assert(enum_cast<>(DataType::Type::kLast) < BitSizeOf<uint32_t>());
      type_conversions_ |= 1u << enum_cast<>(result_type);
    }

    void Add(TypeConversionSet other) {
      type_conversions_ |= other.type_conversions_;
    }

    bool AreAllTypeConversionsImplicit(HInstruction* input) const {
      if (type_conversions_ != 0u) {
        if (input->IsIntConstant()) {
          int32_t value = input->AsIntConstant()->GetValue();
          for (uint32_t raw_type : LowToHighBits(type_conversions_)) {
            DataType::Type type = enum_cast<DataType::Type>(raw_type);
            if (!DataType::IsTypeConversionImplicit(value, type)) {
              return false;
            }
          }
        } else {
          DataType::Type input_type = input->GetType();
          for (uint32_t raw_type : LowToHighBits(type_conversions_)) {
            DataType::Type type = enum_cast<DataType::Type>(raw_type);
            if (!DataType::IsTypeConversionImplicit(input_type, type)) {
              return false;
            }
          }
        }
      }
      return true;
    }

   private:
    uint32_t type_conversions_;
  };

  Value SkipTypeConversions(Value value,
                            /*inout*/ TypeConversionSet* type_conversions = nullptr) const {
    while (value.NeedsConvertedLoopPhi()) {
      HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
      DCHECK(!conversion_load->IsVecLoad());
      if (type_conversions != nullptr) {
        type_conversions->Add(conversion_load->GetType());
      }
      ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
      DCHECK(prev_record != nullptr);
      value = prev_record->value;
    }
    return value;
  }

  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
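  // For example, with 4 heap locations, the placeholder for block 3 and heap
  // location 2 maps to index 3 * 4 + 2 = 14.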
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(SkipTypeConversions(phi_placeholder).GetPhiPlaceholder());
  }

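  // Any object that is not a removable singleton is treated as potentially escaping.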
  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsNonLoopPhi() || value.NeedsPlainLoopPhi())
        << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsConvertedLoopPhi() &&
        substitute_instructions_for_loads_[value.GetLoopPhiConversionLoad()->GetId()] != nullptr) {
      return Value::ForInstruction(
          substitute_instructions_for_loads_[value.GetLoopPhiConversionLoad()->GetId()]);
    } else if ((value.NeedsNonLoopPhi() || value.NeedsPlainLoopPhi()) &&
               phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  // Calculate the value stored in location `idx` for a loop Phi placeholder-dependent `load`.
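  // For example (illustrative): an Int8 load from an Int32 location records a type
  // conversion, as the stored value needs the sign-extension bits, while conversions
  // to types at least as wide as the stored-to location are dropped because they do
  // not change the bits that are actually stored.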
  Value StoredValueForLoopPhiPlaceholderDependentLoad(size_t idx, HInstruction* load) const {
    DCHECK(IsLoad(load));
    DCHECK_LT(static_cast<size_t>(load->GetId()), loads_requiring_loop_phi_.size());
    DCHECK(loads_requiring_loop_phi_[load->GetId()] != nullptr);
    Value loaded_value = loads_requiring_loop_phi_[load->GetId()]->value;
    DCHECK(loaded_value.NeedsLoopPhi());
    DataType::Type load_type = load->GetType();
    size_t load_size = DataType::Size(load_type);
    size_t store_size = DataType::Size(heap_location_collector_.GetHeapLocation(idx)->GetType());

    if (kIsDebugBuild && load->IsVecLoad()) {
      // For vector operations, the load type is always `Float64` and therefore the store size is
      // never higher and we do not record any conversions below. This is OK because we currently
      // do not vectorize any loops with widening operations.
      CHECK_EQ(load_size, DataType::Size(DataType::Type::kFloat64));
      CHECK_LE(store_size, load_size);
      CHECK(!loaded_value.NeedsConvertedLoopPhi());
    } else if (kIsDebugBuild) {
      // There are no implicit conversions between 64-bit types and smaller types.
      // We shall not record any conversions for 64-bit types.
      CHECK_EQ(load_size == DataType::Size(DataType::Type::kInt64),
               store_size == DataType::Size(DataType::Type::kInt64));
      CHECK_IMPLIES(load_size == DataType::Size(DataType::Type::kInt64),
                    !loaded_value.NeedsConvertedLoopPhi());
    }
    // The `loaded_value` can record a conversion only if the `load` was from
    // a wider field than the previous converting load.
    DCHECK_IMPLIES(loaded_value.NeedsConvertedLoopPhi(),
                   load_size > DataType::Size(loaded_value.GetLoopPhiConversionLoad()->GetType()));

    Value value = loaded_value;
    if (load_size < store_size) {
      // Add a type conversion to a narrow type unless it's an implicit conversion
      // from an already converted value.
      if (!loaded_value.NeedsConvertedLoopPhi() ||
          !DataType::IsTypeConversionImplicit(loaded_value.GetLoopPhiConversionLoad()->GetType(),
                                              load_type)) {
        value = Value::ForConvertedLoopPhiPlaceholder(load);
      } else {
        DCHECK(value.Equals(loaded_value));
      }
    } else {
      // Remove conversions to types at least as wide as the field we're storing to.
      // We record only conversions that define sign-/zero-extension bits to store.
      while (value.NeedsConvertedLoopPhi() &&
             DataType::Size(value.GetLoopPhiConversionLoad()->GetType()) >= store_size) {
        HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
        ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
        DCHECK(prev_record != nullptr);
        value = prev_record->value;
        DCHECK(value.NeedsLoopPhi());
      }
    }

    DCHECK_EQ(PhiPlaceholderIndex(loaded_value), PhiPlaceholderIndex(value));
    return value;
  }

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // We should never add a type conversion to a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // All vector instructions report their type as `Float64`, so the conversion is implicit.
    // This is OK because we currently do not vectorize any loops with widening operations.
    DCHECK(!instruction->IsVecLoad());

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
        expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations, it is better
    // to avoid creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsUnknown()) {
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with heap location at `loc_index`
  // and heap_values of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //   packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //   VecStore array[i ,i+1,i+2,i+3] = packed_data;
    //   VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //   VecLoad vx = array[i,i+1,i+2,i+3];             <-- Cannot be eliminated because the
    //                                                      value here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    //   (1) May alias due to partial overlap of two vector accesses,
    //       e.g. a[i..i+3] and a[i+1,..,i+4].
    //   (2) May alias because two vector accesses may completely overlap each other,
    //       e.g. a[i..i+3] and b[i..i+3].
    //   (3) May alias but the exact relationship between the two locations is unknown,
    //       e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  void MaterializeTypeConversionsIfNeeded(Value value);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
      heap_values[i].value = Value::Unknown();
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitGetLocation(instruction, idx);
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitGetLocation(instruction, idx);
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field_info = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field_info);
    RecordFieldInfo(&field_info, idx);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void HandleThrowingInstruction(HInstruction* instruction) {
    DCHECK(instruction->CanThrow());
    // If we are inside of a try, singletons can become visible since we may not exit the method.
    HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
  }

  void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
    HandleThrowingInstruction(method_entry);
  }

  void VisitMethodExitHook(HMethodExitHook* method_exit) override {
    HandleThrowingInstruction(method_exit);
  }

  void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
    HandleThrowingInstruction(div_zero_check);
  }

  void VisitNullCheck(HNullCheck* null_check) override {
    HandleThrowingInstruction(null_check);
  }

  void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
    HandleThrowingInstruction(bounds_check);
  }

  void VisitLoadClass(HLoadClass* load_class) override {
    if (load_class->CanThrow()) {
      HandleThrowingInstruction(load_class);
    }
  }

  void VisitLoadString(HLoadString* load_string) override {
    if (load_string->CanThrow()) {
      HandleThrowingInstruction(load_string);
    }
  }

  void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
    HandleThrowingInstruction(load_method_handle);
  }

  void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
    HandleThrowingInstruction(load_method_type);
  }

  void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
    HandleThrowingInstruction(sb_append);
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleThrowingInstruction(throw_instruction);
  }

  void VisitCheckCast(HCheckCast* check_cast) override {
    HandleThrowingInstruction(check_cast);
  }

  void HandleInvoke(HInstruction* instruction) {
    // If `instruction` can throw we have to presume all stores are visible.
    const bool can_throw = instruction->CanThrow();
    // If we are in a try, even singletons are observable.
    const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if we never escape.
      if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::Unknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::Unknown();
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    HBasicBlock* block = new_instance->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance) {
        if (offset >= mirror::kObjectHeaderSize ||
            MemberOffset(offset) == mirror::Object::MonitorOffset()) {
          // Instance fields except the header fields are set to default heap values.
          // The shadow$_monitor_ field is set to the default value however.
          heap_values[i].value = Value::Default();
          heap_values[i].stored_by = Value::Unknown();
        } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
          // The shadow$_klass_ field is special and has an actual value however.
          heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
          heap_values[i].stored_by = Value::Unknown();
        }
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewInstance can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    HBasicBlock* block = new_array->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      ReferenceInfo* info = location->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewArray can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitInstruction(HInstruction* instruction) override {
    // Throwing instructions must be handled specially.
    DCHECK(!instruction->CanThrow());
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // The number of unique Phi placeholders there can possibly be.
  size_t num_phi_placeholders_;

  // One array of heap value records for each block.
  ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;

  // We record loads and stores for re-processing when we find a loop Phi placeholder
  // with unknown value from a predecessor, and also for removing stores that are
  // found to be dead, i.e. not marked in `kept_stores_` at the end.
  struct LoadStoreRecord {
    HInstruction* load_or_store;
    size_t heap_location_index;
  };
  ScopedArenaVector<LoadStoreRecord> loads_and_stores_;

  // We record the substitute instructions for loads that should be
  // eliminated but may be used by heap locations. They'll be removed
  // in the end. These are indexed by the load's id.
  ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;

  // Record stores to keep in a bit vector indexed by instruction ID.
  ArenaBitVector kept_stores_;
  // When we need to keep all stores that feed a Phi placeholder, we just record the
  // index of that placeholder for processing after graph traversal.
  ArenaBitVector phi_placeholders_to_search_for_kept_stores_;

  // Loads that would require a loop Phi to replace are recorded for processing
  // later as we do not have enough information from back-edges to determine if
  // a suitable Phi can be found or created when we visit these loads.
  // This is a flat "map" indexed by the load instruction id.
  ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;

  // For stores, record the old value records that were replaced and the stored values.
  struct StoreRecord {
    StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
        : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
    ValueRecord old_value_record;
    HInstruction* stored_value;
  };
  // This is a flat "map" indexed by the store instruction id.
  ScopedArenaVector<StoreRecord*> store_records_;

  // Replacements for Phi placeholders.
  // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
  ScopedArenaVector<Value> phi_placeholder_replacements_;

  ScopedArenaVector<HInstruction*> singleton_new_instances_;

  // The field infos for each heap location (if relevant).
  ScopedArenaVector<const FieldInfo*> field_infos_;

  Phase current_phase_;

  friend std::ostream& operator<<(std::ostream& os, const Value& v);
  friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);

  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
  switch (phase) {
    case LSEVisitor::Phase::kLoadElimination:
      return oss << "kLoadElimination";
    case LSEVisitor::Phase::kStoreElimination:
      return oss << "kStoreElimination";
  }
}

constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
                          const LSEVisitor::PhiPlaceholder& p2) {
  return p1.Equals(p2);
}

constexpr bool operator==(const LSEVisitor::Value::NeedsPlainLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsPlainLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

constexpr bool operator==(const LSEVisitor::Value::NeedsConvertedLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsConvertedLoopPhiMarker& p2) {
  return p1.load_ == p2.load_;
}

constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
  p.Dump(oss);
  return oss;
}

constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
  return value_ == other.value_;
}

constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
  // Only valid values can be compared.
  DCHECK(IsValid());
  DCHECK(other.IsValid());
  if (value_ == other.value_) {
    // Note: Two unknown values are considered different.
    return !IsUnknown();
  } else {
    // Default is considered equal to zero-bit-pattern instructions.
    return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
           (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
  }
}

std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
  if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
    switch (GetValuelessType()) {
      case ValuelessType::kDefault:
        return os << "Default";
      case ValuelessType::kUnknown:
        return os << "Unknown";
      case ValuelessType::kInvalid:
        return os << "Invalid";
    }
  } else if (IsInstruction()) {
    return os << "Instruction[id: " << GetInstruction()->GetId()
              << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
  } else if (NeedsPlainLoopPhi()) {
    return os << "NeedsPlainLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  } else if (NeedsConvertedLoopPhi()) {
    return os << "NeedsConvertedLoopPhi[id: " << GetLoopPhiConversionLoad()->GetId()
              << ", block: " << GetLoopPhiConversionLoad()->GetBlock()->GetBlockId() << "]";
  } else {
    return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  }
}

std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
  return v.Dump(os);
}

LSEVisitor::LSEVisitor(HGraph* graph,
                       const HeapLocationCollector& heap_location_collector,
                       OptimizingCompilerStats* stats)
    : HGraphDelegateVisitor(graph, stats),
      heap_location_collector_(heap_location_collector),
      allocator_(graph->GetArenaStack()),
      num_phi_placeholders_(GetGraph()->GetBlocks().size() *
                            heap_location_collector_.GetNumberOfHeapLocations()),
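      // Note: A Phi placeholder is identified by a (block id, heap location index)
      // pair, so the product above bounds the number of distinct placeholders.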
      heap_values_for_(graph->GetBlocks().size(),
                       ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
                       allocator_.Adapter(kArenaAllocLSE)),
      loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
      // We may add new instructions (default values, Phis) but we're not adding loads
      // or stores, so we shall not need to resize the following vector and BitVector.
      substitute_instructions_for_loads_(
          graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
      kept_stores_(&allocator_,
                   /*start_bits=*/graph->GetCurrentInstructionId(),
                   /*expandable=*/false,
                   kArenaAllocLSE),
      phi_placeholders_to_search_for_kept_stores_(&allocator_,
                                                  num_phi_placeholders_,
                                                  /*expandable=*/false,
                                                  kArenaAllocLSE),
      loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
      store_records_(allocator_.Adapter(kArenaAllocLSE)),
      phi_placeholder_replacements_(
          num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
      singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
      field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
                   allocator_.Adapter(kArenaAllocLSE)),
      current_phase_(Phase::kLoadElimination) {}

LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
  // If the pre-header value is known (which implies that the reference dominates this
  // block), use a Phi placeholder for the value in the loop header. If all predecessors
  // are later found to have a known value, we can replace loads from this location,
  // either with the pre-header value or with a new Phi. For array locations, the index
  // may be defined inside the loop but the only known value in that case should be the
  // default value or a Phi placeholder that can be replaced only with the default value.
  HLoopInformation* loop_info = block->GetLoopInformation();
  uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
  Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
  if (pre_header_value.IsUnknown()) {
    return pre_header_value;
  }
  if (kIsDebugBuild) {
    // Check that the reference indeed dominates this loop.
    HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
    HInstruction* ref = location->GetReferenceInfo()->GetReference();
    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
        << GetGraph()->PrettyMethod();
    // Check that the index, if defined inside the loop, tracks a default value
    // or a Phi placeholder requiring a loop Phi.
    HInstruction* index = location->GetIndex();
    if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
          << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
          << pre_header_value;
    }
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return ReplacementOrValue(Value::ForPlainLoopPhiPlaceholder(phi_placeholder));
}

LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
  // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
  // if the value in the location escapes. This is not applicable to singletons that are
  // defined inside the loop as they shall be dead in the loop header.
  const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
  const HInstruction* reference = ref_info->GetReference();
  // Finalizable objects always escape.
  const bool is_finalizable =
      reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
  if (ref_info->IsSingleton() &&
      block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
      !is_finalizable) {
    return Value::Unknown();
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return Value::ForPlainLoopPhiPlaceholder(phi_placeholder);
}

void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
  DCHECK(block->IsLoopHeader());
  int block_id = block->GetBlockId();
  HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
  ScopedArenaVector<ValueRecord>& pre_header_heap_values =
      heap_values_for_[pre_header->GetBlockId()];
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  DCHECK(heap_values.empty());

  // Don't eliminate loads in irreducible loops.
  if (block->GetLoopInformation()->IsIrreducible()) {
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    // Also keep the stores before the loop header, including in blocks that were not visited yet.
    for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
      KeepStores(Value::ForPlainLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
    }
    return;
  }

  // Fill `heap_values` based on values from pre-header.
  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
  }
}

LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
  ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
  DCHECK(!predecessors.empty());
  Value merged_value =
      ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
  for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
    Value pred_value =
        ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
    if (pred_value.Equals(merged_value)) {
      // Value is the same. No need to update our merged value.
      continue;
    } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
      // If one is unknown and the other is not, the merged value is unknown.
      merged_value = Value::Unknown();
      break;
    } else {
      // There are conflicting known values. We may still be able to replace loads with a Phi.
      PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
      // Propagate the need for a new loop Phi from all predecessors.
      bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
      merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
    }
  }
  return merged_value;
}

void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
  if (block->IsExitBlock()) {
    // Exit block doesn't really merge values since the control flow ends in
    // its predecessors. Each predecessor needs to make sure stores are kept
    // if necessary.
    return;
  }

  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  DCHECK(heap_values.empty());
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
    DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    return;
  }

  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    Value merged_value = MergePredecessorValues(block, idx);
    if (kIsDebugBuild) {
      if (merged_value.NeedsPhi()) {
        uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
        CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
      } else if (merged_value.IsInstruction()) {
        CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
      }
    }
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
    for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
      uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
      Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
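      // Note: `Value::Equals()` intentionally treats two unknown values as different,
      // so the first clause below explicitly accepts the case where both values
      // are unknown.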
      if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
          !merged_stored_by.Equals(stored_by)) {
        // Use the Phi placeholder to track that we need to keep stores from all predecessors.
        PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
        break;
      }
    }
    heap_values.push_back({ merged_value, merged_stored_by });
  }
}

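// Find an existing Phi in `block` whose inputs match `phi_inputs`, or construct
// a new one; reusing a matching Phi avoids creating duplicate Phis with
// identical inputs.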
static HInstruction* FindOrConstructNonLoopPhi(
    HBasicBlock* block,
    const ScopedArenaVector<HInstruction*>& phi_inputs,
    DataType::Type type) {
  for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
    HInstruction* phi = phi_it.Current();
    DCHECK_EQ(phi->InputCount(), phi_inputs.size());
    auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
      return lhs == rhs.GetInstruction();
    };
    if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
      return phi;
    }
  }
  ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
  HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
  for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
    DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
    phi->SetRawInputAt(i, phi_inputs[i]);
  }
  block->AddPhi(phi);
  if (type == DataType::Type::kReference) {
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(block->GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(phi);
  }
  return phi;
}

void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t idx = phi_placeholder.GetHeapLocation();

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse the same vector for collecting phi inputs.
  ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));

  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
      // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
      // that directly or indirectly depends on it, so it was already processed as part of the
      // other Phi placeholder's dependencies before this one got back to the top of the stack.
      work_queue.pop_back();
      continue;
    }
    uint32_t current_block_id = current_phi_placeholder.GetBlockId();
    HBasicBlock* current_block = blocks[current_block_id];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);

    // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
    // And the only way for such merged value to reach a different heap location is through
    // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
    // seen here are tied to one heap location.
    DCHECK(!current_block->IsLoopHeader())
        << current_phi_placeholder << " phase: " << current_phase_;
    DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);

    phi_inputs.clear();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      DCHECK(!pred_value.IsUnknown()) << pred_value << " block " << current_block->GetBlockId()
                                      << " pred: " << predecessor->GetBlockId();
      if (pred_value.NeedsNonLoopPhi()) {
        // We need to process the Phi placeholder first.
        work_queue.push_back(pred_value.GetPhiPlaceholder());
      } else if (pred_value.IsDefault()) {
        phi_inputs.push_back(GetDefaultValue(type));
      } else {
        DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
                                           << " pred: " << predecessor->GetBlockId();
        phi_inputs.push_back(pred_value.GetInstruction());
      }
    }
    if (phi_inputs.size() == current_block->GetPredecessors().size()) {
      // All inputs are available. Find or construct the Phi replacement.
      phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
          Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
      // Remove the block from the queue.
      DCHECK_EQ(current_phi_placeholder, work_queue.back());
      work_queue.pop_back();
    }
  }
}

void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  DCHECK_EQ(DataType::Size(heap_location_collector_.GetHeapLocation(idx)->GetType()),
            DataType::Size(instruction->IsVecLoad() ? instruction->AsVecLoad()->GetPackedType()
                                                    : instruction->GetType()));
  uint32_t block_id = instruction->GetBlock()->GetBlockId();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  ValueRecord& record = heap_values[idx];
  DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
  loads_and_stores_.push_back({ instruction, idx });
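  // Due to the vector operation carve-out (see `IsDefaultOrPhiAllowedForLoad()`),
  // some loads may not be replaced by a default value or a non-loop Phi; fall
  // back to treating the value as unknown for them.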
  if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
      !IsDefaultOrPhiAllowedForLoad(instruction)) {
    record.value = Value::Unknown();
  }
  if (record.value.IsDefault()) {
    KeepStores(record.stored_by);
    HInstruction* constant = GetDefaultValue(instruction->GetType());
    AddRemovedLoad(instruction, constant);
    record.value = Value::ForInstruction(constant);
  } else if (record.value.IsUnknown()) {
    // Load isn't eliminated. Put the load as the value into the HeapLocation.
    // This acts like GVN but with better aliasing analysis.
    Value old_value = record.value;
    record.value = Value::ForInstruction(instruction);
    KeepStoresIfAliasedToLocation(heap_values, idx);
    KeepStores(old_value);
  } else if (record.value.NeedsLoopPhi()) {
    // We do not know yet if the value is known for all back edges. Record for future processing.
    if (loads_requiring_loop_phi_.empty()) {
      loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
    }
    DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
    loads_requiring_loop_phi_[instruction->GetId()] =
        new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
  } else {
    // This load can be eliminated but we may need to construct non-loop Phis.
    if (record.value.NeedsNonLoopPhi()) {
      MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
      record.value = Replacement(record.value);
    }
    HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
    AddRemovedLoad(instruction, heap_value);
  }
}

void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  DCHECK(!IsStore(value)) << value->DebugName();
  // The `value` may already have a substitute.
  value = FindSubstitute(value);
  HBasicBlock* block = instruction->GetBlock();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  ValueRecord& record = heap_values[idx];
  DCHECK_IMPLIES(record.value.IsInstruction(),
                 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());

  // Calculate the new `Value` to store to the `record`.
  Value new_value = Value::ForInstruction(value);
  // Note that the `value` can be a newly created `Phi` with an id that falls outside
  // the allocated `loads_requiring_loop_phi_` range.
  DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
                 static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
  if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
      loads_requiring_loop_phi_[value->GetId()] != nullptr) {
    // Propagate the Phi placeholder or appropriate converting load to the record.
    new_value = StoredValueForLoopPhiPlaceholderDependentLoad(idx, value);
    DCHECK(new_value.NeedsLoopPhi());
  }

  if (record.value.Equals(value)) {
    // Store into the heap location with the same value.
    // This store can be eliminated right away.
    block->RemoveInstruction(instruction);
    return;
  }

  if (store_records_.empty()) {
    store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
  }
  DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
  store_records_[instruction->GetId()] =
      new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
  loads_and_stores_.push_back({ instruction, idx });

  // If the `record.stored_by` specified a store from this block, it shall be removed
  // at the end, except for a throwing ArraySet; it cannot be marked for keeping in
  // `kept_stores_` anymore after we update the `record.stored_by` below.
  DCHECK(!record.stored_by.IsInstruction() ||
         record.stored_by.GetInstruction()->GetBlock() != block ||
         record.stored_by.GetInstruction()->CanThrow() ||
         !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));

  if (instruction->CanThrow()) {
    // Previous stores can become visible.
    HandleThrowingInstruction(instruction);
    // We cannot remove a possibly throwing store.
    // After marking it as kept, it does not matter if we track it in `stored_by` or not.
    kept_stores_.SetBit(instruction->GetId());
  }

  // Update the record.
  record.value = new_value;
  // Track the store in the value record. If the value is loaded or needed after
  // return/deoptimization later, this store isn't really redundant.
  record.stored_by = Value::ForInstruction(instruction);

  // This store may kill values in other heap locations due to aliasing.
  for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
    if (i == idx ||
        heap_values[i].value.IsUnknown() ||
        CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
        !heap_location_collector_.MayAlias(i, idx)) {
      continue;
    }
    // Kill heap locations that may alias and keep previous stores to these locations.
    KeepStores(heap_values[i].stored_by);
    heap_values[i].stored_by = Value::Unknown();
    heap_values[i].value = Value::Unknown();
  }
}

void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
  // Populate the heap_values array for this block.
  // TODO: try to reuse the heap_values array from one predecessor if possible.
  if (block->IsLoopHeader()) {
    PrepareLoopRecords(block);
  } else {
    MergePredecessorRecords(block);
  }
  // Visit non-Phi instructions.
  VisitNonPhiInstructions(block);
}

bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
  DCHECK_NE(idx1, idx2);
  DCHECK(loop_header->IsLoopHeader());
  if (heap_location_collector_.MayAlias(idx1, idx2)) {
    return true;
  }
  // For array locations with index defined inside the loop, include
  // all other locations in the array, even those that LSA declares
  // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
  // refer to the same locations for different iterations. (LSA's
  // `ComputeMayAlias()` does not consider different loop iterations.)
  HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
  HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
  if (loc1->IsArray() &&
      loc2->IsArray() &&
      HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
                                                loc2->GetReferenceInfo())) {
    HLoopInformation* loop_info = loop_header->GetLoopInformation();
    if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
        loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
      // Consider the locations aliasing. Do not optimize the case where both indexes
      // are loop invariants defined inside the loop; rely on LICM to pull them out.
      return true;
    }
  }
  return false;
}

bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
    PhiPlaceholder phi_placeholder,
    DataType::Type type,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  auto maybe_add_to_work_queue = [&](Value predecessor_value) {
    // Visit the predecessor Phi placeholder if it's not visited yet.
    DCHECK(predecessor_value.NeedsNonLoopPhi() || predecessor_value.NeedsPlainLoopPhi());
    PhiPlaceholder predecessor_phi_placeholder = predecessor_value.GetPhiPlaceholder();
    if (!visited.IsBitSet(PhiPlaceholderIndex(predecessor_phi_placeholder))) {
      visited.SetBit(PhiPlaceholderIndex(predecessor_phi_placeholder));
      work_queue.push_back(predecessor_phi_placeholder);
    }
  };

  // Use depth-first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      // Skip over type conversions (these are unnecessary for the default value).
      value = SkipTypeConversions(value);
      if (value.NeedsPhi()) {
        maybe_add_to_work_queue(value);
      } else if (!value.Equals(Value::Default())) {
        return false;  // Report failure.
      }
    }
    if (block->IsLoopHeader()) {
      // For back-edges we need to check all locations that write to the same array,
      // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`,
      // as they may actually refer to the same locations for different iterations.
      for (size_t i = 0; i != num_heap_locations; ++i) {
        if (i == idx ||
            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
                heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
          continue;
        }
        for (HBasicBlock* predecessor : block->GetPredecessors()) {
          // Check if there were any writes to this location.
          // Note: We could simply process the values but due to the vector operation
          // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
          // the value to change and not be equal to default. To work around this and
          // allow replacing the non-vector load of loop-invariant default values
          // anyway, skip over paths that do not have any writes.
          ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
          while (record.stored_by.NeedsPlainLoopPhi() &&
                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
            HLoopInformation* loop_info =
                blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
            record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
          }
          DCHECK(!record.stored_by.NeedsConvertedLoopPhi());
          Value value = ReplacementOrValue(record.value);
          // Skip over type conversions (these are unnecessary for the default value).
          value = SkipTypeConversions(value);
          if (value.NeedsPhi()) {
            maybe_add_to_work_queue(value);
          } else if (!value.Equals(Value::Default())) {
            return false;  // Report failure.
          }
        }
      }
    }
  }

  // Record replacement and report success.
  HInstruction* replacement = GetDefaultValue(type);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However,
    // we replace only non-vector operations in this code path.
    if (!hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  TypeConversionSet type_conversions;

  // Use depth-first search to check if any non-Phi input is unknown.
  HInstruction* replacement = nullptr;
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      // Skip type conversions but record them for checking later.
      value = SkipTypeConversions(value, &type_conversions);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else {
        if (!value.IsInstruction() ||
            (replacement != nullptr && replacement != value.GetInstruction())) {
          return false;  // Report failure.
        }
        replacement = value.GetInstruction();
      }
    }
    // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
    // for back-edges, it is not needed here. When looking for a single input
    // instruction coming from before the loop, the array index must also be
    // defined before the loop and the aliasing analysis done by LSA is sufficient.
    // Any writes of a different value with an index that is not loop invariant
    // would invalidate the heap location in `VisitSetLocation()`.
  }

  // Check that there are no type conversions that would change the stored value.
  DCHECK(replacement != nullptr);
  if (!type_conversions.AreAllTypeConversionsImplicit(replacement)) {
    return false;
  }

  // Record replacement and report success.
  // Note: Replacements for the loads where we skipped type conversions above (and do not really
  // need the type conversion) shall be recorded later, either when we process the loads in
  // `ProcessLoadsRequiringLoopPhis()` or when needed to materialize another Phi.
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However,
    // we replace only vector operations in this code path.
    if (hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

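// Returns the loop Phi placeholder that blocks the materialization if one is found,
// or std::nullopt if Phis can be constructed for all the placeholders recorded in
// `phi_placeholders_to_materialize`.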
std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
    DataType::Type type,
    bool can_use_default_or_phi) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth-first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  phi_placeholders_to_materialize->ClearAllBits();
  phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
      // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
      DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
          Value::Default()));
      continue;
    }
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    if (current_block->IsLoopHeader()) {
      // If the index is defined inside the loop, it may reference different elements of the
      // array on each iteration. Since we do not track if all elements of an array are set
      // to the same value explicitly, the only known value in pre-header can be the default
      // value from NewArray or a Phi placeholder depending on a default value from some outer
      // loop pre-header. This Phi placeholder can be replaced only by the default value.
      HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
      if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
        if (can_use_default_or_phi &&
            TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
                                                      type,
                                                      phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
      // A similar situation arises with the index defined outside the loop if we cannot use
      // default values or Phis, i.e. for vector loads, as we can only replace the Phi
      // placeholder with a single instruction defined before the loop.
      if (!can_use_default_or_phi) {
        DCHECK(index != nullptr);  // Vector operations are array operations.
        if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
                                                          phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
    }
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      Value value = ReplacementOrValue(heap_values[idx].value);
      if (value.IsUnknown()) {
        // We cannot create a Phi for this loop Phi placeholder.
        return current_phi_placeholder;  // Report the loop Phi placeholder.
      }
      // For arrays, the location may have been clobbered by writes to other locations
      // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
      if (current_block->IsLoopHeader() &&
          predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
          heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
        for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
          if (i != idx &&
              !heap_values[i].stored_by.IsUnknown() &&
              MayAliasOnBackEdge(current_block, idx, i)) {
            // We cannot create a Phi for this loop Phi placeholder.
            return current_phi_placeholder;
          }
        }
      }
      // Skip type conversions. We're looking for the Phi placeholders now.
      value = SkipTypeConversions(value);
      if (value.NeedsPlainLoopPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
          phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
          LSE_VLOG << "For materialization of " << phi_placeholder
                   << " we need to materialize " << value;
        }
      }
    }
  }

  // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
  return std::nullopt;
}

void LSEVisitor::MaterializeTypeConversionsIfNeeded(Value value) {
  if (!value.NeedsConvertedLoopPhi()) {
    return;
  }
  // There are at most 2 conversions (Uint8+Int16 or Int8+Uint16). Conversion to Int32
  // is implicit and conversions to same or smaller size replace previous conversions.
  static constexpr size_t kMaxConversionLoads = 2u;
  HInstruction* conversion_loads[kMaxConversionLoads];
  size_t num_conversion_loads = 0u;
  do {
    DCHECK_LT(num_conversion_loads, kMaxConversionLoads);
    HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
    DCHECK(!conversion_load->IsVecLoad());
    HInstruction* substitute = FindSubstitute(conversion_load);
    if (substitute != conversion_load) {
      value = Value::ForInstruction(substitute);
      break;
    }
    conversion_loads[num_conversion_loads] = conversion_load;
    ++num_conversion_loads;
    ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
    DCHECK(prev_record != nullptr);
    value = prev_record->value;
  } while (value.NeedsConvertedLoopPhi());
  value = value.NeedsPlainLoopPhi() ? Replacement(value) : value;
  HInstruction* replacement = value.GetInstruction();
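  // Re-apply the collected conversions from the innermost one outwards: each
  // conversion load is replaced based on the previous replacement, and the
  // `HTypeConversion` recorded as its substitute becomes the replacement for
  // the next, outer conversion load.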
  ArrayRef<HInstruction*> conversion_loads_array(conversion_loads, num_conversion_loads);
  for (HInstruction* conversion_load : ReverseRange(conversion_loads_array)) {
    AddRemovedLoad(conversion_load, replacement);
    replacement = substitute_instructions_for_loads_[conversion_load->GetId()];
    DCHECK(replacement != nullptr);
    DCHECK(replacement->IsTypeConversion());
  }
}

bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                                     DataType::Type type) {
  return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
}

bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
                                     DataType::Type type) {
  // Materialize all predecessors that do not need a loop Phi and determine if all inputs
  // other than loop Phis are the same.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  TypeConversionSet type_conversions;
  std::optional<Value> other_value = std::nullopt;
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsNonLoopPhi()) {
        DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
        MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
        value = Replacement(value);
      } else if (value.NeedsConvertedLoopPhi()) {
        TypeConversionSet local_type_conversions;
        Value without_conversions = SkipTypeConversions(value, &local_type_conversions);
        DCHECK(!without_conversions.NeedsNonLoopPhi());  // Would have been already materialized.
        if (without_conversions.NeedsPlainLoopPhi()) {
          type_conversions.Add(local_type_conversions);
          value = without_conversions;
        } else {
          MaterializeTypeConversionsIfNeeded(value);
          value = ReplacementOrValue(value);
        }
      }
      if (!value.NeedsPlainLoopPhi()) {
        if (!other_value) {
          // The first other value we found.
          other_value = value;
        } else if (!other_value->IsInvalid()) {
          // Check if the current `value` differs from the previous `other_value`.
          if (!value.Equals(*other_value)) {
            other_value = Value::Invalid();
          }
        }
      }
    }
  }

  DCHECK(other_value.has_value());
  DCHECK(other_value->IsInvalid() || other_value->IsDefault() || other_value->IsInstruction());
  if (other_value->IsDefault() ||  // Default value does not need type conversions.
      (other_value->IsInstruction() &&
       type_conversions.AreAllTypeConversionsImplicit(other_value->GetInstruction()))) {
    HInstruction* replacement =
        (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
    DCHECK(type_conversions.AreAllTypeConversionsImplicit(replacement));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
    }
    return true;
  }

  // If we're materializing only a single Phi, try to match it with an existing Phi.
  // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
  // This also covers the case when after replacing a previous set of Phi placeholders,
  // we continue with a Phi placeholder that does not really need a loop Phi anymore.
  if (phi_placeholder_indexes.size() == 1u) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      HInstruction* phi = phi_it.Current();
      DCHECK_EQ(phi->InputCount(), predecessors.size());
      ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
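      // The comparator below checks each Phi input against the value recorded for
      // the corresponding predecessor, looking through pending type-converting
      // loads before comparing with the Phi placeholder or its replacement.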
      auto cmp = [=, this](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
        Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
        HInstruction* lhs_instruction = lhs.GetInstruction();
        while (value.NeedsConvertedLoopPhi()) {
          HInstruction* conversion_load = value.GetLoopPhiConversionLoad();
          if (!lhs_instruction->IsTypeConversion() ||
              lhs_instruction->GetType() != conversion_load->GetType()) {
            return false;
          }
          lhs_instruction = lhs_instruction->InputAt(0);
          ValueRecord* prev_record = loads_requiring_loop_phi_[conversion_load->GetId()];
          DCHECK(prev_record != nullptr);
          value = prev_record->value;
        }
        if (value.NeedsPlainLoopPhi() && value.GetPhiPlaceholder().Equals(phi_placeholder)) {
          return lhs_instruction == phi;
        } else {
          value = ReplacementOrValue(value);
          DCHECK(value.IsDefault() || value.IsInstruction());
          return value.Equals(lhs_instruction);
        }
      };
      if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
        phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
        return true;
      }
    }
  }

  if (current_phase_ == Phase::kStoreElimination) {
    // We're not creating Phis during the final store elimination phase.
    return false;
  }

  // There are different inputs to the Phi chain. Create the Phis.
  ArenaAllocator* allocator = GetGraph()->GetAllocator();
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    CHECK_GE(block->GetPredecessors().size(), 2u);
    phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
        new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
  }
  // Fill the Phi inputs.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    size_t idx = phi_placeholder.GetHeapLocation();
    HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
    DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
        << "type=" << type << " vs phi-type=" << phi->GetType();
    for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
      HBasicBlock* predecessor = block->GetPredecessors()[i];
      Value predecessor_value = heap_values_for_[predecessor->GetBlockId()][idx].value;
      MaterializeTypeConversionsIfNeeded(predecessor_value);
      Value value = ReplacementOrValue(predecessor_value);
      HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
      DCHECK_NE(input->GetType(), DataType::Type::kVoid);
      phi->SetRawInputAt(i, input);
      DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
          << " input: " << input->GetType() << value << " phi: " << phi->GetType()
          << " request: " << type;
    }
  }
  // Add the Phis to their blocks.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
  }
  if (type == DataType::Type::kReference) {
    ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
    ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
    }
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
  }

  return true;
}
2331
MaterializeLoopPhis(const ArenaBitVector & phi_placeholders_to_materialize,DataType::Type type)2332 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2333 DataType::Type type) {
2334 // Use local allocator to reduce peak memory usage.
2335 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2336
2337   // We want to recognize a subset of these loop Phis that does not depend on any loop
2338   // Phi outside the subset, i.e. a transitive closure, and that has only one other
2339   // instruction as an input; that instruction can then be used instead of each Phi in the
2340   // set. See for example Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that,
2341   // we shall materialize these loop Phis starting from the smallest transitive closure.
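  // An illustrative sketch (hypothetical placeholders, not from any actual test): if loop
  // Phi placeholders P0 and P1 reference only each other and a single instruction `i`
  // across the loop edges, the closure {P0, P1} has `i` as its sole outside input, so
  // both placeholders can be replaced directly by `i` without creating any Phi.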
2342
2343 // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2344 // assign new indexes to the Phi placeholders, making the matrix dense.
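  // For example (hypothetical indexes): if placeholders 3, 17 and 42 are to be
  // materialized, they receive the dense matrix indexes 0, 1 and 2, respectively.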
2345 ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2346 static_cast<size_t>(-1), // Invalid.
2347 allocator.Adapter(kArenaAllocLSE));
2348 ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2349 size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2350 phi_placeholder_indexes.reserve(num_phi_placeholders);
2351 for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2352 matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2353 phi_placeholder_indexes.push_back(marker_index);
2354 }
2355 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2356 ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2357 dependencies.reserve(num_phi_placeholders);
2358 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2359 static constexpr bool kExpandable = false;
2360 dependencies.push_back(
2361 ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2362 ArenaBitVector* current_dependencies = dependencies.back();
2363 current_dependencies->SetBit(matrix_index); // Count the Phi placeholder as its own dependency.
2364 PhiPlaceholder current_phi_placeholder =
2365 GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2366 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2367 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2368 size_t idx = current_phi_placeholder.GetHeapLocation();
2369 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2370 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2371 if (pred_value.NeedsLoopPhi()) {
2372 size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2373 DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2374 DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2375 current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2376 }
2377 }
2378 }
2379
2380 // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
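  // A small worked sketch (hypothetical data): with three placeholders where 0 depends
  // on 1 and 1 depends on 2, the bits start as
  //   deps[0] = {0, 1}, deps[1] = {1, 2}, deps[2] = {2}
  // and the closure below adds the transitive dependency 0 -> 2:
  //   deps[0] = {0, 1, 2}, deps[1] = {1, 2}, deps[2] = {2}.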
2381 for (size_t k = 0; k != num_phi_placeholders; ++k) {
2382 for (size_t i = 0; i != num_phi_placeholders; ++i) {
2383 for (size_t j = 0; j != num_phi_placeholders; ++j) {
2384 if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2385 dependencies[i]->SetBit(j);
2386 }
2387 }
2388 }
2389 }
2390
2391 // Count the number of transitive dependencies for each replaceable Phi placeholder.
2392 ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2393 num_dependencies.reserve(num_phi_placeholders);
2394 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2395 num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2396 }
2397
2398 // Pick a Phi placeholder with the smallest number of transitive dependencies and
2399 // materialize it and its dependencies. Repeat until we have materialized all.
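  // Continuing the hypothetical sketch above: deps[2] = {2} is the smallest closure, so
  // placeholder 2 is materialized first; after subtracting it from the other closures,
  // deps[1] = {1} becomes the next minimum, and deps[0] = {0} is materialized last.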
2400 ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2401 current_subset.reserve(num_phi_placeholders);
2402 size_t remaining_phi_placeholders = num_phi_placeholders;
2403 while (remaining_phi_placeholders != 0u) {
2404 auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2405 DCHECK_LE(*it, remaining_phi_placeholders);
2406 size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2407 ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2408 size_t current_num_dependencies = num_dependencies[current_matrix_index];
2409 current_subset.clear();
2410 for (uint32_t matrix_index : current_dependencies->Indexes()) {
2411 current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2412 }
2413 if (!MaterializeLoopPhis(current_subset, type)) {
2414 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2415 // This is the final store elimination phase and we shall not be able to eliminate any
2416 // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2417 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2418 if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2419 DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2420 phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
2421 }
2422 }
2423 return false;
2424 }
2425 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2426 if (current_dependencies->IsBitSet(matrix_index)) {
2427         // Mark all dependencies as done by setting their `num_dependencies[.]` to the
2428         // total number of placeholders, so that they shall never be the minimum again.
2429 num_dependencies[matrix_index] = num_phi_placeholders;
2430 } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2431 // Remove dependencies from other Phi placeholders.
2432 dependencies[matrix_index]->Subtract(current_dependencies);
2433 num_dependencies[matrix_index] -= current_num_dependencies;
2434 }
2435 }
2436 remaining_phi_placeholders -= current_num_dependencies;
2437 }
2438 return true;
2439 }
2440
2441 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2442 ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2443   ArenaBitVector abv(&saa, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2444 auto res =
2445 FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2446 CHECK(!res.has_value()) << *res;
2447 return MaterializeLoopPhis(abv, type);
2448 }
2449
2450 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2451 PhiPlaceholder phi_placeholder, HInstruction* load) {
2452 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2453
2454 // Use local allocator to reduce peak memory usage.
2455 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2456
2457 // Find Phi placeholders to materialize.
2458 ArenaBitVector phi_placeholders_to_materialize(
2459 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2460 DataType::Type type = load->GetType();
2461 bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2462 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2463 phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2464 if (loop_phi_with_unknown_input) {
2465 DCHECK_GE(GetGraph()
2466 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2467 ->GetPredecessors()
2468 .size(),
2469 2u);
2470 return loop_phi_with_unknown_input; // Return failure.
2471 }
2472
2473 DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2474 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2475 DCHECK(success);
2476
2477 // Report success.
2478 return std::nullopt;
2479 }
2480
2481 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2482 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2483 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2484 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2485 // heap location that is needed by another loop Phi placeholder.
2486 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2487 DCHECK(!loads_requiring_loop_phi_.empty());
2488 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2489 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2490 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] = Value::Unknown();
2491
2492 uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2493   const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2494 size_t reverse_post_order_index = 0;
2495 size_t reverse_post_order_size = reverse_post_order.size();
2496 size_t loads_and_stores_index = 0u;
2497 size_t loads_and_stores_size = loads_and_stores_.size();
2498
2499 // Skip blocks and instructions before the block containing the loop phi with unknown input.
2500 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2501 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2502 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2503 while (loads_and_stores_index != loads_and_stores_size &&
2504 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2505 ++loads_and_stores_index;
2506 }
2507 ++reverse_post_order_index;
2508 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2509 }
2510
2511 // Use local allocator to reduce peak memory usage.
2512 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2513 // Reuse one temporary vector for all remaining blocks.
2514 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2515 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2516
2517 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2518 Value value;
2519 if (block->IsLoopHeader()) {
2520 if (block->GetLoopInformation()->IsIrreducible()) {
2521 value = Value::Unknown();
2522 } else {
2523 value = PrepareLoopValue(block, idx);
2524 }
2525 } else {
2526 value = MergePredecessorValues(block, idx);
2527 }
2528 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2529 return value;
2530 };
2531
2532 // Process remaining blocks and instructions.
2533 bool found_unreplaceable_load = false;
2534 bool replaced_heap_value_with_unknown = false;
2535 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2536 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2537 if (block->IsExitBlock()) {
2538 continue;
2539 }
2540
2541 // We shall reconstruct only the heap values that we need for processing loads and stores.
2542 local_heap_values.clear();
2543 local_heap_values.resize(num_heap_locations, Value::Invalid());
2544
2545 for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2546 HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2547 size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2548 if (load_or_store->GetBlock() != block) {
2549 break; // End of instructions from the current block.
2550 }
2551 if (IsStore(load_or_store)) {
2552 StoreRecord* store_record = store_records_[load_or_store->GetId()];
2553 DCHECK(store_record != nullptr);
2554 HInstruction* stored_value = store_record->stored_value;
2555 DCHECK(stored_value != nullptr);
2556 // Note that the `stored_value` can be a newly created `Phi` with an id that falls
2557 // outside the allocated `loads_requiring_loop_phi_` range.
2558 DCHECK_IMPLIES(
2559 IsLoad(stored_value),
2560 static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
2561 if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
2562 loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
2563 continue; // This store never needed a loop Phi.
2564 }
2565 ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
2566 // Process the store by updating `local_heap_values[idx]`. The last update shall
2567 // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2568 // at the end of the block.
2569 Value replacement = ReplacementOrValue(record->value);
2570 if (replacement.NeedsLoopPhi()) {
2571 // No replacement yet. Use the Phi placeholder or an appropriate converting load.
2572 DCHECK(record->value.NeedsLoopPhi());
2573 local_heap_values[idx] = StoredValueForLoopPhiPlaceholderDependentLoad(idx, stored_value);
2574 DCHECK(local_heap_values[idx].NeedsLoopPhi());
2575 } else {
2576 // If the load fetched a known value, use it, otherwise use the load.
2577 local_heap_values[idx] = Value::ForInstruction(
2578 replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2579 }
2580 } else {
2581 // Process the load unless it has previously been marked unreplaceable.
2582 DCHECK(IsLoad(load_or_store));
2583 ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
2584 if (record == nullptr) {
2585 continue; // This load never needed a loop Phi.
2586 }
2587 if (record->value.NeedsLoopPhi()) {
2588 if (local_heap_values[idx].IsInvalid()) {
2589 local_heap_values[idx] = get_initial_value(block, idx);
2590 }
2591 if (local_heap_values[idx].IsUnknown()) {
2592 // This load cannot be replaced. Keep stores that feed the Phi placeholder
2593 // (no aliasing since then, otherwise the Phi placeholder would not have been
2594 // propagated as a value to this load) and store the load as the new heap value.
2595 found_unreplaceable_load = true;
2596 KeepStores(record->value);
2597 record->value = Value::Unknown();
2598 local_heap_values[idx] = Value::ForInstruction(load_or_store);
2599 } else if (local_heap_values[idx].NeedsLoopPhi()) {
2600 // The load may still be replaced with a Phi later.
2601 DCHECK(local_heap_values[idx].Equals(record->value));
2602 } else {
2603 // This load can be eliminated but we may need to construct non-loop Phis.
2604 if (local_heap_values[idx].NeedsNonLoopPhi()) {
2605 MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2606 load_or_store->GetType());
2607 local_heap_values[idx] = Replacement(local_heap_values[idx]);
2608 }
2609 record->value = local_heap_values[idx];
2610 DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
2611 << "The replacement heap value can be an HIR instruction or the default value.";
2612 HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
2613 GetDefaultValue(load_or_store->GetType()) :
2614 local_heap_values[idx].GetInstruction();
2615 AddRemovedLoad(load_or_store, heap_value);
2616 }
2617 }
2618 }
2619 }
2620
2621 // All heap values that previously needed a loop Phi at the end of the block
2622 // need to be updated for processing successors.
2623 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2624 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2625 if (heap_values[idx].value.NeedsLoopPhi()) {
2626 if (local_heap_values[idx].IsValid()) {
2627 heap_values[idx].value = local_heap_values[idx];
2628 } else {
2629 heap_values[idx].value = get_initial_value(block, idx);
2630 }
2631 if (heap_values[idx].value.IsUnknown()) {
2632 replaced_heap_value_with_unknown = true;
2633 }
2634 }
2635 }
2636 }
2637 DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2638 }
2639
2640 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2641 // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2642 // make the result of the processing depend on the order in which we process these loads.
2643 // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2644 // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2645 if (loads_requiring_loop_phi_.empty()) {
2646 return; // No loads to process.
2647 }
2648 for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2649 ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
2650 if (record == nullptr) {
2651 continue;
2652 }
2653 HInstruction* load = load_store_record.load_or_store;
2654 while (record->value.NeedsLoopPhi()) {
2655 Value without_conversions = SkipTypeConversions(record->value);
2656 if (!without_conversions.NeedsPlainLoopPhi() ||
2657 phi_placeholder_replacements_[PhiPlaceholderIndex(without_conversions)].IsValid()) {
2658 break;
2659 }
2660 std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2661 TryToMaterializeLoopPhis(without_conversions.GetPhiPlaceholder(), load);
2662 DCHECK_EQ(
2663 loop_phi_with_unknown_input.has_value(),
2664 phi_placeholder_replacements_[PhiPlaceholderIndex(without_conversions)].IsInvalid());
2665 if (loop_phi_with_unknown_input) {
2666 DCHECK_GE(GetGraph()
2667 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2668 ->GetPredecessors()
2669 .size(),
2670 2u);
2671 ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2672 }
2673 }
2674     // The load, or a converting load's underlying Phi placeholder, could have been marked
2675     // as unreplaceable (with its stores marked for keeping), or marked for replacement with
2676     // an instruction, in `ProcessLoopPhiWithUnknownInput()`.
2677 DCHECK(record->value.IsUnknown() ||
2678 record->value.IsInstruction() ||
2679 record->value.NeedsLoopPhi());
2680 if (record->value.NeedsLoopPhi()) {
2681 MaterializeTypeConversionsIfNeeded(record->value);
2682 record->value = ReplacementOrValue(record->value);
2683 HInstruction* heap_value = record->value.GetInstruction();
2684       // Type conversion substitutes can be created by `MaterializeTypeConversionsIfNeeded()`,
2685       // either in the call directly above or while materializing Phis. For any load that does
2686       // not yet have a substitute recorded, record one now; it can also be a type conversion.
2687 HInstruction* substitute = FindSubstitute(load);
2688 if (substitute == load) {
2689 AddRemovedLoad(load, heap_value);
2690 } else {
2691 DCHECK(substitute->IsTypeConversion());
2692 }
2693 }
2694 }
2695 }
2696
2697 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2698 ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2699 size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2700 work_queue.reserve(((start_size * 3u) + 1u) / 2u); // Reserve 1.5x start size, rounded up.
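  // For example, a start size of 5 reserves ((5u * 3u) + 1u) / 2u == 8u entries.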
2701 for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2702 work_queue.push_back(index);
2703 }
2704 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2705 while (!work_queue.empty()) {
2706 uint32_t cur_phi_idx = work_queue.back();
2707 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2708 work_queue.pop_back();
2709 size_t idx = phi_placeholder.GetHeapLocation();
2710 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2711 DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2712 << " (blocks: " << blocks.size() << ")";
2713 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2714 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2715 // For loop back-edges we must also preserve all stores to locations that
2716 // may alias with the location `idx`.
2717       // TODO: Add test cases around this.
2718 bool is_back_edge =
2719 block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2720 size_t start = is_back_edge ? 0u : idx;
2721 size_t end = is_back_edge ? heap_values.size() : idx + 1u;
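      // For example (hypothetical numbers): with 4 heap locations and idx == 2, a back-edge
      // predecessor scans i over [0, 4) while any other predecessor checks only i == 2.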
2722 for (size_t i = start; i != end; ++i) {
2723 Value stored_by = heap_values[i].stored_by;
2724 if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2725 if (stored_by.NeedsPhi()) {
2726 size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2727 if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2728 phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2729 work_queue.push_back(phi_placeholder_index);
2730 }
2731 } else {
2732 DCHECK(IsStore(stored_by.GetInstruction()));
2733 ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2734 DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2735 << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2736 << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2737 kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2738 }
2739 }
2740 }
2741 }
2742 }
2743 }
2744
2745 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2746 while (value_record->stored_by.IsInstruction() &&
2747 !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2748 StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
2749 DCHECK(store_record != nullptr);
2750 *value_record = store_record->old_value_record;
2751 }
2752 if (value_record->stored_by.NeedsPhi() &&
2753 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2754 PhiPlaceholderIndex(value_record->stored_by))) {
2755 // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2756 // Phi placeholder to recalculate the actual value.
2757 value_record->value = value_record->stored_by;
2758 }
2759 value_record->value = ReplacementOrValue(value_record->value);
2760 if (value_record->value.NeedsNonLoopPhi()) {
2761 // Treat all Phi placeholders as requiring loop Phis at this point.
2762 // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2763 value_record->value =
2764 Value::ForPlainLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2765 }
2766 }
2767
2768 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2769 DataType::Type type) {
2770 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2771
2772 // Use local allocator to reduce peak memory usage.
2773 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2774 ArenaBitVector visited(&allocator,
2775 /*start_bits=*/ num_phi_placeholders_,
2776 /*expandable=*/ false,
2777 kArenaAllocLSE);
2778
2779 // Find Phi placeholders to try and match against existing Phis or other replacement values.
2780 ArenaBitVector phi_placeholders_to_materialize(
2781 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2782 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2783 phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2784 if (loop_phi_with_unknown_input) {
2785 DCHECK_GE(GetGraph()
2786 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2787 ->GetPredecessors()
2788 .size(),
2789 2u);
2790     // Mark the unreplaceable placeholder as well as the input Phi placeholder as unreplaceable.
2791 phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
2792 phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2793 Value::Unknown();
2794 return;
2795 }
2796
2797 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2798 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2799 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2800 DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2801 !success);
2802 }
2803
2804 void LSEVisitor::FindStoresWritingOldValues() {
2805 // The Phi placeholder replacements have so far been used for eliminating loads,
2806 // tracking values that would be stored if all stores were kept. As we want to
2807 // compare actual old values after removing unmarked stores, prune the Phi
2808 // placeholder replacements that can be fed by values we may not actually store.
2809 // Replacements marked as unknown can be kept as they are fed by some unknown
2810 // value and would end up as unknown again if we recalculated them.
2811 for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2812 if (!phi_placeholder_replacements_[i].IsUnknown() &&
2813 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2814 phi_placeholder_replacements_[i] = Value::Invalid();
2815 }
2816 }
2817
2818 // Update heap values at end of blocks.
2819 for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2820 for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
2821 UpdateValueRecordForStoreElimination(&value_record);
2822 }
2823 }
2824
2825 // Use local allocator to reduce peak memory usage.
2826 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2827 // Mark the stores we want to eliminate in a separate bit vector.
2828 ArenaBitVector eliminated_stores(&allocator,
2829 /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2830 /*expandable=*/ false,
2831 kArenaAllocLSE);
2832
2833 for (uint32_t store_id : kept_stores_.Indexes()) {
2834 DCHECK(kept_stores_.IsBitSet(store_id));
2835 StoreRecord* store_record = store_records_[store_id];
2836 DCHECK(store_record != nullptr);
2837 UpdateValueRecordForStoreElimination(&store_record->old_value_record);
2838 if (store_record->old_value_record.value.NeedsPhi()) {
2839 DataType::Type type = store_record->stored_value->GetType();
2840 FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
2841 store_record->old_value_record.value =
2842 ReplacementOrValue(store_record->old_value_record.value);
2843 }
2844 DCHECK(!store_record->old_value_record.value.NeedsPhi());
2845 HInstruction* stored_value = FindSubstitute(store_record->stored_value);
2846 if (store_record->old_value_record.value.Equals(stored_value)) {
2847 eliminated_stores.SetBit(store_id);
2848 }
2849 }
2850
2851 // Commit the stores to eliminate by removing them from `kept_stores_`.
2852 kept_stores_.Subtract(&eliminated_stores);
2853 }
2854
2855 void LSEVisitor::Run() {
2856 // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
2857 // remove, we will set it to true in VisitMonitorOperation.
2858 GetGraph()->SetHasMonitorOperations(false);
2859
2860 // 1. Process blocks and instructions in reverse post order.
2861 for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2862 VisitBasicBlock(block);
2863 }
2864
2865 // 2. Process loads that require loop Phis, trying to find/create replacements.
2866 current_phase_ = Phase::kLoadElimination;
2867 ProcessLoadsRequiringLoopPhis();
2868
2869 // 3. Determine which stores to keep and which to eliminate.
2870 current_phase_ = Phase::kStoreElimination;
2871 // Finish marking stores for keeping.
2872 SearchPhiPlaceholdersForKeptStores();
2873
2874 // Find stores that write the same value as is already present in the location.
2875 FindStoresWritingOldValues();
2876
2877 // 4. Replace loads and remove unnecessary stores and singleton allocations.
2878 FinishFullLSE();
2879 }
2880
2881
2882 void LSEVisitor::FinishFullLSE() {
2883 // Remove recorded load instructions that should be eliminated.
2884 for (const LoadStoreRecord& record : loads_and_stores_) {
2885 size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
2886 HInstruction* substitute = substitute_instructions_for_loads_[id];
2887 if (substitute == nullptr) {
2888 continue;
2889 }
2890 HInstruction* load = record.load_or_store;
2891 DCHECK(load != nullptr);
2892 DCHECK(IsLoad(load));
2893 DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
2894 // We proactively retrieve the substitute for a removed load, so
2895 // a load that has a substitute should not be observed as a heap
2896 // location value.
2897 DCHECK_EQ(FindSubstitute(substitute), substitute);
2898
2899 load->ReplaceWith(substitute);
2900 load->GetBlock()->RemoveInstruction(load);
2901 if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
2902 (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
2903 MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
2904 }
2905 }
2906
2907 // Remove all the stores we can.
2908 for (const LoadStoreRecord& record : loads_and_stores_) {
2909 if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
2910 record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
2911 if ((record.load_or_store->IsInstanceFieldSet() &&
2912 record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
2913 (record.load_or_store->IsStaticFieldSet() &&
2914 record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
2915 MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
2916 }
2917 }
2918 }
2919
2920   // Eliminate singleton-classified instructions:
2921   //   - Constructor fences (they never escape this thread).
2922   //   - Allocations (if they are unused).
2923 for (HInstruction* new_instance : singleton_new_instances_) {
2924 size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
2925 MaybeRecordStat(stats_,
2926 MethodCompilationStat::kConstructorFenceRemovedLSE,
2927 removed);
2928
2929 if (!new_instance->HasNonEnvironmentUses()) {
2930 new_instance->RemoveEnvironmentUsers();
2931 new_instance->GetBlock()->RemoveInstruction(new_instance);
2932 MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
2933 }
2934 }
2935 }
2936
2937 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
2938 // cannot be directly allocated with an arena allocator, so we need to wrap it.
2939 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
2940 public:
2941   LSEVisitorWrapper(HGraph* graph,
2942 const HeapLocationCollector& heap_location_collector,
2943 OptimizingCompilerStats* stats)
2944 : lse_visitor_(graph, heap_location_collector, stats) {}
2945
2946   void Run() {
2947 lse_visitor_.Run();
2948 }
2949
2950 private:
2951 LSEVisitor lse_visitor_;
2952 };
2953
2954 bool LoadStoreElimination::Run() {
2955 if (graph_->IsDebuggable()) {
2956 // Debugger may set heap values or trigger deoptimization of callers.
2957 // Skip this optimization.
2958 return false;
2959 }
2960 ScopedArenaAllocator allocator(graph_->GetArenaStack());
2961 LoadStoreAnalysis lsa(graph_, stats_, &allocator);
2962 lsa.Run();
2963 const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
2964 if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
2965 // No HeapLocation information from LSA, skip this optimization.
2966 return false;
2967 }
2968
2969   // Currently, load-store analysis can't handle predicated loads/stores, specifically
2970   // pairs of memory operations with different predicates.
2971 // TODO: support predicated SIMD.
2972 if (graph_->HasPredicatedSIMD()) {
2973 return false;
2974 }
2975
2976 std::unique_ptr<LSEVisitorWrapper> lse_visitor(
2977 new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
2978 lse_visitor->Run();
2979 return true;
2980 }
2981
2982 #undef LSE_VLOG
2983
2984 } // namespace art
2985