/*
 * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compiler_logger.h"
#include "optimizer/ir/graph_visitor.h"
#include "optimizer/ir/basicblock.h"
#include "optimizer/ir/inst.h"
#include "optimizer/ir/analysis.h"
#include "optimizer/analysis/alias_analysis.h"
#include "optimizer/analysis/rpo.h"
#include "optimizer/analysis/loop_analyzer.h"
#include "optimizer/optimizations/memory_coalescing.h"

namespace panda::compiler {
/**
 * Basic analysis of variables used in loops. It works as follows:
 * 1) Identify variables that are derived from other variables and their difference (AddI, SubI supported).
 * 2) Based on the previous step, reveal loop variables and their iteration increment if possible.
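 *
 * For example, in a loop `for (i = 0; i < n; i += 2)` the phi `i` becomes a base
 * variable {initial = 0, step = 2}, and an AddI computing `i + 1` becomes a derived
 * variable {base = i, diff = 1}.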
 */
class VariableAnalysis {
public:
    struct BaseVariable {
        int64_t initial;
        int64_t step;
    };
    struct DerivedVariable {
        Inst *base;
        int64_t diff;
    };

    explicit VariableAnalysis(Graph *graph)
        : base_(graph->GetLocalAllocator()->Adapter()), derived_(graph->GetLocalAllocator()->Adapter())
    {
        for (auto block : graph->GetBlocksRPO()) {
            for (auto inst : block->AllInsts()) {
                if (GetCommonType(inst->GetType()) == DataType::INT64) {
                    AddUsers(inst);
                }
            }
        }
        for (auto loop : graph->GetRootLoop()->GetInnerLoops()) {
            if (loop->IsIrreducible()) {
                continue;
            }
            auto header = loop->GetHeader();
            for (auto phi : header->PhiInsts()) {
                constexpr auto INPUTS_COUNT = 2;
                if (phi->GetInputsCount() != INPUTS_COUNT || GetCommonType(phi->GetType()) != DataType::INT64) {
                    continue;
                }
                auto var = phi->CastToPhi();
                Inst *initial = var->GetPhiInput(var->GetPhiInputBb(0));
                Inst *update = var->GetPhiInput(var->GetPhiInputBb(1));
                if (var->GetPhiInputBb(0) != loop->GetPreHeader()) {
                    std::swap(initial, update);
                }

                if (!initial->IsConst()) {
                    continue;
                }

                if (derived_.find(update) != derived_.end()) {
                    auto initVal = static_cast<int64_t>(initial->CastToConstant()->GetIntValue());
                    base_[var] = {initVal, derived_[update].diff};
                }
            }
        }

        COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Evolution variables:";
        for (auto entry : base_) {
            COMPILER_LOG(DEBUG, MEMORY_COALESCING)
                << "v" << entry.first->GetId() << " = {" << entry.second.initial << ", " << entry.second.step << "}";
        }
        COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Loop variables:";
        for (auto entry : derived_) {
            COMPILER_LOG(DEBUG, MEMORY_COALESCING)
                << "v" << entry.first->GetId() << " = v" << entry.second.base->GetId() << " + " << entry.second.diff;
        }
    }

    DEFAULT_MOVE_SEMANTIC(VariableAnalysis);
    DEFAULT_COPY_SEMANTIC(VariableAnalysis);
    ~VariableAnalysis() = default;
    bool IsAnalyzed(Inst *inst) const
    {
        return derived_.find(inst) != derived_.end();
    }

    Inst *GetBase(Inst *inst) const
    {
        return derived_.at(inst).base;
    }

    int64_t GetInitial(Inst *inst) const
    {
        auto var = derived_.at(inst);
        return base_.at(var.base).initial + var.diff;
    }

    int64_t GetDiff(Inst *inst) const
    {
        return derived_.at(inst).diff;
    }

    int64_t GetStep(Inst *inst) const
    {
        return base_.at(derived_.at(inst).base).step;
    }

    bool IsEvoluted(Inst *inst) const
    {
        return derived_.at(inst).base->IsPhi();
    }

    bool HasKnownEvolution(Inst *inst) const
    {
        Inst *base = derived_.at(inst).base;
        return base->IsPhi() && base_.find(base) != base_.end();
    }

private:
    /// Add derived variables if we can deduce the change from INST
    void AddUsers(Inst *inst)
    {
        // Use int64_t for the accumulator to match DerivedVariable::diff and avoid truncation
        int64_t acc = 0;
        auto base = inst;
        if (derived_.find(inst) != derived_.end()) {
            acc += derived_[inst].diff;
            base = derived_[inst].base;
        } else {
            derived_[inst] = {inst, 0};
        }
        for (auto &user : inst->GetUsers()) {
            auto uinst = user.GetInst();
            ASSERT(uinst->IsPhi() || derived_.find(uinst) == derived_.end());
            switch (uinst->GetOpcode()) {
                case Opcode::AddI: {
                    auto val = static_cast<int64_t>(uinst->CastToAddI()->GetImm());
                    derived_[uinst] = {base, acc + val};
                    break;
                }
                case Opcode::SubI: {
                    auto val = static_cast<int64_t>(uinst->CastToSubI()->GetImm());
                    derived_[uinst] = {base, acc - val};
                    break;
                }
                default:
                    break;
            }
        }
    }

private:
    ArenaUnorderedMap<Inst *, struct BaseVariable> base_;
    ArenaUnorderedMap<Inst *, struct DerivedVariable> derived_;
};

/**
 * The visitor collects pairs of memory instructions that can be coalesced.
 * It operates within the scope of a basic block. While observing instructions we
 * collect memory instructions into one common queue of candidates that can be merged.
 *
 * A candidate is marked as invalid under the following conditions:
 * - it has already been paired
 * - it is a store and a SaveState has been met
 * - a barrier or an instruction that can throw has been met
 *
 * To pair valid array accesses:
 * - check that the accesses happen on consecutive indices of the same array
 * - find the lowest position the dominating access can be sunk to
 * - find the highest position the dominated access can be hoisted to
 * - if the highest position dominates the lowest position, coalescing is possible
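 *
 * For example, LoadArray v0[v1] and LoadArray v0[v1 + 1] collected in the same block
 * are pairable: the indices differ by one, so if the hoisting range of one access
 * intersects the sinking range of the other, both are replaced by a single paired
 * access.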
 */
class PairCreatorVisitor : public GraphVisitor {
public:
    explicit PairCreatorVisitor(Graph *graph, const AliasAnalysis &aliases, const VariableAnalysis &vars, Marker mrk,
                                bool aligned)
        : alignedOnly_(aligned),
          mrkInvalid_(mrk),
          graph_(graph),
          aliases_(aliases),
          vars_(vars),
          pairs_(graph->GetLocalAllocator()->Adapter()),
          candidates_(graph->GetLocalAllocator()->Adapter())
    {
    }

    const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override
    {
        return graph_->GetBlocksRPO();
    }

    NO_MOVE_SEMANTIC(PairCreatorVisitor);
    NO_COPY_SEMANTIC(PairCreatorVisitor);
    ~PairCreatorVisitor() override = default;

    static void VisitLoadArray(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccess(inst);
    }

    static void VisitStoreArray(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccess(inst);
    }

    static void VisitLoadArrayI(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccessI(inst);
    }

    static void VisitStoreArrayI(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccessI(inst);
    }

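    /// A store cannot be moved through a SaveStateDeoptimize, nor through a SaveState
    /// that has users which can throw or deoptimize.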
    static bool IsNotAcceptableForStore(Inst *inst)
    {
        if (inst->GetOpcode() == Opcode::SaveState) {
            for (auto &user : inst->GetUsers()) {
                auto *ui = user.GetInst();
                if (ui->CanThrow() || ui->CanDeoptimize()) {
                    return true;
                }
            }
        }
        return inst->GetOpcode() == Opcode::SaveStateDeoptimize;
    }

    void VisitDefault(Inst *inst) override
    {
        if (inst->IsMemory()) {
            candidates_.push_back(inst);
            return;
        }
        if (inst->IsSaveState()) {
            // 1. Loads & Stores can be moved through a SafePoint
            if (inst->GetOpcode() == Opcode::SafePoint) {
                return;
            }
            // 2. Loads & Stores can't be moved through a SaveStateOsr
            if (inst->GetOpcode() == Opcode::SaveStateOsr) {
                candidates_.clear();
                return;
            }
            // 3. Loads can be moved through a SaveState and a SaveStateDeoptimize
            // 4. Stores can't be moved through a SaveStateDeoptimize or a SaveState with users that
            //    can throw or deoptimize. This is checked in IsNotAcceptableForStore
            if (IsNotAcceptableForStore(inst)) {
                InvalidateStores();
                return;
            }
        }
        if (inst->IsBarrier()) {
            candidates_.clear();
            return;
        }
        if (inst->CanThrow()) {
            InvalidateStores();
            return;
        }
    }

    void Reset()
    {
        candidates_.clear();
    }

    ArenaUnorderedMap<Inst *, MemoryCoalescing::CoalescedPair> &GetPairs()
    {
        return pairs_;
    }

#include "optimizer/ir/visitor.inc"
private:
    void InvalidateStores()
    {
        for (auto cand : candidates_) {
            if (cand->IsStore()) {
                cand->SetMarker(mrkInvalid_);
            }
        }
    }

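    /// Check whether INST is an already created paired access.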
    static bool IsPairInst(Inst *inst)
    {
        switch (inst->GetOpcode()) {
            case Opcode::LoadArrayPair:
            case Opcode::LoadArrayPairI:
            case Opcode::StoreArrayPair:
            case Opcode::StoreArrayPairI:
                return true;
            default:
                return false;
        }
    }

    /**
     * Return the highest instruction that INST can be inserted after (within the scope of the basic block).
     * Considers aliased memory accesses and volatile operations. CHECK_CFG enables the check of INST inputs
     * as well.
     */
    Inst *FindUpperInsertAfter(Inst *inst, Inst *bound, bool checkCfg)
    {
        ASSERT(bound != nullptr);
        auto upperAfter = bound;
        // We do not move higher than bound
        auto lowerInput = upperAfter;
        if (checkCfg) {
            // Update upper bound according to def-use chains
            for (auto &inputItem : inst->GetInputs()) {
                auto input = inputItem.GetInst();
                if (input->GetBasicBlock() == inst->GetBasicBlock() && lowerInput->IsPrecedingInSameBlock(input)) {
                    ASSERT(input->IsPrecedingInSameBlock(inst));
                    lowerInput = input;
                }
            }
            upperAfter = lowerInput;
        }

        auto boundIt = std::find(candidates_.rbegin(), candidates_.rend(), bound);
        ASSERT(boundIt != candidates_.rend());
        for (auto it = candidates_.rbegin(); it != boundIt; it++) {
            auto cand = *it;
            if (checkCfg && cand->IsPrecedingInSameBlock(lowerInput)) {
                return lowerInput;
            }
            // Can't hoist a load over an aliased store or a store over aliased memory instructions
            if (inst->IsStore() || cand->IsStore()) {
                auto checkInst = cand;
                if (IsPairInst(cand)) {
                    // We have already checked the second inst. We now want to check the first one
                    // for alias.
                    auto pair = pairs_[cand];
                    checkInst = pair.first->IsPrecedingInSameBlock(pair.second) ? pair.first : pair.second;
                }
                if (aliases_.CheckInstAlias(inst, checkInst) != NO_ALIAS) {
                    return cand;
                }
            }
            // Can't hoist over a volatile load
            if (cand->IsLoad() && IsVolatileMemInst(cand)) {
                return cand;
            }
        }
        return upperAfter;
    }

    /**
     * Return the lowest instruction that INST can be inserted after (within the scope of the basic block).
     * Considers aliased memory accesses and volatile operations. CHECK_CFG enables the check of INST users
     * as well.
     */
    Inst *FindLowerInsertAfter(Inst *inst, Inst *bound, bool checkCfg = true)
    {
        ASSERT(bound != nullptr);
        auto lowerAfter = bound->GetPrev();
        // We do not move lower than bound
        auto upperUser = lowerAfter;
        ASSERT(upperUser != nullptr);
        if (checkCfg) {
            // Update lower bound according to def-use chains
            for (auto &userItem : inst->GetUsers()) {
                auto user = userItem.GetInst();
                if (!user->IsPhi() && user->GetBasicBlock() == inst->GetBasicBlock() &&
                    user->IsPrecedingInSameBlock(upperUser)) {
                    ASSERT(inst->IsPrecedingInSameBlock(user));
                    upperUser = user->GetPrev();
                    ASSERT(upperUser != nullptr);
                }
            }
            lowerAfter = upperUser;
        }

        auto instIt = std::find(candidates_.begin(), candidates_.end(), inst);
        ASSERT(instIt != candidates_.end());
        for (auto it = instIt + 1; it != candidates_.end(); it++) {
            auto cand = *it;
            if (checkCfg && upperUser->IsPrecedingInSameBlock(cand)) {
                return upperUser;
            }
            // Can't lower a load over an aliased store or a store over aliased memory instructions
            if (inst->IsStore() || cand->IsStore()) {
                auto checkInst = cand;
                if (IsPairInst(cand)) {
                    // We have already checked the first inst. We now want to check the second one
                    // for alias.
                    auto pair = pairs_[cand];
                    checkInst = pair.first->IsPrecedingInSameBlock(pair.second) ? pair.second : pair.first;
                }
                if (aliases_.CheckInstAlias(inst, checkInst) != NO_ALIAS) {
                    ASSERT(cand->GetPrev() != nullptr);
                    return cand->GetPrev();
                }
            }
            // Can't lower over a volatile store
            if (cand->IsStore() && IsVolatileMemInst(cand)) {
                ASSERT(cand->GetPrev() != nullptr);
                return cand->GetPrev();
            }
        }
        return lowerAfter;
    }

    /// Add a pair if the difference between the indices equals one. The first element of the pair has the lower index.
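    /// E.g. accesses to v0[v1] and v0[v1 + 1] form the pair {v0[v1], v0[v1 + 1]},
    /// provided a common insertion point for the paired access can be found.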
    bool TryAddCoalescedPair(Inst *inst, int64_t instIdx, Inst *cand, int64_t candIdx)
    {
        Inst *first = nullptr;
        Inst *second = nullptr;
        Inst *insertAfter = nullptr;
        if (instIdx == candIdx - 1) {
            first = inst;
            second = cand;
        } else if (candIdx == instIdx - 1) {
            first = cand;
            second = inst;
        } else {
            return false;
        }

        ASSERT(inst->IsMemory() && cand->IsMemory());
        ASSERT(inst->GetOpcode() == cand->GetOpcode());
        ASSERT(inst != cand && cand->IsPrecedingInSameBlock(inst));
        Inst *candLowerAfter = nullptr;
        Inst *instUpperAfter = nullptr;
        if (first->IsLoad()) {
            // Consider dominance of load users
            bool checkCfg = true;
            candLowerAfter = FindLowerInsertAfter(cand, inst, checkCfg);
            // The index is not needed if v0[v1] precedes v0[v1 + 1] because v1 + 1 is not used in the paired load.
            checkCfg = second->IsPrecedingInSameBlock(first);
            instUpperAfter = FindUpperInsertAfter(inst, cand, checkCfg);
        } else if (first->IsStore()) {
            // Store instructions do not have users. Don't check them
            bool checkCfg = false;
            candLowerAfter = FindLowerInsertAfter(cand, inst, checkCfg);
            // Should check that the stored value is ready
            checkCfg = true;
            instUpperAfter = FindUpperInsertAfter(inst, cand, checkCfg);
        } else {
            UNREACHABLE();
        }

        // No intersection in reordering ranges
        if (!instUpperAfter->IsPrecedingInSameBlock(candLowerAfter)) {
            return false;
        }
        if (cand->IsPrecedingInSameBlock(instUpperAfter)) {
            insertAfter = instUpperAfter;
        } else {
            insertAfter = cand;
        }

        first->SetMarker(mrkInvalid_);
        second->SetMarker(mrkInvalid_);
        InsertPair(first, second, insertAfter);
        return true;
    }

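    /// Handle an array access with an immediate index: try to pair it with a collected
    /// candidate that accesses the same array at a neighboring index.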
    void HandleArrayAccessI(Inst *inst)
    {
        Inst *obj = inst->GetDataFlowInput(inst->GetInput(0).GetInst());
        uint64_t idx = GetInstImm(inst);
        if (!MemoryCoalescing::AcceptedType(inst->GetType())) {
            candidates_.push_back(inst);
            return;
        }
        /* The last candidates are more likely to be coalesced */
        for (auto iter = candidates_.rbegin(); iter != candidates_.rend(); iter++) {
            auto cand = *iter;
            /* Skip uninteresting candidates */
            if (cand->IsMarked(mrkInvalid_) || cand->GetOpcode() != inst->GetOpcode()) {
                continue;
            }

            Inst *candObj = cand->GetDataFlowInput(cand->GetInput(0).GetInst());
            /* Array objects must alias each other */
            if (aliases_.CheckRefAlias(obj, candObj) != MUST_ALIAS) {
                continue;
            }
            /* The difference between the indices should be equal to one */
            uint64_t candIdx = GetInstImm(cand);
            /* To keep alignment the lowest index should be even */
            if (alignedOnly_ && ((idx < candIdx && (idx & 1U) != 0) || (candIdx < idx && (candIdx & 1U) != 0))) {
                continue;
            }
            if (TryAddCoalescedPair(inst, idx, cand, candIdx)) {
                break;
            }
        }

        candidates_.push_back(inst);
    }

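    /// Check that two loop-evolved indices advance by the same step and, if only aligned
    /// accesses are allowed, that the alignment of the lower access can be preserved.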
    bool HandleKnownEvolutionArrayAccessVar(Inst *idx, Inst *candIdx, int64_t idxInitial, int64_t candInitial)
    {
        /* Accesses inside a loop */
        auto idxStep = vars_.GetStep(idx);
        auto candStep = vars_.GetStep(candIdx);
        /* The indices should be incremented by the same value, and the increment
           should be even to preserve alignment */
        if (idxStep != candStep) {
            return false;
        }
        /* To keep alignment we need an even step and an even lowest initial value */
        constexpr auto IMM_2 = 2;
        // NOLINTBEGIN(readability-simplify-boolean-expr)
        if (alignedOnly_ && idxStep % IMM_2 != 0 &&
            ((idxInitial < candInitial && idxInitial % IMM_2 != 0) ||
             (candInitial < idxInitial && candInitial % IMM_2 != 0))) {
            return false;
        }
        return true;
        // NOLINTEND(readability-simplify-boolean-expr)
    }

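    /// Handle an array access with a variable index: using the variable analysis, try to
    /// pair it with a collected candidate that accesses the same array at a neighboring index.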
    void HandleArrayAccess(Inst *inst)
    {
        Inst *obj = inst->GetDataFlowInput(inst->GetInput(0).GetInst());
        Inst *idx = inst->GetDataFlowInput(inst->GetInput(1).GetInst());
        if (!vars_.IsAnalyzed(idx) || !MemoryCoalescing::AcceptedType(inst->GetType())) {
            candidates_.push_back(inst);
            return;
        }
        /* The last candidates are more likely to be coalesced */
        for (auto iter = candidates_.rbegin(); iter != candidates_.rend(); iter++) {
            auto cand = *iter;
            /* Skip uninteresting candidates */
            if (cand->IsMarked(mrkInvalid_) || cand->GetOpcode() != inst->GetOpcode()) {
                continue;
            }

            Inst *candObj = cand->GetDataFlowInput(cand->GetInput(0).GetInst());
            auto candIdx = cand->GetDataFlowInput(cand->GetInput(1).GetInst());
            /* We need info about the candidate's index, and the array objects must alias each other */
            if (!vars_.IsAnalyzed(candIdx) || aliases_.CheckRefAlias(obj, candObj) != MUST_ALIAS) {
                continue;
            }
            if (vars_.HasKnownEvolution(idx) && vars_.HasKnownEvolution(candIdx)) {
                auto idxInitial = vars_.GetInitial(idx);
                auto candInitial = vars_.GetInitial(candIdx);
                if (!HandleKnownEvolutionArrayAccessVar(idx, candIdx, idxInitial, candInitial)) {
                    continue;
                }
                if (TryAddCoalescedPair(inst, idxInitial, cand, candInitial)) {
                    break;
                }
            } else if (!alignedOnly_ && !vars_.HasKnownEvolution(idx) && !vars_.HasKnownEvolution(candIdx)) {
                /* Accesses outside a loop */
                if (vars_.GetBase(idx) != vars_.GetBase(candIdx)) {
                    continue;
                }
                if (TryAddCoalescedPair(inst, vars_.GetDiff(idx), cand, vars_.GetDiff(candIdx))) {
                    break;
                }
            }
        }

        candidates_.push_back(inst);
    }

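    /// Replace FIRST and SECOND by a single paired access placed after INSERT_AFTER and
    /// record the new instruction in the map of coalesced pairs.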
    void InsertPair(Inst *first, Inst *second, Inst *insertAfter)
    {
        COMPILER_LOG(DEBUG, MEMORY_COALESCING)
            << "Access that may be coalesced: v" << first->GetId() << " v" << second->GetId();

        ASSERT(first->GetType() == second->GetType());
        Inst *paired = nullptr;
        switch (first->GetOpcode()) {
            case Opcode::LoadArray:
                paired = ReplaceLoadArray(first, second, insertAfter);
                break;
            case Opcode::LoadArrayI:
                paired = ReplaceLoadArrayI(first, second, insertAfter);
                break;
            case Opcode::StoreArray:
                paired = ReplaceStoreArray(first, second, insertAfter);
                break;
            case Opcode::StoreArrayI:
                paired = ReplaceStoreArrayI(first, second, insertAfter);
                break;
            default:
                UNREACHABLE();
        }

        COMPILER_LOG(DEBUG, MEMORY_COALESCING)
            << "Coalescing of {v" << first->GetId() << " v" << second->GetId() << "} is successful";
        graph_->GetEventWriter().EventMemoryCoalescing(first->GetId(), first->GetPc(), second->GetId(),
                                                       second->GetPc(), paired->GetId(),
                                                       paired->IsStore() ? "Store" : "Load");

        pairs_[paired] = {first, second};
        paired->SetMarker(mrkInvalid_);
        candidates_.insert(std::find_if(candidates_.rbegin(), candidates_.rend(),
                                        [paired](auto x) { return x->IsPrecedingInSameBlock(paired); })
                               .base(),
                           paired);
    }

    Inst *ReplaceLoadArray(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::LoadArray);
        ASSERT(second->GetOpcode() == Opcode::LoadArray);

        auto pload = graph_->CreateInstLoadArrayPair(first->GetType(), INVALID_PC, first->GetInput(0).GetInst(),
                                                     first->GetInput(1).GetInst());
        pload->CastToLoadArrayPair()->SetNeedBarrier(first->CastToLoadArray()->GetNeedBarrier() ||
                                                     second->CastToLoadArray()->GetNeedBarrier());
        insertAfter->InsertAfter(pload);
        if (first->CanThrow() || second->CanThrow()) {
            pload->SetFlag(compiler::inst_flags::CAN_THROW);
        }
        MemoryCoalescing::RemoveAddI(pload);
        return pload;
    }

    Inst *ReplaceLoadArrayI(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::LoadArrayI);
        ASSERT(second->GetOpcode() == Opcode::LoadArrayI);

        auto pload = graph_->CreateInstLoadArrayPairI(first->GetType(), INVALID_PC, first->GetInput(0).GetInst(),
                                                      first->CastToLoadArrayI()->GetImm());
        pload->CastToLoadArrayPairI()->SetNeedBarrier(first->CastToLoadArrayI()->GetNeedBarrier() ||
                                                      second->CastToLoadArrayI()->GetNeedBarrier());
        insertAfter->InsertAfter(pload);
        if (first->CanThrow() || second->CanThrow()) {
            pload->SetFlag(compiler::inst_flags::CAN_THROW);
        }

        return pload;
    }

    Inst *ReplaceStoreArray(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::StoreArray);
        ASSERT(second->GetOpcode() == Opcode::StoreArray);

        auto pstore = graph_->CreateInstStoreArrayPair(
            first->GetType(), INVALID_PC, first->GetInput(0).GetInst(), first->CastToStoreArray()->GetIndex(),
            first->CastToStoreArray()->GetStoredValue(), second->CastToStoreArray()->GetStoredValue());
        pstore->CastToStoreArrayPair()->SetNeedBarrier(first->CastToStoreArray()->GetNeedBarrier() ||
                                                       second->CastToStoreArray()->GetNeedBarrier());
        insertAfter->InsertAfter(pstore);
        if (first->CanThrow() || second->CanThrow()) {
            pstore->SetFlag(compiler::inst_flags::CAN_THROW);
        }
        MemoryCoalescing::RemoveAddI(pstore);
        return pstore;
    }

    Inst *ReplaceStoreArrayI(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::StoreArrayI);
        ASSERT(second->GetOpcode() == Opcode::StoreArrayI);

        auto pstore = graph_->CreateInstStoreArrayPairI(
            first->GetType(), INVALID_PC, first->GetInput(0).GetInst(), first->CastToStoreArrayI()->GetStoredValue(),
            second->CastToStoreArrayI()->GetStoredValue(), first->CastToStoreArrayI()->GetImm());
        pstore->CastToStoreArrayPairI()->SetNeedBarrier(first->CastToStoreArrayI()->GetNeedBarrier() ||
                                                        second->CastToStoreArrayI()->GetNeedBarrier());
        insertAfter->InsertAfter(pstore);
        if (first->CanThrow() || second->CanThrow()) {
            pstore->SetFlag(compiler::inst_flags::CAN_THROW);
        }

        return pstore;
    }

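    /// Return the immediate index of an array access instruction.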
    uint64_t GetInstImm(Inst *inst)
    {
        switch (inst->GetOpcode()) {
            case Opcode::LoadArrayI:
                return inst->CastToLoadArrayI()->GetImm();
            case Opcode::StoreArrayI:
                return inst->CastToStoreArrayI()->GetImm();
            default:
                UNREACHABLE();
        }
    }

private:
    bool alignedOnly_;
    Marker mrkInvalid_;
    Graph *graph_ {nullptr};
    const AliasAnalysis &aliases_;
    const VariableAnalysis &vars_;
    ArenaUnorderedMap<Inst *, MemoryCoalescing::CoalescedPair> pairs_;
    InstVector candidates_;
};

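/// Redirect users of LOAD to a LoadPairPart that extracts element DST_IDX of PAIRED_LOAD.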
static void ReplaceLoadByPair(Inst *load, Inst *pairedLoad, int32_t dstIdx)
{
    auto graph = pairedLoad->GetBasicBlock()->GetGraph();
    auto pairGetter = graph->CreateInstLoadPairPart(load->GetType(), INVALID_PC, pairedLoad, dstIdx);
    load->ReplaceUsers(pairGetter);
    pairedLoad->InsertAfter(pairGetter);
}

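/// If the index of the paired access is computed by an AddI, fold the AddI immediate into
/// the paired access and use the AddI input as the index instead.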
void MemoryCoalescing::RemoveAddI(Inst *inst)
{
    auto opcode = inst->GetOpcode();
    ASSERT(opcode == Opcode::LoadArrayPair || opcode == Opcode::StoreArrayPair);
    auto input1 = inst->GetInput(1).GetInst();
    if (input1->GetOpcode() == Opcode::AddI) {
        uint64_t imm = input1->CastToAddI()->GetImm();
        if (opcode == Opcode::LoadArrayPair) {
            inst->CastToLoadArrayPair()->SetImm(imm);
        } else if (opcode == Opcode::StoreArrayPair) {
            inst->CastToStoreArrayPair()->SetImm(imm);
        }
        inst->SetInput(1, input1->GetInput(0).GetInst());
    }
}

/**
 * This optimization coalesces two loads (stores) that read (write) values from (to) consecutive memory locations
 * into a single operation.
 *
 * 1) If we have two memory instructions that can be coalesced, we try to find a position for the
 *    coalesced operation. If one exists, the memory operations are coalesced; otherwise they are skipped.
 * 2) AArch64 instructions require memory address alignment. For arrays
 *    this means we can coalesce only accesses that start from an even index.
 * 3) The implemented coalescing for arrays assumes there are no volatile array element accesses.
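 *
 * For example, for loads (stores are handled similarly):
 *     v3 = LoadArray v0, v1            v5 = LoadArrayPair v0, v1
 *     v4 = LoadArray v0, v1 + 1   =>   v3 = LoadPairPart v5, 0
 *                                      v4 = LoadPairPart v5, 1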
 */
bool MemoryCoalescing::RunImpl()
{
    if (GetGraph()->GetArch() != Arch::AARCH64) {
        COMPILER_LOG(INFO, MEMORY_COALESCING) << "Skipping Memory Coalescing for unsupported architecture";
        return false;
    }

    GetGraph()->RunPass<DominatorsTree>();
    GetGraph()->RunPass<LoopAnalyzer>();
    GetGraph()->RunPass<AliasAnalysis>();

    VariableAnalysis variables(GetGraph());
    auto &aliases = GetGraph()->GetValidAnalysis<AliasAnalysis>();
    Marker mrk = GetGraph()->NewMarker();
    PairCreatorVisitor collector(GetGraph(), aliases, variables, mrk, alignedOnly_);
    for (auto block : GetGraph()->GetBlocksRPO()) {
        collector.VisitBlock(block);
        collector.Reset();
    }
    GetGraph()->EraseMarker(mrk);

    for (auto pair : collector.GetPairs()) {
        auto bb = pair.first->GetBasicBlock();
        if (pair.first->IsLoad()) {
            ReplaceLoadByPair(pair.second.second, pair.first, 1);
            ReplaceLoadByPair(pair.second.first, pair.first, 0);
        }
        bb->RemoveInst(pair.second.first);
        bb->RemoveInst(pair.second.second);
    }

    if (!collector.GetPairs().empty()) {
        SaveStateBridgesBuilder ssb;
        for (auto bb : GetGraph()->GetBlocksRPO()) {
            if (!bb->IsEmpty() && !bb->IsStartBlock()) {
                ssb.FixSaveStatesInBB(bb);
            }
        }
    }
    COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Memory Coalescing complete";
    return !collector.GetPairs().empty();
}
}  // namespace panda::compiler