/*
 * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compiler_logger.h"
#include "optimizer/ir/graph_visitor.h"
#include "optimizer/ir/basicblock.h"
#include "optimizer/ir/inst.h"
#include "optimizer/ir/analysis.h"
#include "optimizer/analysis/alias_analysis.h"
#include "optimizer/analysis/rpo.h"
#include "optimizer/analysis/loop_analyzer.h"
#include "optimizer/optimizations/memory_coalescing.h"

namespace panda::compiler {
/**
 * Basic analysis of variables used in loops. It works as follows:
 * 1) Identify variables that are derived from other variables and the difference between them (AddI and SubI are supported).
 * 2) Based on the previous step, reveal loop variables and, where possible, their iteration increment.
 * See the illustrative sketch below.
 */
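/*
 * Illustrative sketch (hypothetical instruction IDs, not taken from real compiler output):
 * for a loop such as
 *
 *     for (int64_t i = 0; i < n; i += 2) { use(a[i]); use(a[i + 1]); }
 *
 * the phi for `i` becomes a base variable {initial = 0, step = 2}, while values
 * computed from it by AddI/SubI become derived variables:
 *
 *     v1 = Phi(0, v2)      // base:    {initial = 0, step = 2}
 *     v2 = AddI v1, 2      // derived: {base = v1, diff = 2}
 *     v3 = AddI v1, 1      // derived: {base = v1, diff = 1}
 */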
class VariableAnalysis {
public:
    struct BaseVariable {
        int64_t initial;
        int64_t step;
    };
    struct DerivedVariable {
        Inst *base;
        int64_t diff;
    };

    explicit VariableAnalysis(Graph *graph)
        : base_(graph->GetLocalAllocator()->Adapter()), derived_(graph->GetLocalAllocator()->Adapter())
    {
        for (auto block : graph->GetBlocksRPO()) {
            for (auto inst : block->AllInsts()) {
                if (GetCommonType(inst->GetType()) == DataType::INT64) {
                    AddUsers(inst);
                }
            }
        }
        for (auto loop : graph->GetRootLoop()->GetInnerLoops()) {
            if (loop->IsIrreducible()) {
                continue;
            }
            auto header = loop->GetHeader();
            for (auto phi : header->PhiInsts()) {
                constexpr auto INPUTS_COUNT = 2;
                if (phi->GetInputsCount() != INPUTS_COUNT || GetCommonType(phi->GetType()) != DataType::INT64) {
                    continue;
                }
                auto var = phi->CastToPhi();
                Inst *initial = var->GetPhiInput(var->GetPhiInputBb(0));
                Inst *update = var->GetPhiInput(var->GetPhiInputBb(1));
                if (var->GetPhiInputBb(0) != loop->GetPreHeader()) {
                    std::swap(initial, update);
                }

                if (!initial->IsConst()) {
                    continue;
                }

                if (derived_.find(update) != derived_.end()) {
                    auto initVal = static_cast<int64_t>(initial->CastToConstant()->GetIntValue());
                    base_[var] = {initVal, derived_[update].diff};
                }
            }
        }

        COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Evolution variables:";
        for (auto entry : base_) {
            COMPILER_LOG(DEBUG, MEMORY_COALESCING)
                << "v" << entry.first->GetId() << " = {" << entry.second.initial << ", " << entry.second.step << "}";
        }
        COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Loop variables:";
        for (auto entry : derived_) {
            COMPILER_LOG(DEBUG, MEMORY_COALESCING)
                << "v" << entry.first->GetId() << " = v" << entry.second.base->GetId() << " + " << entry.second.diff;
        }
    }

    DEFAULT_MOVE_SEMANTIC(VariableAnalysis);
    DEFAULT_COPY_SEMANTIC(VariableAnalysis);
    ~VariableAnalysis() = default;

    bool IsAnalyzed(Inst *inst) const
    {
        return derived_.find(inst) != derived_.end();
    }

    Inst *GetBase(Inst *inst) const
    {
        return derived_.at(inst).base;
    }

    int64_t GetInitial(Inst *inst) const
    {
        auto var = derived_.at(inst);
        return base_.at(var.base).initial + var.diff;
    }

    int64_t GetDiff(Inst *inst) const
    {
        return derived_.at(inst).diff;
    }

    int64_t GetStep(Inst *inst) const
    {
        return base_.at(derived_.at(inst).base).step;
    }

    bool IsEvoluted(Inst *inst) const
    {
        return derived_.at(inst).base->IsPhi();
    }

    bool HasKnownEvolution(Inst *inst) const
    {
        Inst *base = derived_.at(inst).base;
        return base->IsPhi() && base_.find(base) != base_.end();
    }

private:
    /// Add derived variables if we can deduce the change from INST
    void AddUsers(Inst *inst)
    {
        auto acc = 0;
        auto base = inst;
        if (derived_.find(inst) != derived_.end()) {
            acc += derived_[inst].diff;
            base = derived_[inst].base;
        } else {
            derived_[inst] = {inst, 0};
        }
        for (auto &user : inst->GetUsers()) {
            auto uinst = user.GetInst();
            ASSERT(uinst->IsPhi() || derived_.find(uinst) == derived_.end());
            switch (uinst->GetOpcode()) {
                case Opcode::AddI: {
                    auto val = static_cast<int64_t>(uinst->CastToAddI()->GetImm());
                    derived_[uinst] = {base, acc + val};
                    break;
                }
                case Opcode::SubI: {
                    auto val = static_cast<int64_t>(uinst->CastToSubI()->GetImm());
                    derived_[uinst] = {base, acc - val};
                    break;
                }
                default:
                    break;
            }
        }
    }

private:
    ArenaUnorderedMap<Inst *, struct BaseVariable> base_;
    ArenaUnorderedMap<Inst *, struct DerivedVariable> derived_;
};

/**
 * The visitor collects pairs of memory instructions that can be coalesced.
 * It operates within the scope of a basic block. While observing instructions, we
 * collect memory instructions into one common queue of candidates that can be merged.
 *
 * A candidate is marked as invalid under the following conditions:
 * - it has already been paired
 * - it is a store and a SaveState has been met
 * - a BARRIER or CAN_THROW instruction has been met
 *
 * To pair valid array accesses:
 * - check that the accesses happen on consecutive indices of the same array
 * - find the lowest position the dominating access can be sunk to
 * - find the highest position the dominated access can be hoisted to
 * - if the highest position dominates the lowest position, coalescing is possible
 * An illustrative sketch follows.
 */
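/*
 * Illustrative sketch (hypothetical instruction IDs, not taken from real compiler
 * output; v0 is the array, v1 the index): within one basic block the accesses
 *
 *     v10 = LoadArray v0, v1           // a[i]
 *     v11 = AddI      v1, 1            // i + 1
 *     v12 = LoadArray v0, v11          // a[i + 1]
 *
 * can be merged into a single paired access whose two values are extracted with
 * LoadPairPart, roughly:
 *
 *     v13 = LoadArrayPair v0, v1       // reads a[i] and a[i + 1]
 *     v14 = LoadPairPart  v13, 0       // replaces users of v10
 *     v15 = LoadPairPart  v13, 1       // replaces users of v12
 */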
class PairCreatorVisitor : public GraphVisitor {
public:
    explicit PairCreatorVisitor(Graph *graph, const AliasAnalysis &aliases, const VariableAnalysis &vars, Marker mrk,
                                bool aligned)
        : alignedOnly_(aligned),
          mrkInvalid_(mrk),
          graph_(graph),
          aliases_(aliases),
          vars_(vars),
          pairs_(graph->GetLocalAllocator()->Adapter()),
          candidates_(graph->GetLocalAllocator()->Adapter())
    {
    }

    const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override
    {
        return graph_->GetBlocksRPO();
    }

    NO_MOVE_SEMANTIC(PairCreatorVisitor);
    NO_COPY_SEMANTIC(PairCreatorVisitor);
    ~PairCreatorVisitor() override = default;

    static void VisitLoadArray(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccess(inst);
    }

    static void VisitStoreArray(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccess(inst);
    }

    static void VisitLoadArrayI(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccessI(inst);
    }

    static void VisitStoreArrayI(GraphVisitor *v, Inst *inst)
    {
        static_cast<PairCreatorVisitor *>(v)->HandleArrayAccessI(inst);
    }

    static bool IsNotAcceptableForStore(Inst *inst)
    {
        if (inst->GetOpcode() == Opcode::SaveState) {
            for (auto &user : inst->GetUsers()) {
                auto *ui = user.GetInst();
                if (ui->CanThrow() || ui->CanDeoptimize()) {
                    return true;
                }
            }
        }
        return inst->GetOpcode() == Opcode::SaveStateDeoptimize;
    }

    void VisitDefault(Inst *inst) override
    {
        if (inst->IsMemory()) {
            candidates_.push_back(inst);
            return;
        }
        if (inst->IsSaveState()) {
            // 1. Loads & Stores can be moved through a SafePoint
            if (inst->GetOpcode() == Opcode::SafePoint) {
                return;
            }
            // 2. Loads & Stores can't be moved through a SaveStateOsr
            if (inst->GetOpcode() == Opcode::SaveStateOsr) {
                candidates_.clear();
                return;
            }
            // 3. Loads can be moved through a SaveState and a SaveStateDeoptimize
            // 4. Stores can't be moved through a SaveStateDeoptimize, or through a SaveState that has users
            //    that can throw or deoptimize. This is checked in IsNotAcceptableForStore
            if (IsNotAcceptableForStore(inst)) {
                InvalidateStores();
                return;
            }
        }
        if (inst->IsBarrier()) {
            candidates_.clear();
            return;
        }
        if (inst->CanThrow()) {
            InvalidateStores();
            return;
        }
    }

    void Reset()
    {
        candidates_.clear();
    }

    ArenaUnorderedMap<Inst *, MemoryCoalescing::CoalescedPair> &GetPairs()
    {
        return pairs_;
    }

#include "optimizer/ir/visitor.inc"
private:
    void InvalidateStores()
    {
        for (auto cand : candidates_) {
            if (cand->IsStore()) {
                cand->SetMarker(mrkInvalid_);
            }
        }
    }

    static bool IsPairInst(Inst *inst)
    {
        switch (inst->GetOpcode()) {
            case Opcode::LoadArrayPair:
            case Opcode::LoadArrayPairI:
            case Opcode::StoreArrayPair:
            case Opcode::StoreArrayPairI:
                return true;
            default:
                return false;
        }
    }

    /**
     * Return the highest instruction that INST can be inserted after (within the basic block).
     * Considers aliased memory accesses and volatile operations. CHECK_CFG additionally enables the
     * check of INST's inputs. See the sketch below.
     */
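    /*
     * Illustrative sketch (hypothetical IDs and aliasing verdicts): with candidates
     *
     *     v1 = LoadArray  v0, v4       // BOUND
     *     v2 = StoreArray v9, v5, v6   // MAY_ALIAS with INST
     *     v3 = LoadArray  v0, v7       // INST
     *
     * the backward scan from INST stops at the aliased store v2 and returns it:
     * INST may not be hoisted above v2.
     */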
    Inst *FindUpperInsertAfter(Inst *inst, Inst *bound, bool checkCfg)
    {
        ASSERT(bound != nullptr);
        auto upperAfter = bound;
        // We do not move higher than bound
        auto lowerInput = upperAfter;
        if (checkCfg) {
            // Update upper bound according to def-use chains
            for (auto &inputItem : inst->GetInputs()) {
                auto input = inputItem.GetInst();
                if (input->GetBasicBlock() == inst->GetBasicBlock() && lowerInput->IsPrecedingInSameBlock(input)) {
                    ASSERT(input->IsPrecedingInSameBlock(inst));
                    lowerInput = input;
                }
            }
            upperAfter = lowerInput;
        }

        auto boundIt = std::find(candidates_.rbegin(), candidates_.rend(), bound);
        ASSERT(boundIt != candidates_.rend());
        for (auto it = candidates_.rbegin(); it != boundIt; it++) {
            auto cand = *it;
            if (checkCfg && cand->IsPrecedingInSameBlock(lowerInput)) {
                return lowerInput;
            }
            // Can't hoist a load over an aliased store, or a store over aliased memory instructions
            if (inst->IsStore() || cand->IsStore()) {
                auto checkInst = cand;
                if (IsPairInst(cand)) {
                    // We have already checked the second inst. We now want to check the first one
                    // for alias.
                    auto pair = pairs_[cand];
                    checkInst = pair.first->IsPrecedingInSameBlock(pair.second) ? pair.first : pair.second;
                }
                if (aliases_.CheckInstAlias(inst, checkInst) != NO_ALIAS) {
                    return cand;
                }
            }
            // Can't hoist over a volatile load
            if (cand->IsLoad() && IsVolatileMemInst(cand)) {
                return cand;
            }
        }
        return upperAfter;
    }

    /**
     * Return the lowest instruction that INST can be inserted after (within the basic block).
     * Considers aliased memory accesses and volatile operations. CHECK_CFG additionally enables the
     * check of INST's users. See the sketch below.
     */
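    /*
     * Illustrative sketch (hypothetical IDs; assume v8 does not alias v0): with candidates
     *
     *     v1 = LoadArray  v0, v4       // INST
     *     v2 = StoreArray v8, v5, v6   // volatile store
     *     v3 = LoadArray  v0, v7       // BOUND
     *
     * the forward scan from INST stops at the volatile store v2 and returns the
     * instruction immediately preceding it: INST may not be sunk below v2.
     */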
    Inst *FindLowerInsertAfter(Inst *inst, Inst *bound, bool checkCfg = true)
    {
        ASSERT(bound != nullptr);
        auto lowerAfter = bound->GetPrev();
        // We do not move lower than bound
        auto upperUser = lowerAfter;
        ASSERT(upperUser != nullptr);
        if (checkCfg) {
            // Update lower bound according to def-use chains
            for (auto &userItem : inst->GetUsers()) {
                auto user = userItem.GetInst();
                if (!user->IsPhi() && user->GetBasicBlock() == inst->GetBasicBlock() &&
                    user->IsPrecedingInSameBlock(upperUser)) {
                    ASSERT(inst->IsPrecedingInSameBlock(user));
                    upperUser = user->GetPrev();
                    ASSERT(upperUser != nullptr);
                }
            }
            lowerAfter = upperUser;
        }

        auto instIt = std::find(candidates_.begin(), candidates_.end(), inst);
        ASSERT(instIt != candidates_.end());
        for (auto it = instIt + 1; it != candidates_.end(); it++) {
            auto cand = *it;
            if (checkCfg && upperUser->IsPrecedingInSameBlock(cand)) {
                return upperUser;
            }
            // Can't sink a load below an aliased store, or a store below aliased memory instructions
            if (inst->IsStore() || cand->IsStore()) {
                auto checkInst = cand;
                if (IsPairInst(cand)) {
                    // We have already checked the first inst. We now want to check the second one
                    // for alias.
                    auto pair = pairs_[cand];
                    checkInst = pair.first->IsPrecedingInSameBlock(pair.second) ? pair.second : pair.first;
                }
                if (aliases_.CheckInstAlias(inst, checkInst) != NO_ALIAS) {
                    ASSERT(cand->GetPrev() != nullptr);
                    return cand->GetPrev();
                }
            }
            // Can't sink below a volatile store
            if (cand->IsStore() && IsVolatileMemInst(cand)) {
                ASSERT(cand->GetPrev() != nullptr);
                return cand->GetPrev();
            }
        }
        return lowerAfter;
    }

    /// Add a pair if the difference between the indices equals one, e.g. a[i] and a[i + 1]. The first element of the pair is the access with the lower index.
    bool TryAddCoalescedPair(Inst *inst, int64_t instIdx, Inst *cand, int64_t candIdx)
    {
        Inst *first = nullptr;
        Inst *second = nullptr;
        Inst *insertAfter = nullptr;
        if (instIdx == candIdx - 1) {
            first = inst;
            second = cand;
        } else if (candIdx == instIdx - 1) {
            first = cand;
            second = inst;
        } else {
            return false;
        }

        ASSERT(inst->IsMemory() && cand->IsMemory());
        ASSERT(inst->GetOpcode() == cand->GetOpcode());
        ASSERT(inst != cand && cand->IsPrecedingInSameBlock(inst));
        Inst *candLowerAfter = nullptr;
        Inst *instUpperAfter = nullptr;
        if (first->IsLoad()) {
            // Consider dominance of load users
            bool checkCfg = true;
            candLowerAfter = FindLowerInsertAfter(cand, inst, checkCfg);
            // The index is not needed if v0[v1] precedes v0[v1 + 1], because v1 + 1 is not used in the paired load.
            checkCfg = second->IsPrecedingInSameBlock(first);
            instUpperAfter = FindUpperInsertAfter(inst, cand, checkCfg);
        } else if (first->IsStore()) {
            // Store instructions do not have users, so don't check them
            bool checkCfg = false;
            candLowerAfter = FindLowerInsertAfter(cand, inst, checkCfg);
            // Should check that the stored value is ready
            checkCfg = true;
            instUpperAfter = FindUpperInsertAfter(inst, cand, checkCfg);
        } else {
            UNREACHABLE();
        }

        // No intersection of the reordering ranges
        if (!instUpperAfter->IsPrecedingInSameBlock(candLowerAfter)) {
            return false;
        }
        if (cand->IsPrecedingInSameBlock(instUpperAfter)) {
            insertAfter = instUpperAfter;
        } else {
            insertAfter = cand;
        }

        first->SetMarker(mrkInvalid_);
        second->SetMarker(mrkInvalid_);
        InsertPair(first, second, insertAfter);
        return true;
    }

    void HandleArrayAccessI(Inst *inst)
    {
        Inst *obj = inst->GetDataFlowInput(inst->GetInput(0).GetInst());
        uint64_t idx = GetInstImm(inst);
        if (!MemoryCoalescing::AcceptedType(inst->GetType())) {
            candidates_.push_back(inst);
            return;
        }
        /* The last candidates are more likely to be coalesced */
        for (auto iter = candidates_.rbegin(); iter != candidates_.rend(); iter++) {
            auto cand = *iter;
            /* Skip uninteresting candidates */
            if (cand->IsMarked(mrkInvalid_) || cand->GetOpcode() != inst->GetOpcode()) {
                continue;
            }

            Inst *candObj = cand->GetDataFlowInput(cand->GetInput(0).GetInst());
            /* Array objects must alias each other */
            if (aliases_.CheckRefAlias(obj, candObj) != MUST_ALIAS) {
                continue;
            }
            /* The difference between the indices should equal one */
            uint64_t candIdx = GetInstImm(cand);
            /* To keep alignment, the lower index should be even */
            if (alignedOnly_ && ((idx < candIdx && (idx & 1U) != 0) || (candIdx < idx && (candIdx & 1U) != 0))) {
                continue;
            }
            if (TryAddCoalescedPair(inst, idx, cand, candIdx)) {
                break;
            }
        }

        candidates_.push_back(inst);
    }

    bool HandleKnownEvolutionArrayAccessVar(Inst *idx, Inst *candIdx, int64_t idxInitial, int64_t candInitial)
    {
        /* Accesses inside a loop */
        auto idxStep = vars_.GetStep(idx);
        auto candStep = vars_.GetStep(candIdx);
        /* The indices should be incremented by the same value, and their
           increment should be even to preserve alignment */
        if (idxStep != candStep) {
            return false;
        }
        /* To keep alignment we need an even step and an even lower initial value */
        constexpr auto IMM_2 = 2;
        // NOLINTBEGIN(readability-simplify-boolean-expr)
        if (alignedOnly_ && idxStep % IMM_2 != 0 &&
            ((idxInitial < candInitial && idxInitial % IMM_2 != 0) ||
             (candInitial < idxInitial && candInitial % IMM_2 != 0))) {
            return false;
        }
        return true;
        // NOLINTEND(readability-simplify-boolean-expr)
    }

    void HandleArrayAccess(Inst *inst)
    {
        Inst *obj = inst->GetDataFlowInput(inst->GetInput(0).GetInst());
        Inst *idx = inst->GetDataFlowInput(inst->GetInput(1).GetInst());
        if (!vars_.IsAnalyzed(idx) || !MemoryCoalescing::AcceptedType(inst->GetType())) {
            candidates_.push_back(inst);
            return;
        }
        /* The last candidates are more likely to be coalesced */
        for (auto iter = candidates_.rbegin(); iter != candidates_.rend(); iter++) {
            auto cand = *iter;
            /* Skip uninteresting candidates */
            if (cand->IsMarked(mrkInvalid_) || cand->GetOpcode() != inst->GetOpcode()) {
                continue;
            }

            Inst *candObj = cand->GetDataFlowInput(cand->GetInput(0).GetInst());
            auto candIdx = cand->GetDataFlowInput(cand->GetInput(1).GetInst());
            /* We need info about the candidate's index, and the array objects must alias each other */
            if (!vars_.IsAnalyzed(candIdx) || aliases_.CheckRefAlias(obj, candObj) != MUST_ALIAS) {
                continue;
            }
            if (vars_.HasKnownEvolution(idx) && vars_.HasKnownEvolution(candIdx)) {
                auto idxInitial = vars_.GetInitial(idx);
                auto candInitial = vars_.GetInitial(candIdx);
                if (!HandleKnownEvolutionArrayAccessVar(idx, candIdx, idxInitial, candInitial)) {
                    continue;
                }
                if (TryAddCoalescedPair(inst, idxInitial, cand, candInitial)) {
                    break;
                }
            } else if (!alignedOnly_ && !vars_.HasKnownEvolution(idx) && !vars_.HasKnownEvolution(candIdx)) {
                /* Accesses outside a loop */
                if (vars_.GetBase(idx) != vars_.GetBase(candIdx)) {
                    continue;
                }
                if (TryAddCoalescedPair(inst, vars_.GetDiff(idx), cand, vars_.GetDiff(candIdx))) {
                    break;
                }
            }
        }

        candidates_.push_back(inst);
    }

    void InsertPair(Inst *first, Inst *second, Inst *insertAfter)
    {
        COMPILER_LOG(DEBUG, MEMORY_COALESCING)
            << "Access that may be coalesced: v" << first->GetId() << " v" << second->GetId();

        ASSERT(first->GetType() == second->GetType());
        Inst *paired = nullptr;
        switch (first->GetOpcode()) {
            case Opcode::LoadArray:
                paired = ReplaceLoadArray(first, second, insertAfter);
                break;
            case Opcode::LoadArrayI:
                paired = ReplaceLoadArrayI(first, second, insertAfter);
                break;
            case Opcode::StoreArray:
                paired = ReplaceStoreArray(first, second, insertAfter);
                break;
            case Opcode::StoreArrayI:
                paired = ReplaceStoreArrayI(first, second, insertAfter);
                break;
            default:
                UNREACHABLE();
        }

        COMPILER_LOG(DEBUG, MEMORY_COALESCING)
            << "Coalescing of {v" << first->GetId() << " v" << second->GetId() << "} is successful";
        graph_->GetEventWriter().EventMemoryCoalescing(first->GetId(), first->GetPc(), second->GetId(), second->GetPc(),
                                                       paired->GetId(), paired->IsStore() ? "Store" : "Load");

        pairs_[paired] = {first, second};
        paired->SetMarker(mrkInvalid_);
        candidates_.insert(std::find_if(candidates_.rbegin(), candidates_.rend(),
                                        [paired](auto x) { return x->IsPrecedingInSameBlock(paired); })
                               .base(),
                           paired);
    }

    Inst *ReplaceLoadArray(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::LoadArray);
        ASSERT(second->GetOpcode() == Opcode::LoadArray);

        auto pload = graph_->CreateInstLoadArrayPair(first->GetType(), INVALID_PC, first->GetInput(0).GetInst(),
                                                     first->GetInput(1).GetInst());
        pload->CastToLoadArrayPair()->SetNeedBarrier(first->CastToLoadArray()->GetNeedBarrier() ||
                                                     second->CastToLoadArray()->GetNeedBarrier());
        insertAfter->InsertAfter(pload);
        if (first->CanThrow() || second->CanThrow()) {
            pload->SetFlag(compiler::inst_flags::CAN_THROW);
        }
        MemoryCoalescing::RemoveAddI(pload);
        return pload;
    }

    Inst *ReplaceLoadArrayI(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::LoadArrayI);
        ASSERT(second->GetOpcode() == Opcode::LoadArrayI);

        auto pload = graph_->CreateInstLoadArrayPairI(first->GetType(), INVALID_PC, first->GetInput(0).GetInst(),
                                                      first->CastToLoadArrayI()->GetImm());
        pload->CastToLoadArrayPairI()->SetNeedBarrier(first->CastToLoadArrayI()->GetNeedBarrier() ||
                                                      second->CastToLoadArrayI()->GetNeedBarrier());
        insertAfter->InsertAfter(pload);
        if (first->CanThrow() || second->CanThrow()) {
            pload->SetFlag(compiler::inst_flags::CAN_THROW);
        }

        return pload;
    }

    Inst *ReplaceStoreArray(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::StoreArray);
        ASSERT(second->GetOpcode() == Opcode::StoreArray);

        auto pstore = graph_->CreateInstStoreArrayPair(
            first->GetType(), INVALID_PC, first->GetInput(0).GetInst(), first->CastToStoreArray()->GetIndex(),
            first->CastToStoreArray()->GetStoredValue(), second->CastToStoreArray()->GetStoredValue());
        pstore->CastToStoreArrayPair()->SetNeedBarrier(first->CastToStoreArray()->GetNeedBarrier() ||
                                                       second->CastToStoreArray()->GetNeedBarrier());
        insertAfter->InsertAfter(pstore);
        if (first->CanThrow() || second->CanThrow()) {
            pstore->SetFlag(compiler::inst_flags::CAN_THROW);
        }
        MemoryCoalescing::RemoveAddI(pstore);
        return pstore;
    }

    Inst *ReplaceStoreArrayI(Inst *first, Inst *second, Inst *insertAfter)
    {
        ASSERT(first->GetOpcode() == Opcode::StoreArrayI);
        ASSERT(second->GetOpcode() == Opcode::StoreArrayI);

        auto pstore = graph_->CreateInstStoreArrayPairI(
            first->GetType(), INVALID_PC, first->GetInput(0).GetInst(), first->CastToStoreArrayI()->GetStoredValue(),
            second->CastToStoreArrayI()->GetStoredValue(), first->CastToStoreArrayI()->GetImm());
        pstore->CastToStoreArrayPairI()->SetNeedBarrier(first->CastToStoreArrayI()->GetNeedBarrier() ||
                                                        second->CastToStoreArrayI()->GetNeedBarrier());
        insertAfter->InsertAfter(pstore);
        if (first->CanThrow() || second->CanThrow()) {
            pstore->SetFlag(compiler::inst_flags::CAN_THROW);
        }

        return pstore;
    }

    uint64_t GetInstImm(Inst *inst)
    {
        switch (inst->GetOpcode()) {
            case Opcode::LoadArrayI:
                return inst->CastToLoadArrayI()->GetImm();
            case Opcode::StoreArrayI:
                return inst->CastToStoreArrayI()->GetImm();
            default:
                UNREACHABLE();
        }
    }

private:
    bool alignedOnly_;
    Marker mrkInvalid_;
    Graph *graph_ {nullptr};
    const AliasAnalysis &aliases_;
    const VariableAnalysis &vars_;
    ArenaUnorderedMap<Inst *, MemoryCoalescing::CoalescedPair> pairs_;
    InstVector candidates_;
};

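/// Redirect the users of LOAD to a LoadPairPart that extracts element DST_IDX of PAIRED_LOAD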
static void ReplaceLoadByPair(Inst *load, Inst *pairedLoad, int32_t dstIdx)
{
    auto graph = pairedLoad->GetBasicBlock()->GetGraph();
    auto pairGetter = graph->CreateInstLoadPairPart(load->GetType(), INVALID_PC, pairedLoad, dstIdx);
    load->ReplaceUsers(pairGetter);
    pairedLoad->InsertAfter(pairGetter);
}

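/// If the index input of a paired access is an AddI, fold its immediate into the access:
/// LoadArrayPair(arr, AddI(i, n)) becomes LoadArrayPair(arr, i) with imm = n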
void MemoryCoalescing::RemoveAddI(Inst *inst)
{
    auto opcode = inst->GetOpcode();
    ASSERT(opcode == Opcode::LoadArrayPair || opcode == Opcode::StoreArrayPair);
    auto input1 = inst->GetInput(1).GetInst();
    if (input1->GetOpcode() == Opcode::AddI) {
        uint64_t imm = input1->CastToAddI()->GetImm();
        if (opcode == Opcode::LoadArrayPair) {
            inst->CastToLoadArrayPair()->SetImm(imm);
        } else if (opcode == Opcode::StoreArrayPair) {
            inst->CastToStoreArrayPair()->SetImm(imm);
        }
        inst->SetInput(1, input1->GetInput(0).GetInst());
    }
}

/**
 * This optimization coalesces two loads (stores) that read (write) values from (to) consecutive memory
 * locations into a single operation.
 *
 * 1) If we have two memory instructions that can be coalesced, we try to find a position for the
 *    coalesced operation. If one is found, the memory operations are coalesced; otherwise they are skipped.
 * 2) Aarch64 paired memory instructions require address alignment. For arrays
 *    this means we can coalesce only accesses that start from an even index.
 * 3) The implemented coalescing for arrays assumes there are no volatile array element accesses.
 * A sketch of the transformation is given below.
 */
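/*
 * Illustrative sketch of the whole pass (hypothetical instruction IDs; v0 is the array):
 *
 *   before:                        after:
 *     v10 = LoadArrayI v0, 0         v13 = LoadArrayPairI v0, 0
 *     v11 = LoadArrayI v0, 1         v14 = LoadPairPart   v13, 0
 *     use(v10, v11)                  v15 = LoadPairPart   v13, 1
 *                                    use(v14, v15)
 */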
bool MemoryCoalescing::RunImpl()
{
    if (GetGraph()->GetArch() != Arch::AARCH64) {
        COMPILER_LOG(INFO, MEMORY_COALESCING) << "Skipping Memory Coalescing for unsupported architecture";
        return false;
    }

    GetGraph()->RunPass<DominatorsTree>();
    GetGraph()->RunPass<LoopAnalyzer>();
    GetGraph()->RunPass<AliasAnalysis>();

    VariableAnalysis variables(GetGraph());
    auto &aliases = GetGraph()->GetValidAnalysis<AliasAnalysis>();
    Marker mrk = GetGraph()->NewMarker();
    PairCreatorVisitor collector(GetGraph(), aliases, variables, mrk, alignedOnly_);
    for (auto block : GetGraph()->GetBlocksRPO()) {
        collector.VisitBlock(block);
        collector.Reset();
    }
    GetGraph()->EraseMarker(mrk);

    for (auto pair : collector.GetPairs()) {
        auto bb = pair.first->GetBasicBlock();
        if (pair.first->IsLoad()) {
            ReplaceLoadByPair(pair.second.second, pair.first, 1);
            ReplaceLoadByPair(pair.second.first, pair.first, 0);
        }
        bb->RemoveInst(pair.second.first);
        bb->RemoveInst(pair.second.second);
    }

    if (!collector.GetPairs().empty()) {
        SaveStateBridgesBuilder ssb;
        for (auto bb : GetGraph()->GetBlocksRPO()) {
            if (!bb->IsEmpty() && !bb->IsStartBlock()) {
                ssb.FixSaveStatesInBB(bb);
            }
        }
    }
    COMPILER_LOG(DEBUG, MEMORY_COALESCING) << "Memory Coalescing complete";
    return !collector.GetPairs().empty();
}
}  // namespace panda::compiler