/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "optimizer/code_generator/codegen.h"
#include "optimizer/code_generator/encode.h"
#include "optimizer/code_generator/spill_fill_encoder.h"
#include "optimizer/ir/graph.h"

namespace ark::compiler {

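// Checks whether two spill-fill operations form a consecutive pair: both must have the same source,
// destination and common types, and their stack (or stack-parameter) slots must be direct neighbors
// in the direction the corresponding stack area grows.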
bool SpillFillEncoder::AreConsecutiveOps(const SpillFillData &pred, const SpillFillData &succ)
{
    bool sameSrcType = pred.SrcType() == succ.SrcType();
    bool sameDstType = pred.DstType() == succ.DstType();
    bool sameArgumentType = pred.GetCommonType() == succ.GetCommonType();
    if (!sameSrcType || !sameDstType || !sameArgumentType) {
        return false;
    }

    // Slots should be neighboring; note that the offset from SP decreases as the slot number increases,
    // so succ's slot number should be lower than pred's slot number.
    if (pred.SrcType() == LocationType::STACK && pred.SrcValue() != succ.SrcValue() + 1U) {
        return false;
    }
    if (pred.DstType() == LocationType::STACK && pred.DstValue() != succ.DstValue() + 1U) {
        return false;
    }
    // Parameter slots are numbered in the opposite direction.
    if (pred.SrcType() == LocationType::STACK_PARAMETER && pred.SrcValue() != succ.SrcValue() - 1U) {
        return false;
    }
    if (pred.DstType() == LocationType::STACK_PARAMETER && pred.DstValue() != succ.DstValue() - 1U) {
        return false;
    }
    return true;
}

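// Returns true when two neighboring spill-fill operations may be merged into a single paired load/store:
// combining must be enabled for the graph, the common type must occupy the full 64-bit stack slot, and the
// operations must access consecutive locations.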
bool SpillFillEncoder::CanCombineSpillFills(SpillFillData pred, SpillFillData succ, const Graph *graph)
{
    if (!IsCombiningEnabled(graph)) {
        return false;
    }
    // A stack slot is 64 bits wide, so we can only combine types that can be widened up to
    // 64 bits (i.e. we can't combine two floats).
    if (!DataType::Is64Bits(pred.GetCommonType(), graph->GetArch())) {
        return false;
    }

    return AreConsecutiveOps(pred, succ);
}

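// Sorts spill-fill operations so that operations touching neighboring stack slots end up next to each other,
// which lets EncodeSpillFill coalesce them into paired loads/stores. For example (an illustration, not taken
// from a real graph), a spill group {r1 -> slot 3, r2 -> slot 5, r3 -> slot 4} is reordered by descending
// destination slot into {r2 -> slot 5, r3 -> slot 4, r1 -> slot 3}, so the adjacent slots 5 and 4 can be
// written with a single store-pair.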
void SpillFillEncoder::SortSpillFillData(ArenaVector<SpillFillData> *spillFills)
{
    constexpr size_t MAX_VECTOR_LEN = MAX_NUM_REGS + MAX_NUM_VREGS;
    // To keep compilation time reasonable, don't sort vectors that are too large.
    if (spillFills->size() > MAX_VECTOR_LEN) {
        COMPILER_LOG(DEBUG, CODEGEN) << "Bypass spill fills sorting because corresponding vector is too large: "
                                     << spillFills->size();
        return;
    }
    auto it = spillFills->begin();
    while (it != spillFills->end()) {
        // Sort spill fills only within a group of consecutive SpillFillData elements sharing the same
        // spill-fill type. SpillFillData elements cannot be reordered across the whole spill_fills array,
        // because some of these elements may have been inserted by SpillFillResolver to break cyclic dependencies.
        bool isFill = it->SrcType() == LocationType::STACK && it->GetDst().IsAnyRegister();
        bool isSpill = it->GetSrc().IsAnyRegister() && it->DstType() == LocationType::STACK;
        if (!isSpill && !isFill) {
            ++it;
            continue;
        }
        auto next = std::next(it);
        while (next != spillFills->end() && it->SrcType() == next->SrcType() && it->DstType() == next->DstType()) {
            ++next;
        }

        if (isSpill) {
            std::sort(it, next, [](auto sf1, auto sf2) { return sf1.DstValue() > sf2.DstValue(); });
        } else {
            ASSERT(isFill);
            std::sort(it, next, [](auto sf1, auto sf2) { return sf1.SrcValue() > sf2.SrcValue(); });
        }

        it = next;
    }
}

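// The encoder keeps references to the codegen, graph, encoder and frame layout, and caches the target's
// stack pointer register, which is used as the base for all stack slot accesses below.
// Typical usage (a sketch; the exact call site inside Codegen may differ):
//     SpillFillEncoder(codegen, inst).EncodeSpillFill();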
SpillFillEncoder::SpillFillEncoder(Codegen *codegen, Inst *inst)
    : inst_(inst->CastToSpillFill()),
      graph_(codegen->GetGraph()),
      codegen_(codegen),
      encoder_(codegen->GetEncoder()),
      fl_(codegen->GetFrameLayout())
{
    spReg_ = codegen->GetTarget().GetStackReg();
}

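// Emits machine code for all SpillFillData elements of the instruction. When combining is enabled, the data
// is sorted first so that neighboring stack slots are processed back to back; the loop then dispatches on the
// source location, and each helper consumes one element or, when it manages to coalesce a pair, two elements.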
void SpillFillEncoder::EncodeSpillFill()
{
    if (IsCombiningEnabled(graph_)) {
        SortSpillFillData(&(inst_->GetSpillFills()));
    }

    // Hint on how many consecutive ops the current group contains.
    int consecutiveOpsHint = 0;
    for (auto it = inst_->GetSpillFills().begin(), end = inst_->GetSpillFills().end(); it != end;) {
        auto sf = *it;
        auto nextIt = std::next(it);
        SpillFillData *next = nextIt == end ? nullptr : &(*nextIt);

        // A new group has started.
        if (consecutiveOpsHint <= 0) {
            consecutiveOpsHint = 1;
            // Find how many consecutive SpillFillData elements have the same common, source and destination
            // types and read or write consecutive stack slots.
            for (auto groupIt = it, nextGroupIt = std::next(it);
                 nextGroupIt != end && AreConsecutiveOps(*groupIt, *nextGroupIt); ++nextGroupIt) {
                consecutiveOpsHint++;
                groupIt = nextGroupIt;
            }
        }

        size_t adv = 0;
        switch (sf.SrcType()) {
            case LocationType::IMMEDIATE: {
                adv = EncodeImmToX(sf);
                break;
            }
            case LocationType::FP_REGISTER:
            case LocationType::REGISTER: {
                adv = EncodeRegisterToX(sf, next, consecutiveOpsHint);
                break;
            }
            case LocationType::STACK_PARAMETER:
            case LocationType::STACK: {
                adv = EncodeStackToX(sf, next, consecutiveOpsHint);
                break;
            }
            default:
                UNREACHABLE();
        }
        consecutiveOpsHint -= static_cast<int>(adv);
        std::advance(it, adv);
    }
}

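// Stores a constant into a stack slot, choosing the store that matches the constant's type: float and double
// constants are stored via their typed value, everything else via its raw bits with the store size derived
// from the spill-fill type.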
void SpillFillEncoder::EncodeImmWithCorrectType(DataType::Type sfType, MemRef dstMem, ConstantInst *constInst)
{
    ASSERT(DataType::IsTypeNumeric(sfType));
    switch (sfType) {
        case DataType::Type::FLOAT32: {
            auto imm = constInst->GetFloatValue();
            encoder_->EncodeSti(imm, dstMem);
            break;
        }
        case DataType::Type::FLOAT64: {
            auto imm = constInst->GetDoubleValue();
            encoder_->EncodeSti(imm, dstMem);
            break;
        }
        default: {
            auto imm = constInst->GetRawValue();
            auto storeSize = Codegen::ConvertDataType(sfType, codegen_->GetArch()).GetSize() / BYTE_SIZE;
            encoder_->EncodeSti(imm, storeSize, dstMem);
            break;
        }
    }
}

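// Moves an immediate (a spilled constant) either into a register or into a stack slot.
// Always consumes exactly one SpillFillData element.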
size_t SpillFillEncoder::EncodeImmToX(const SpillFillData &sf)
{
    auto constInst = graph_->GetSpilledConstant(sf.SrcValue());
    ASSERT(constInst->IsConst());

    if (sf.GetDst().IsAnyRegister()) { // imm -> register
        auto type = sf.GetType();
        if (graph_->IsDynamicMethod() && constInst->GetType() == DataType::INT64) {
            type = DataType::UINT32;
        }

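        // Debug builds construct a typed immediate for float constants, presumably so that encoder-side
        // assertions can check its type; release builds simply pass the raw bit pattern.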
        Imm imm;
#ifndef NDEBUG
        switch (type) {
            case DataType::FLOAT32:
                imm = Imm(constInst->GetFloatValue());
                break;
            case DataType::FLOAT64:
                imm = Imm(constInst->GetDoubleValue());
                break;
            default:
                ASSERT(DataType::IsTypeNumeric(type));
                imm = Imm(constInst->GetRawValue());
                break;
        }
#else
        imm = Imm(constInst->GetRawValue());
#endif
        auto dstReg = GetDstReg(sf.GetDst(), Codegen::ConvertDataType(type, codegen_->GetArch()));
        encoder_->EncodeMov(dstReg, imm);
        return 1U;
    }

    ASSERT(sf.GetDst().IsAnyStack()); // imm -> stack
    auto dstMem = codegen_->GetMemRefForSlot(sf.GetDst());
    auto sfType = sf.GetCommonType();
    EncodeImmWithCorrectType(sfType, dstMem, constInst);
    return 1U;
}

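// Moves a value from a (general-purpose or floating-point) register into another register, a stack argument
// slot or a regular stack slot. When the next spill-fill writes the neighboring stack slot and the pair can
// be combined, both values are stored with a single store-pair instruction and two elements are consumed.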
size_t SpillFillEncoder::EncodeRegisterToX(const SpillFillData &sf, const SpillFillData *next, int consecutiveOpsHint)
{
    if (sf.GetDst().IsAnyRegister()) { // register -> register
        auto srcReg = codegen_->ConvertRegister(sf.SrcValue(), sf.GetType());
        auto dstReg = GetDstReg(sf.GetDst(), srcReg.GetType());
        encoder_->EncodeMov(dstReg, srcReg);
        return 1U;
    }

    ASSERT(sf.GetDst().IsAnyStack());
    auto offset = codegen_->GetStackOffset(sf.GetDst());
    auto memRef = MemRef(spReg_, offset);

    if (sf.GetDst().IsStackArgument()) { // register -> stack_arg
        auto srcReg = codegen_->ConvertRegister(sf.SrcValue(), sf.GetType());
        // It is possible to have a call sequence to an intrinsic with no getter/setter in the interpreter:
        // compiled_code->c2i(push to frame)->interpreter(HandleCallVirtShort)->i2c(move to stack)->intrinsic
        // To avoid fixing this in the interpreter, it is better to store 64 bits.
        if (srcReg.GetSize() < DOUBLE_WORD_SIZE && !srcReg.GetType().IsFloat()) {
            srcReg = srcReg.As(Codegen::ConvertDataType(DataType::REFERENCE, codegen_->GetArch()));
        }
        encoder_->EncodeStrz(srcReg, memRef);
        return 1U;
    }

    // register -> stack
    auto srcReg = codegen_->ConvertRegister(sf.SrcValue(), sf.GetCommonType());
    // If the address is not qword-aligned and the group has an odd number of consecutive slots left
    // (counting the current one), skip coalescing for the current operation so that the following pairs
    // start at an aligned address.
    constexpr int COALESCE_OPS_LIMIT = 2;
    auto skipCoalescing = (consecutiveOpsHint % COALESCE_OPS_LIMIT == 1) && (offset % QUAD_WORD_SIZE_BYTES != 0);
    if (next != nullptr && CanCombineSpillFills(sf, *next, graph_) && !skipCoalescing) {
        auto nextReg = codegen_->ConvertRegister(next->SrcValue(), next->GetCommonType());
        encoder_->EncodeStp(srcReg, nextReg, memRef);
        return 2U;
    }
    encoder_->EncodeStr(srcReg, memRef);
    return 1U;
}

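// Moves a value from a stack (or stack-parameter) slot into a register or into another stack slot.
// Adjacent fills into registers may be combined into a single load-pair instruction, consuming two elements;
// stack-to-stack moves are encoded as a 64-bit memory copy.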
size_t SpillFillEncoder::EncodeStackToX(const SpillFillData &sf, const SpillFillData *next, int consecutiveOpsHint)
{
    auto offset = codegen_->GetStackOffset(sf.GetSrc());
    auto srcMem = MemRef(spReg_, offset);
    auto typeInfo = Codegen::ConvertDataType(sf.GetType(), codegen_->GetArch());

    if (sf.GetDst().IsAnyRegister()) { // stack -> register
        // If the address is not qword-aligned and the group has an odd number of consecutive slots left
        // (counting the current one), skip coalescing for the current operation so that the following pairs
        // start at an aligned address.
        constexpr int COALESCE_OPS_LIMIT = 2;
        auto skipCoalescing = (consecutiveOpsHint % COALESCE_OPS_LIMIT == 1) && (offset % QUAD_WORD_SIZE_BYTES != 0);
        if (next != nullptr && CanCombineSpillFills(sf, *next, graph_) && !skipCoalescing) {
            auto curReg = codegen_->ConvertRegister(sf.DstValue(), sf.GetCommonType());
            auto nextReg = codegen_->ConvertRegister(next->DstValue(), next->GetCommonType());
            encoder_->EncodeLdp(curReg, nextReg, false, srcMem);
            return 2U;
        }
        auto dstReg = GetDstReg(sf.GetDst(), typeInfo);
        encoder_->EncodeLdr(dstReg, false, srcMem);
        return 1U;
    }

    // stack -> stack
    ASSERT(sf.GetDst().IsAnyStack());
    auto dstMem = codegen_->GetMemRefForSlot(sf.GetDst());
    encoder_->EncodeMemCopy(srcMem, dstMem, DOUBLE_WORD_SIZE); // Stack slot is 64 bits wide
    return 1U;
}
} // namespace ark::compiler