/*
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compilation.h"
#include "function.h"
#include "mem/pool_manager.h"
#include "elfio/elfio.hpp"
#include "irtoc_runtime.h"
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
#include "aarch64/disasm-aarch64.h"
#endif

#ifdef PANDA_COMPILER_DEBUG_INFO
#include "dwarf_builder.h"
#endif

namespace ark::irtoc {

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
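// Collects the set of general-purpose and FP/SIMD registers which a compiled code blob
// actually writes. It reuses the VIXL disassembler: for every register operand the
// disassembler prints we get a callback (AppendRegisterNameToOutput) and can record
// destination registers there.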
class UsedRegistersCollector : public vixl::aarch64::Disassembler {
public:
    explicit UsedRegistersCollector(ark::ArenaAllocator *allocator) : Disassembler(allocator) {}

    RegMask &GetUsedRegs(bool isFp)
    {
        return isFp ? vregMask_ : regMask_;
    }

    static UsedRegisters CollectForCode(ArenaAllocator *allocator, Span<const uint8_t> code)
    {
        ASSERT(allocator != nullptr);
        ASSERT(!code.Empty());

        vixl::aarch64::Decoder decoder(allocator);
        UsedRegistersCollector usedRegsCollector(allocator);
        decoder.AppendVisitor(&usedRegsCollector);
        bool skipping = false;

        auto startInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(code.data());
        auto endInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(&(*code.end()));
        // To determine real register usage we check each assembly instruction that has
        // destination register(s). Handlers containing `return` are a problem because
        // their epilogue restores registers, and we must distinguish register restoring
        // from real register usage. A few heuristics make this possible; they work as follows:
        // 1) We parse the assembly code in reverse order, so the epilogue is found quickly.
        // 2) For each instruction we add all its destination registers to the result set
        //    of used registers.
        // 3) When we meet a `ret` instruction, we raise the special `skipping` flag for the
        //    next few instructions.
        // 4) While the `skipping` flag is raised, we keep skipping `load` instructions and
        //    `add` arithmetic with `sp` (stack pointer) as the destination (assuming they
        //    belong to the epilogue), without adding their registers to the result set.
        //    As soon as we meet any other kind of instruction, we clear the `skipping` flag.
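        //
        // For example, a typical epilogue (shown in forward order, while we scan backwards):
        //
        //     ldp x19, x20, [sp, #16]    // skipped: a load while `skipping` is raised
        //     add sp, sp, #0x40          // skipped: `add` with `sp` as destination
        //     ret                        // raises the `skipping` flag
        //
        // Here x19/x20 are merely being restored, so they must not be reported as used.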
        for (auto instr = usedRegsCollector.GetPrevInstruction(endInstr); instr >= startInstr;
             instr = usedRegsCollector.GetPrevInstruction(instr)) {
            if (instr->Mask(vixl::aarch64::UnconditionalBranchToRegisterMask) == vixl::aarch64::RET) {
                skipping = true;
                continue;
            }
            if (skipping && (instr->IsLoad() || usedRegsCollector.CheckSPAdd(instr))) {
                continue;
            }
            skipping = false;
            decoder.Decode(instr);
        }

        UsedRegisters usedRegisters;
        usedRegisters.gpr |= usedRegsCollector.GetUsedRegs(false);
        usedRegisters.fp |= usedRegsCollector.GetUsedRegs(true);
        return usedRegisters;
    }

protected:
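    // AArch64 instructions are fixed-width (kInstructionSize == 4 bytes), so the previous
    // instruction always sits kInstructionSize bytes before the current one.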
    const vixl::aarch64::Instruction *GetPrevInstruction(const vixl::aarch64::Instruction *instr) const
    {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        return instr - vixl::aarch64::kInstructionSize;
    }

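    // Returns true for an `add` whose destination is `sp`, i.e. the stack pointer
    // adjustment an epilogue uses to pop the frame.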
    bool CheckSPAdd(const vixl::aarch64::Instruction *instr) const
    {
        constexpr int32_t SP_REG = GetDwarfSP(Arch::AARCH64);
        return instr->Mask(vixl::aarch64::AddSubOpMask) == vixl::aarch64::ADD && (instr->GetRd() == SP_REG);
    }

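    // Disassembler callback invoked for every register operand being printed. We use it
    // to record the destination registers of non-store instructions in the GPR/FP masks.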
    void AppendRegisterNameToOutput(const vixl::aarch64::Instruction *instr,
                                    const vixl::aarch64::CPURegister &reg) override
    {
        Disassembler::AppendRegisterNameToOutput(instr, reg);
        if (instr->IsStore()) {
            return;
        }
        uint32_t code = reg.GetCode();
        // We need to account for both registers in case of a pair load
        bool isPair = instr->Mask(vixl::aarch64::LoadStorePairAnyFMask) == vixl::aarch64::LoadStorePairAnyFixed;
        if (!(code == static_cast<uint32_t>(instr->GetRd()) ||
              (isPair && code == static_cast<uint32_t>(instr->GetRt2())))) {
            return;
        }
        if (reg.IsRegister()) {
            if (!reg.IsZero()) {
                regMask_.Set(code);
            }
        } else {
            ASSERT(reg.IsVRegister());
            vregMask_.Set(code);
        }
    }

private:
    RegMask regMask_;
    VRegMask vregMask_;
};
#endif  // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK

// The elfio library is missing some ELF constants, so define them here for now.
// We can't include the elf.h header because it conflicts with elfio.
static constexpr size_t EF_ARM_EABI_VER5 = 0x05000000;

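// Scans the machine code of LLVM-compiled interpreter handlers and accumulates the
// registers they really use. This is only relevant for AArch64; the collected masks
// are verified later by CheckUsedRegisters().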
void Compilation::CollectUsedRegisters([[maybe_unused]] ark::ArenaAllocator *allocator)
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (arch_ == Arch::AARCH64) {
        for (auto unit : units_) {
            if ((unit->GetGraph()->GetMode().IsInterpreter() || unit->GetGraph()->GetMode().IsInterpreterEntry()) &&
                unit->GetCompilationResult() != CompilationResult::ARK) {
                usedRegisters_ |= UsedRegistersCollector::CollectForCode(allocator, unit->GetCode());
            }
        }
    }
#endif  // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
}

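// Verifies that the callee-saved registers actually used by the LLVM-compiled code
// match exactly the set the runtime was optimized for; any mismatch is fatal.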
void Compilation::CheckUsedRegisters()
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (usedRegisters_.gpr.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used registers " << usedRegisters_.gpr;
        usedRegisters_.gpr &= GetCalleeRegsMask(arch_, false);
        auto diff = usedRegisters_.gpr ^ GetCalleeRegsMask(arch_, false, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, false, true) << std::endl
                              << "Got: " << usedRegisters_.gpr;
        }
    }
    if (usedRegisters_.fp.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used fp registers " << usedRegisters_.fp;
        usedRegisters_.fp &= GetCalleeRegsMask(arch_, true);
        auto diff = usedRegisters_.fp ^ GetCalleeRegsMask(arch_, true, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved fp register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, true, true) << std::endl
                              << "Got: " << usedRegisters_.fp;
        }
    }
#endif
}

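// Entry point of an Irtoc build: sets up allocators and the target architecture,
// compiles all units, validates register usage and emits the resulting relocatable ELF.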
Compilation::Result Compilation::Run()
{
    if (compiler::g_options.WasSetCompilerRegexWithSignature()) {
        LOG(FATAL, IRTOC) << "Regex with signatures is not supported, please use '--compiler-regex'.";
    }
    if (compiler::g_options.WasSetCompilerRegex()) {
        methodsRegex_ = compiler::g_options.GetCompilerRegex();
    }

    PoolManager::Initialize(PoolType::MALLOC);

    allocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);
    localAllocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);

    if (RUNTIME_ARCH == Arch::X86_64 && compiler::g_options.WasSetCompilerCrossArch()) {
        arch_ = GetArchFromString(compiler::g_options.GetCompilerCrossArch());
        if (arch_ == Arch::NONE) {
            LOG(FATAL, IRTOC) << "FATAL: unknown arch: " << compiler::g_options.GetCompilerCrossArch();
        }
        compiler::g_options.AdjustCpuFeatures(arch_ != RUNTIME_ARCH);
    } else {
        compiler::g_options.AdjustCpuFeatures(false);
    }

    LOG(INFO, IRTOC) << "Start Irtoc compilation for " << GetArchString(arch_) << "...";

    auto result = Compile();
    if (result) {
        CheckUsedRegisters();
        LOG(INFO, IRTOC) << "Irtoc compilation success";
    } else {
        LOG(FATAL, IRTOC) << "Irtoc compilation failed: " << result.Error();
    }

    if (result = MakeElf(g_options.GetIrtocOutput()); !result) {
        return result;
    }

    for (auto unit : units_) {
        delete unit;
    }

    allocator_.reset();
    localAllocator_.reset();

    PoolManager::Finalize();

    return result;
}

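// Compiles every compilation unit (optionally filtered by --compiler-regex). When the
// LLVM backend is enabled, units it compiled get their code read back from the LLVM
// object file afterwards.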
Compilation::Result Compilation::Compile()
{
#ifdef PANDA_LLVM_IRTOC
    IrtocRuntimeInterface runtime;
    ArenaAllocator allocator(SpaceType::SPACE_TYPE_COMPILER);
    std::shared_ptr<llvmbackend::IrtocCompilerInterface> llvmCompiler =
        llvmbackend::CreateLLVMIrtocCompiler(&runtime, &allocator, arch_);
#endif

    for (auto unit : units_) {
        if (compiler::g_options.WasSetCompilerRegex() && !std::regex_match(unit->GetName(), methodsRegex_)) {
            continue;
        }
        LOG(INFO, IRTOC) << "Compile " << unit->GetName();
#ifdef PANDA_LLVM_IRTOC
        unit->SetLLVMCompiler(llvmCompiler);
#endif
        auto result = unit->Compile(arch_, allocator_.get(), localAllocator_.get());
        if (!result) {
            return Unexpected {result.Error()};
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        hasDebugInfo_ |= unit->GetGraph()->IsLineDebugInfoEnabled();
#endif
    }

#ifdef PANDA_LLVM_IRTOC
    llvmCompiler->FinishCompile();
    ASSERT(!g_options.GetIrtocOutputLlvm().empty());
    llvmCompiler->WriteObjectFile(g_options.GetIrtocOutputLlvm());

    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            auto code = llvmCompiler->GetCompiledCode(unit->GetName());
            Span<uint8_t> span = {const_cast<uint8_t *>(code.code), code.size};
            unit->SetCode(span);
        }
        unit->ReportCompilationStatistic(&std::cerr);
    }
    if (g_options.GetIrtocLlvmStats() != "none" && !llvmCompiler->IsEmpty()) {
        std::cerr << "LLVM total: " << llvmCompiler->GetObjectFileSize() << " bytes" << std::endl;
    }

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    CollectUsedRegisters(&allocator);
#endif
#endif  // PANDA_LLVM_IRTOC

    return 0;
}

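// Maps the target architecture to the corresponding ELF e_machine value.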
static size_t GetElfArch(Arch arch)
{
    switch (arch) {
        case Arch::AARCH32:
            return ELFIO::EM_ARM;
        case Arch::AARCH64:
            return ELFIO::EM_AARCH64;
        case Arch::X86:
            return ELFIO::EM_386;
        case Arch::X86_64:
            return ELFIO::EM_X86_64;
        default:
            UNREACHABLE();
    }
}

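// Emits all compiled units into a relocatable ELF object: a .text section with aligned
// function code, .symtab/.strtab with one global STT_FUNC symbol per unit, .rela.text
// with relocations for calls to external functions and, if available, DWARF debug info.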
// CC-OFFNXT(huge_method[C++],G.FUN.01-CPP) solid logic
// CODECHECK-NOLINTNEXTLINE(C_RULE_ID_FUNCTION_SIZE)
Compilation::Result Compilation::MakeElf(std::string_view output)
{
    ELFIO::elfio elfWriter;
    elfWriter.create(Is64BitsArch(arch_) ? ELFIO::ELFCLASS64 : ELFIO::ELFCLASS32, ELFIO::ELFDATA2LSB);
    elfWriter.set_type(ELFIO::ET_REL);
    if (arch_ == Arch::AARCH32) {
        elfWriter.set_flags(EF_ARM_EABI_VER5);
    }
    elfWriter.set_os_abi(ELFIO::ELFOSABI_NONE);
    elfWriter.set_machine(GetElfArch(arch_));

    ELFIO::section *strSec = elfWriter.sections.add(".strtab");
    strSec->set_type(ELFIO::SHT_STRTAB);
    strSec->set_addr_align(0x1);

    ELFIO::string_section_accessor strWriter(strSec);

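    // For SHT_SYMTAB sections, sh_info must hold the index of the first non-local symbol.
    // The local symbols here are the mandatory null symbol (index 0) and the STT_FILE
    // entry "irtoc.cpp" (index 1), so global symbols start at index 2.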
    static constexpr size_t FIRST_GLOBAL_SYMBOL_INDEX = 2;
    static constexpr size_t SYMTAB_ADDR_ALIGN = 8;

    ELFIO::section *symSec = elfWriter.sections.add(".symtab");
    symSec->set_type(ELFIO::SHT_SYMTAB);
    symSec->set_info(FIRST_GLOBAL_SYMBOL_INDEX);
    symSec->set_link(strSec->get_index());
    symSec->set_addr_align(SYMTAB_ADDR_ALIGN);
    symSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_SYMTAB));

    ELFIO::symbol_section_accessor symbolWriter(elfWriter, symSec);

    symbolWriter.add_symbol(strWriter, "irtoc.cpp", 0, 0, ELFIO::STB_LOCAL, ELFIO::STT_FILE, 0, ELFIO::SHN_ABS);

    ELFIO::section *textSec = elfWriter.sections.add(".text");
    textSec->set_type(ELFIO::SHT_PROGBITS);
    // NOLINTNEXTLINE(hicpp-signed-bitwise)
    textSec->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR);
    textSec->set_addr_align(GetCodeAlignment(arch_));

    ELFIO::section *relSec = elfWriter.sections.add(".rela.text");
    relSec->set_type(ELFIO::SHT_RELA);
    relSec->set_info(textSec->get_index());
    relSec->set_link(symSec->get_index());
    relSec->set_addr_align(4U);  // CODECHECK-NOLINT(C_RULE_ID_MAGICNUMBER)
    relSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_RELA));
    ELFIO::relocation_section_accessor relWriter(elfWriter, relSec);

    /* Use a symbols map to avoid saving the same symbol multiple times */
    std::unordered_map<std::string, uint32_t> symbolsMap;
    auto addSymbol = [&symbolsMap, &symbolWriter, &strWriter](const char *name) {
        if (auto it = symbolsMap.find(name); it != symbolsMap.end()) {
            return it->second;
        }
        uint32_t index = symbolWriter.add_symbol(strWriter, name, 0, 0, ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, 0);
        symbolsMap.insert({name, index});
        return index;
    };
#ifdef PANDA_COMPILER_DEBUG_INFO
    auto dwarfBuilder {hasDebugInfo_ ? std::make_optional<DwarfBuilder>(arch_, &elfWriter) : std::nullopt};
#endif

    static constexpr size_t MAX_CODE_ALIGNMENT = 64;
    static constexpr std::array<uint8_t, MAX_CODE_ALIGNMENT> PADDING_DATA {0};
    CHECK_LE(GetCodeAlignment(GetArch()), MAX_CODE_ALIGNMENT);

    uint32_t codeAlignment = GetCodeAlignment(GetArch());
    ASSERT(codeAlignment != 0);
    size_t offset = 0;
    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            continue;
        }
        auto code = unit->GetCode();

        // Align the start of the function code
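        // e.g. with codeAlignment == 16 and offset == 10, padding == (16 - 10) % 16 == 6,
        // so six zero bytes are appended before this function's code starts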
        if (auto padding = (codeAlignment - (offset % codeAlignment)) % codeAlignment; padding != 0) {
            textSec->append_data(reinterpret_cast<const char *>(PADDING_DATA.data()), padding);
            offset += padding;
        }
        ASSERT(offset % codeAlignment == 0);

        auto symbol = symbolWriter.add_symbol(strWriter, unit->GetName(), offset, code.size(), ELFIO::STB_GLOBAL,
                                              ELFIO::STT_FUNC, 0, textSec->get_index());
        (void)symbol;
        textSec->append_data(reinterpret_cast<const char *>(code.data()), code.size());
        for (auto &rel : unit->GetRelocations()) {
            size_t relOffset = offset + rel.offset;
            auto sindex = addSymbol(unit->GetExternalFunction(rel.data));
            if (Is64BitsArch(arch_)) {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF64_R_INFO(sindex, rel.type)),
                                    rel.addend);
            } else {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF32_R_INFO(sindex, rel.type)),
                                    rel.addend);
            }
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        ASSERT(!unit->GetGraph()->IsLineDebugInfoEnabled() || dwarfBuilder);
        if (dwarfBuilder && !dwarfBuilder->BuildGraph(unit, offset, symbol)) {
            return Unexpected("DwarfBuilder::BuildGraph failed!");
        }
#endif
        offset += code.size();
    }
#ifdef PANDA_COMPILER_DEBUG_INFO
    if (dwarfBuilder && !dwarfBuilder->Finalize(offset)) {
        return Unexpected("DwarfBuilder::Finalize failed!");
    }
#endif

    // std::string_view is not guaranteed to be null-terminated, so materialize a std::string
    elfWriter.save(std::string(output));

    return 0;
}
}  // namespace ark::irtoc