/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compilation.h"
#include "function.h"
#include "mem/pool_manager.h"
#include "elfio/elfio.hpp"
#include "irtoc_runtime.h"
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
#include "aarch64/disasm-aarch64.h"
#endif

#ifdef PANDA_COMPILER_DEBUG_INFO
#include "dwarf_builder.h"
#endif

namespace panda::irtoc {

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
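// Collects the registers that are actually written by a compiled code blob. The collector piggybacks on the
// vixl disassembler: AppendRegisterNameToOutput() is invoked for every register operand that gets printed,
// and destination registers are accumulated into separate general-purpose and vector masks.
// Typical usage (see CollectUsedRegisters below):
//   UsedRegisters used = UsedRegistersCollector::CollectForCode(allocator, unit->GetCode());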
class UsedRegistersCollector : public vixl::aarch64::Disassembler {
public:
    explicit UsedRegistersCollector(panda::ArenaAllocator *allocator) : Disassembler(allocator) {}

    RegMask &GetUsedRegs(bool isFp)
    {
        return isFp ? vregMask_ : regMask_;
    }

    static UsedRegisters CollectForCode(ArenaAllocator *allocator, Span<const uint8_t> code)
    {
        ASSERT(allocator != nullptr);
        ASSERT(!code.Empty());

        vixl::aarch64::Decoder decoder(allocator);
        UsedRegistersCollector usedRegsCollector(allocator);
        decoder.AppendVisitor(&usedRegsCollector);
        bool skipping = false;

        auto startInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(code.data());
        auto endInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(&(*code.end()));
        // To determine the real register usage we inspect every assembly instruction that has
        // destination register(s). Handlers that end with `return` are problematic because their
        // epilogue restores registers, and register restoring must be separated from real register
        // usage. The following heuristic makes this possible:
        // 1) We parse the assembly code in reverse order so that the epilogue is found quickly.
        // 2) For each instruction we add all of its destination registers to the result set
        //    of used registers.
        // 3) When we meet a `ret` instruction we raise the special `skipping` flag for the next
        //    few instructions.
        // 4) While the `skipping` flag is raised, we keep skipping `load` instructions and `add`
        //    arithmetic with `sp` (stack pointer) as the destination (assuming they belong to the
        //    epilogue) without adding their registers to the result set. As soon as we meet any
        //    other kind of instruction we clear the `skipping` flag.
        for (auto instr = usedRegsCollector.GetPrevInstruction(endInstr); instr >= startInstr;
             instr = usedRegsCollector.GetPrevInstruction(instr)) {
            if (instr->Mask(vixl::aarch64::UnconditionalBranchToRegisterMask) == vixl::aarch64::RET) {
                skipping = true;
                continue;
            }
            if (skipping && (instr->IsLoad() || usedRegsCollector.CheckSPAdd(instr))) {
                continue;
            }
            skipping = false;
            decoder.Decode(instr);
        }

        UsedRegisters usedRegisters;
        usedRegisters.gpr |= usedRegsCollector.GetUsedRegs(false);
        usedRegisters.fp |= usedRegsCollector.GetUsedRegs(true);
        return usedRegisters;
    }

protected:
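    // Returns a pointer to the previous (4-byte) AArch64 instruction; the code is traversed backwards.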
    const vixl::aarch64::Instruction *GetPrevInstruction(const vixl::aarch64::Instruction *instr) const
    {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        return instr - vixl::aarch64::kInstructionSize;
    }

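    // Returns true for an `add` instruction whose destination is the stack pointer, i.e. an epilogue-style
    // stack frame teardown that must not be counted as real register usage.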
    bool CheckSPAdd(const vixl::aarch64::Instruction *instr) const
    {
        constexpr int32_t SP_REG = GetDwarfSP(Arch::AARCH64);
        return instr->Mask(vixl::aarch64::AddSubOpMask) == vixl::aarch64::ADD && (instr->GetRd() == SP_REG);
    }

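    // vixl disassembler hook, called for every register operand that gets printed. Stores are ignored, and
    // only destination registers (Rd, plus Rt2 for pair loads) are recorded into the corresponding mask.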
    void AppendRegisterNameToOutput(const vixl::aarch64::Instruction *instr,
                                    const vixl::aarch64::CPURegister &reg) override
    {
        Disassembler::AppendRegisterNameToOutput(instr, reg);
        if (instr->IsStore()) {
            return;
        }
        uint32_t code = reg.GetCode();
        // We need to account for both registers in case of a pair load
        bool isPair = instr->Mask(vixl::aarch64::LoadStorePairAnyFMask) == vixl::aarch64::LoadStorePairAnyFixed;
        if (!(code == static_cast<uint32_t>(instr->GetRd()) ||
              (isPair && code == static_cast<uint32_t>(instr->GetRt2())))) {
            return;
        }
        if (reg.IsRegister()) {
            if (!reg.IsZero()) {
                regMask_.Set(code);
            }
        } else {
            ASSERT(reg.IsVRegister());
            vregMask_.Set(code);
        }
    }

private:
    RegMask regMask_;
    VRegMask vregMask_;
};
#endif // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK

// The elfio library is missing some ELF constants, so let's define them here for now. We can't include the
// elf.h header because it conflicts with elfio.
static constexpr size_t EF_ARM_EABI_VER5 = 0x05000000;

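// For AArch64, scans the machine code of interpreter handlers that were not compiled by the Ark compiler
// (i.e. were produced by LLVM) and accumulates the registers they actually use into usedRegisters_.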
void Compilation::CollectUsedRegisters([[maybe_unused]] panda::ArenaAllocator *allocator)
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (arch_ == Arch::AARCH64) {
        for (auto unit : units_) {
            if ((unit->GetGraph()->GetMode().IsInterpreter() || unit->GetGraph()->GetMode().IsInterpreterEntry()) &&
                unit->GetCompilationResult() != CompilationResult::ARK) {
                usedRegisters_ |= UsedRegistersCollector::CollectForCode(allocator, unit->GetCode());
            }
        }
    }
#endif // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
}

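// Verifies that the callee-saved registers actually used by the LLVM-compiled handlers match the optimized
// callee-saved register set reported by GetCalleeRegsMask(); any mismatch is a fatal error.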
void Compilation::CheckUsedRegisters()
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (usedRegisters_.gpr.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used registers " << usedRegisters_.gpr;
        usedRegisters_.gpr &= GetCalleeRegsMask(arch_, false);
        auto diff = usedRegisters_.gpr ^ GetCalleeRegsMask(arch_, false, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, false, true) << std::endl
                              << "Got: " << usedRegisters_.gpr;
        }
    }
    if (usedRegisters_.fp.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used fp registers " << usedRegisters_.fp;
        usedRegisters_.fp &= GetCalleeRegsMask(arch_, true);
        auto diff = usedRegisters_.fp ^ GetCalleeRegsMask(arch_, true, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved fp register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, true, true) << std::endl
                              << "Got: " << usedRegisters_.fp;
        }
    }
#endif
}

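// Top-level Irtoc compilation driver: sets up allocators and the target architecture, compiles all units,
// validates register usage, writes the resulting relocatable ELF file and releases the resources.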
Compilation::Result Compilation::Run()
{
    if (compiler::g_options.WasSetCompilerRegex()) {
        methodsRegex_ = compiler::g_options.GetCompilerRegex();
    }

    PoolManager::Initialize(PoolType::MALLOC);

    allocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);
    localAllocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);

    if (RUNTIME_ARCH == Arch::X86_64 && compiler::g_options.WasSetCompilerCrossArch()) {
        arch_ = GetArchFromString(compiler::g_options.GetCompilerCrossArch());
        if (arch_ == Arch::NONE) {
            LOG(FATAL, IRTOC) << "FATAL: unknown arch: " << compiler::g_options.GetCompilerCrossArch();
        }
        compiler::g_options.AdjustCpuFeatures(arch_ != RUNTIME_ARCH);
    } else {
        compiler::g_options.AdjustCpuFeatures(false);
    }

    LOG(INFO, IRTOC) << "Start Irtoc compilation for " << GetArchString(arch_) << "...";

    auto result = Compile();
    if (result) {
        CheckUsedRegisters();
        LOG(INFO, IRTOC) << "Irtoc compilation success";
    } else {
        LOG(FATAL, IRTOC) << "Irtoc compilation failed: " << result.Error();
    }

    if (result = MakeElf(g_options.GetIrtocOutput()); !result) {
        return result;
    }

    for (auto unit : units_) {
        delete unit;
    }

    allocator_.reset();
    localAllocator_.reset();

    PoolManager::Finalize();

    return result;
}

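// Compiles every unit that matches the optional compiler regex filter. When the LLVM backend is enabled,
// some units may be compiled by LLVM instead: their object file is written separately and the resulting
// code spans are attached back to the corresponding units.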
Compilation::Result Compilation::Compile()
{
#ifdef PANDA_LLVM_IRTOC
    IrtocRuntimeInterface runtime;
    ArenaAllocator allocator(SpaceType::SPACE_TYPE_COMPILER);
    std::shared_ptr<llvmbackend::IrtocCompilerInterface> llvmCompiler =
        llvmbackend::CreateLLVMIrtocCompiler(&runtime, &allocator, arch_);
#endif

    for (auto unit : units_) {
        if (compiler::g_options.WasSetCompilerRegex() && !std::regex_match(unit->GetName(), methodsRegex_)) {
            continue;
        }
        LOG(INFO, IRTOC) << "Compile " << unit->GetName();
#ifdef PANDA_LLVM_IRTOC
        unit->SetLLVMCompiler(llvmCompiler);
#endif
        auto result = unit->Compile(arch_, allocator_.get(), localAllocator_.get());
        if (!result) {
            return Unexpected {result.Error()};
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        hasDebugInfo_ |= unit->GetGraph()->IsLineDebugInfoEnabled();
#endif
    }

#ifdef PANDA_LLVM_IRTOC
    llvmCompiler->CompileAll();
    ASSERT(!g_options.GetIrtocOutputLlvm().empty());
    llvmCompiler->WriteObjectFile(g_options.GetIrtocOutputLlvm());

    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            auto code = llvmCompiler->GetCompiledCode(unit->GetName());
            Span<uint8_t> span = {const_cast<uint8_t *>(code.code), code.size};
            unit->SetCode(span);
        }
        unit->ReportCompilationStatistic(&std::cerr);
    }
    if (g_options.GetIrtocLlvmStats() != "none" && !llvmCompiler->IsEmpty()) {
        std::cerr << "LLVM total: " << llvmCompiler->GetObjectFileSize() << " bytes" << std::endl;
    }

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    CollectUsedRegisters(&allocator);
#endif
#endif  // PANDA_LLVM_IRTOC

    return 0;
}

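// Maps the target architecture to the corresponding ELF machine type (e_machine)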
static size_t GetElfArch(Arch arch)
{
    switch (arch) {
        case Arch::AARCH32:
            return ELFIO::EM_ARM;
        case Arch::AARCH64:
            return ELFIO::EM_AARCH64;
        case Arch::X86:
            return ELFIO::EM_386;
        case Arch::X86_64:
            return ELFIO::EM_X86_64;
        default:
            UNREACHABLE();
    }
}

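// Emits all units compiled by the Ark backend into a relocatable ELF object file: creates the
// .strtab/.symtab/.text/.rela.text sections, appends each unit's code with proper alignment, records its
// relocations to external functions and, when available, the DWARF line debug info.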
// CODECHECK-NOLINTNEXTLINE(C_RULE_ID_FUNCTION_SIZE)
Compilation::Result Compilation::MakeElf(std::string_view output)
{
    ELFIO::elfio elfWriter;
    elfWriter.create(Is64BitsArch(arch_) ? ELFIO::ELFCLASS64 : ELFIO::ELFCLASS32, ELFIO::ELFDATA2LSB);
    elfWriter.set_type(ELFIO::ET_REL);
    if (arch_ == Arch::AARCH32) {
        elfWriter.set_flags(EF_ARM_EABI_VER5);
    }
    elfWriter.set_os_abi(ELFIO::ELFOSABI_NONE);
    elfWriter.set_machine(GetElfArch(arch_));

    ELFIO::section *strSec = elfWriter.sections.add(".strtab");
    strSec->set_type(ELFIO::SHT_STRTAB);
    strSec->set_addr_align(0x1);

    ELFIO::string_section_accessor strWriter(strSec);

    static constexpr size_t FIRST_GLOBAL_SYMBOL_INDEX = 2;
    static constexpr size_t SYMTAB_ADDR_ALIGN = 8;

    ELFIO::section *symSec = elfWriter.sections.add(".symtab");
    symSec->set_type(ELFIO::SHT_SYMTAB);
    symSec->set_info(FIRST_GLOBAL_SYMBOL_INDEX);
    symSec->set_link(strSec->get_index());
    symSec->set_addr_align(SYMTAB_ADDR_ALIGN);
    symSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_SYMTAB));

    ELFIO::symbol_section_accessor symbolWriter(elfWriter, symSec);

    symbolWriter.add_symbol(strWriter, "irtoc.cpp", 0, 0, ELFIO::STB_LOCAL, ELFIO::STT_FILE, 0, ELFIO::SHN_ABS);

    ELFIO::section *textSec = elfWriter.sections.add(".text");
    textSec->set_type(ELFIO::SHT_PROGBITS);
    // NOLINTNEXTLINE(hicpp-signed-bitwise)
    textSec->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR);
    textSec->set_addr_align(GetCodeAlignment(arch_));

    ELFIO::section *relSec = elfWriter.sections.add(".rela.text");
    relSec->set_type(ELFIO::SHT_RELA);
    relSec->set_info(textSec->get_index());
    relSec->set_link(symSec->get_index());
    relSec->set_addr_align(4U);  // CODECHECK-NOLINT(C_RULE_ID_MAGICNUMBER)
    relSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_RELA));
    ELFIO::relocation_section_accessor relWriter(elfWriter, relSec);

    /* Use a symbols map to avoid saving the same symbol multiple times */
    std::unordered_map<std::string, uint32_t> symbolsMap;
    auto addSymbol = [&symbolsMap, &symbolWriter, &strWriter](const char *name) {
        if (auto it = symbolsMap.find(name); it != symbolsMap.end()) {
            return it->second;
        }
        uint32_t index = symbolWriter.add_symbol(strWriter, name, 0, 0, ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, 0);
        symbolsMap.insert({name, index});
        return index;
    };
#ifdef PANDA_COMPILER_DEBUG_INFO
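    // DWARF line debug info is emitted only when at least one unit was compiled with line debug info enabled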
    auto dwarfBuilder {hasDebugInfo_ ? std::make_optional<DwarfBuilder>(arch_, &elfWriter) : std::nullopt};
#endif

    static constexpr size_t MAX_CODE_ALIGNMENT = 64;
    static constexpr std::array<uint8_t, MAX_CODE_ALIGNMENT> PADDING_DATA {0};
    CHECK_LE(GetCodeAlignment(GetArch()), MAX_CODE_ALIGNMENT);

    uint32_t codeAlignment = GetCodeAlignment(GetArch());
    ASSERT(codeAlignment != 0);
    size_t offset = 0;
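    // Emit every unit that was not compiled by LLVM: align the code, add a function symbol, append the code
    // bytes and record the relocations to external functions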
    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            continue;
        }
        auto code = unit->GetCode();

        // Align function
        if (auto padding = (codeAlignment - offset % codeAlignment) % codeAlignment; padding != 0) {
            textSec->append_data(reinterpret_cast<const char *>(PADDING_DATA.data()), padding);
            offset += padding;
        }
        auto symbol = symbolWriter.add_symbol(strWriter, unit->GetName(), offset, code.size(), ELFIO::STB_GLOBAL,
                                              ELFIO::STT_FUNC, 0, textSec->get_index());
        (void)symbol;
        textSec->append_data(reinterpret_cast<const char *>(code.data()), code.size());
        for (auto &rel : unit->GetRelocations()) {
            size_t relOffset = offset + rel.offset;
            auto sindex = addSymbol(unit->GetExternalFunction(rel.data));
            if (Is64BitsArch(arch_)) {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF64_R_INFO(sindex, rel.type)),
                                    rel.addend);
            } else {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF32_R_INFO(sindex, rel.type)),
                                    rel.addend);
            }
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        ASSERT(!unit->GetGraph()->IsLineDebugInfoEnabled() || dwarfBuilder);
        if (dwarfBuilder && !dwarfBuilder->BuildGraph(unit, offset, symbol)) {
            return Unexpected("DwarfBuilder::BuildGraph failed!");
        }
#endif
        offset += code.size();
    }
#ifdef PANDA_COMPILER_DEBUG_INFO
    if (dwarfBuilder && !dwarfBuilder->Finalize(offset)) {
        return Unexpected("DwarfBuilder::Finalize failed!");
    }
#endif

    elfWriter.save(output.data());

    return 0;
}
} // namespace panda::irtoc