/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compilation.h"
#include "function.h"
#include "mem/pool_manager.h"
#include "elfio/elfio.hpp"
#include "irtoc_runtime.h"
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
#include "aarch64/disasm-aarch64.h"
#endif

#ifdef PANDA_COMPILER_DEBUG_INFO
#include "dwarf_builder.h"
#endif

namespace panda::irtoc {

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
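// Collects the set of registers that a compiled code range actually writes to.
// The collector plugs into vixl's AArch64 decoder as a disassembler visitor:
// decoding an instruction triggers the AppendRegisterNameToOutput() hook below
// for each register operand, which is where destination registers get recorded
// into the GPR and FP masks.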
class UsedRegistersCollector : public vixl::aarch64::Disassembler {
public:
    explicit UsedRegistersCollector(panda::ArenaAllocator *allocator) : Disassembler(allocator) {}

    RegMask &GetUsedRegs(bool isFp)
    {
        return isFp ? vregMask_ : regMask_;
    }

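    // Decodes the given code range and returns the masks of general-purpose and
    // floating-point registers it uses, excluding registers that are only touched
    // while restoring state in the epilogue (see the heuristics described below).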
    static UsedRegisters CollectForCode(ArenaAllocator *allocator, Span<const uint8_t> code)
    {
        ASSERT(allocator != nullptr);
        ASSERT(!code.Empty());

        vixl::aarch64::Decoder decoder(allocator);
        UsedRegistersCollector usedRegsCollector(allocator);
        decoder.AppendVisitor(&usedRegsCollector);
        bool skipping = false;

        auto startInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(code.data());
        auto endInstr = reinterpret_cast<const vixl::aarch64::Instruction *>(&(*code.end()));
        // To determine real register usage we check each assembly instruction that has
        // destination register(s). Handlers ending with `return` are problematic because
        // their epilogues restore registers, and such restores must not be confused with
        // real register usage. A few heuristics make the separation possible; they work
        // as follows:
        //   1) We walk the assembly code in reverse order so that the epilogue is found
        //      quickly.
        //   2) For each instruction we add all of its destination registers to the result
        //      set of used registers.
        //   3) When we meet a `ret` instruction, we raise the special `skipping` flag for
        //      the next few instructions.
        //   4) While the `skipping` flag is raised, load instructions and `add` arithmetic
        //      with `sp` (the stack pointer) as destination keep being skipped (they are
        //      assumed to belong to the epilogue), and their registers are not added to
        //      the result set. As soon as we meet any other kind of instruction, we clear
        //      the `skipping` flag.
        for (auto instr = usedRegsCollector.GetPrevInstruction(endInstr); instr >= startInstr;
             instr = usedRegsCollector.GetPrevInstruction(instr)) {
            if (instr->Mask(vixl::aarch64::UnconditionalBranchToRegisterMask) == vixl::aarch64::RET) {
                skipping = true;
                continue;
            }
            if (skipping && (instr->IsLoad() || usedRegsCollector.CheckSPAdd(instr))) {
                continue;
            }
            skipping = false;
            decoder.Decode(instr);
        }

        UsedRegisters usedRegisters;
        usedRegisters.gpr |= usedRegsCollector.GetUsedRegs(false);
        usedRegisters.fp |= usedRegsCollector.GetUsedRegs(true);
        return usedRegisters;
    }

protected:
    const vixl::aarch64::Instruction *GetPrevInstruction(const vixl::aarch64::Instruction *instr) const
    {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        return instr - vixl::aarch64::kInstructionSize;
    }

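    // An `add` with `sp` as destination is how an epilogue tears down the stack
    // frame (`add sp, sp, #imm`), so such instructions are treated as part of the
    // epilogue while the `skipping` flag is raised.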
    bool CheckSPAdd(const vixl::aarch64::Instruction *instr) const
    {
        constexpr int32_t SP_REG = GetDwarfSP(Arch::AARCH64);
        return instr->Mask(vixl::aarch64::AddSubOpMask) == vixl::aarch64::ADD && (instr->GetRd() == SP_REG);
    }

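    // vixl invokes this hook for every register operand it prints while
    // disassembling. We piggyback on it to record destination registers: stores
    // are ignored, and for the remaining instructions only Rd (plus Rt2 for pair
    // loads) is added to the corresponding mask; the zero register is never set.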
    void AppendRegisterNameToOutput(const vixl::aarch64::Instruction *instr,
                                    const vixl::aarch64::CPURegister &reg) override
    {
        Disassembler::AppendRegisterNameToOutput(instr, reg);
        if (instr->IsStore()) {
            return;
        }
        uint32_t code = reg.GetCode();
        // We need to account for both registers in case of a pair load
        bool isPair = instr->Mask(vixl::aarch64::LoadStorePairAnyFMask) == vixl::aarch64::LoadStorePairAnyFixed;
        if (!(code == static_cast<uint32_t>(instr->GetRd()) ||
              (isPair && code == static_cast<uint32_t>(instr->GetRt2())))) {
            return;
        }
        if (reg.IsRegister()) {
            if (!reg.IsZero()) {
                regMask_.Set(code);
            }
        } else {
            ASSERT(reg.IsVRegister());
            vregMask_.Set(code);
        }
    }

private:
    RegMask regMask_;
    VRegMask vregMask_;
};
#endif  // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK

// The elfio library is missing some ELF constants, so let's define them here for
// a while. We can't include the elf.h header because it conflicts with elfio.
static constexpr size_t EF_ARM_EABI_VER5 = 0x05000000;

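// Accumulates the registers actually used by the machine code of interpreter
// handlers that were not compiled by the Ark backend (AArch64 only).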
void Compilation::CollectUsedRegisters([[maybe_unused]] panda::ArenaAllocator *allocator)
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (arch_ == Arch::AARCH64) {
        for (auto unit : units_) {
            if ((unit->GetGraph()->GetMode().IsInterpreter() || unit->GetGraph()->GetMode().IsInterpreterEntry()) &&
                unit->GetCompilationResult() != CompilationResult::ARK) {
                usedRegisters_ |= UsedRegistersCollector::CollectForCode(allocator, unit->GetCode());
            }
        }
    }
#endif  // ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
}

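// Verifies that the callee-saved registers collected above form exactly the
// optimized callee-saved set expected for the target; any mismatch is fatal.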
void Compilation::CheckUsedRegisters()
{
#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    if (usedRegisters_.gpr.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used registers " << usedRegisters_.gpr;
        usedRegisters_.gpr &= GetCalleeRegsMask(arch_, false);
        auto diff = usedRegisters_.gpr ^ GetCalleeRegsMask(arch_, false, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, false, true) << std::endl
                              << "Got: " << usedRegisters_.gpr;
        }
    }
    if (usedRegisters_.fp.Count() > 0) {
        LOG(INFO, IRTOC) << "LLVM Irtoc compilation: used fp registers " << usedRegisters_.fp;
        usedRegisters_.fp &= GetCalleeRegsMask(arch_, true);
        auto diff = usedRegisters_.fp ^ GetCalleeRegsMask(arch_, true, true);
        if (diff.Any()) {
            LOG(FATAL, IRTOC) << "LLVM Irtoc compilation callee saved fp register usage is different from optimized set"
                              << std::endl
                              << "Expected: " << GetCalleeRegsMask(arch_, true, true) << std::endl
                              << "Got: " << usedRegisters_.fp;
        }
    }
#endif
}

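// Top-level driver: sets up the memory pool and allocators, resolves the target
// architecture (optionally a cross-compilation target on an x86_64 host), compiles
// all units, validates register usage, and emits the resulting ELF object file.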
Compilation::Result Compilation::Run()
{
    if (compiler::g_options.WasSetCompilerRegex()) {
        methodsRegex_ = compiler::g_options.GetCompilerRegex();
    }

    PoolManager::Initialize(PoolType::MALLOC);

    allocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);
    localAllocator_ = std::make_unique<ArenaAllocator>(SpaceType::SPACE_TYPE_COMPILER);

    if (RUNTIME_ARCH == Arch::X86_64 && compiler::g_options.WasSetCompilerCrossArch()) {
        arch_ = GetArchFromString(compiler::g_options.GetCompilerCrossArch());
        if (arch_ == Arch::NONE) {
            LOG(FATAL, IRTOC) << "FATAL: unknown arch: " << compiler::g_options.GetCompilerCrossArch();
        }
        compiler::g_options.AdjustCpuFeatures(arch_ != RUNTIME_ARCH);
    } else {
        compiler::g_options.AdjustCpuFeatures(false);
    }

    LOG(INFO, IRTOC) << "Start Irtoc compilation for " << GetArchString(arch_) << "...";

    auto result = Compile();
    if (result) {
        CheckUsedRegisters();
        LOG(INFO, IRTOC) << "Irtoc compilation success";
    } else {
        LOG(FATAL, IRTOC) << "Irtoc compilation failed: " << result.Error();
    }

    if (result = MakeElf(g_options.GetIrtocOutput()); !result) {
        return result;
    }

    for (auto unit : units_) {
        delete unit;
    }

    allocator_.reset();
    localAllocator_.reset();

    PoolManager::Finalize();

    return result;
}

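// Compiles every unit that matches the optional method regex. When the LLVM
// backend is enabled, units are additionally registered with the LLVM Irtoc
// compiler, its object file is written out, and LLVM-compiled units get their
// code spans replaced with the LLVM-produced machine code.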
Compilation::Result Compilation::Compile()
{
#ifdef PANDA_LLVM_IRTOC
    IrtocRuntimeInterface runtime;
    ArenaAllocator allocator(SpaceType::SPACE_TYPE_COMPILER);
    std::shared_ptr<llvmbackend::IrtocCompilerInterface> llvmCompiler =
        llvmbackend::CreateLLVMIrtocCompiler(&runtime, &allocator, arch_);
#endif

    for (auto unit : units_) {
        if (compiler::g_options.WasSetCompilerRegex() && !std::regex_match(unit->GetName(), methodsRegex_)) {
            continue;
        }
        LOG(INFO, IRTOC) << "Compile " << unit->GetName();
#ifdef PANDA_LLVM_IRTOC
        unit->SetLLVMCompiler(llvmCompiler);
#endif
        auto result = unit->Compile(arch_, allocator_.get(), localAllocator_.get());
        if (!result) {
            return Unexpected {result.Error()};
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        hasDebugInfo_ |= unit->GetGraph()->IsLineDebugInfoEnabled();
#endif
    }

#ifdef PANDA_LLVM_IRTOC
    llvmCompiler->CompileAll();
    ASSERT(!g_options.GetIrtocOutputLlvm().empty());
    llvmCompiler->WriteObjectFile(g_options.GetIrtocOutputLlvm());

    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            auto code = llvmCompiler->GetCompiledCode(unit->GetName());
            Span<uint8_t> span = {const_cast<uint8_t *>(code.code), code.size};
            unit->SetCode(span);
        }
        unit->ReportCompilationStatistic(&std::cerr);
    }
    if (g_options.GetIrtocLlvmStats() != "none" && !llvmCompiler->IsEmpty()) {
        std::cerr << "LLVM total: " << llvmCompiler->GetObjectFileSize() << " bytes" << std::endl;
    }

#ifdef LLVM_INTERPRETER_CHECK_REGS_MASK
    CollectUsedRegisters(&allocator);
#endif
#endif  // PANDA_LLVM_IRTOC

    return 0;
}

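// Maps the target architecture to the matching ELF e_machine value.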
static size_t GetElfArch(Arch arch)
{
    switch (arch) {
        case Arch::AARCH32:
            return ELFIO::EM_ARM;
        case Arch::AARCH64:
            return ELFIO::EM_AARCH64;
        case Arch::X86:
            return ELFIO::EM_386;
        case Arch::X86_64:
            return ELFIO::EM_X86_64;
        default:
            UNREACHABLE();
    }
}

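// Emits all Irtoc-compiled units into a relocatable ELF object: .strtab and
// .symtab for symbols, .text for the code itself, .rela.text for relocations to
// external functions, and (when debug info is enabled) DWARF sections produced
// by DwarfBuilder.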
// CODECHECK-NOLINTNEXTLINE(C_RULE_ID_FUNCTION_SIZE)
Compilation::Result Compilation::MakeElf(std::string_view output)
{
    ELFIO::elfio elfWriter;
    elfWriter.create(Is64BitsArch(arch_) ? ELFIO::ELFCLASS64 : ELFIO::ELFCLASS32, ELFIO::ELFDATA2LSB);
    elfWriter.set_type(ELFIO::ET_REL);
    if (arch_ == Arch::AARCH32) {
        elfWriter.set_flags(EF_ARM_EABI_VER5);
    }
    elfWriter.set_os_abi(ELFIO::ELFOSABI_NONE);
    elfWriter.set_machine(GetElfArch(arch_));

    ELFIO::section *strSec = elfWriter.sections.add(".strtab");
    strSec->set_type(ELFIO::SHT_STRTAB);
    strSec->set_addr_align(0x1);

    ELFIO::string_section_accessor strWriter(strSec);

    static constexpr size_t FIRST_GLOBAL_SYMBOL_INDEX = 2;
    static constexpr size_t SYMTAB_ADDR_ALIGN = 8;

    ELFIO::section *symSec = elfWriter.sections.add(".symtab");
    symSec->set_type(ELFIO::SHT_SYMTAB);
    symSec->set_info(FIRST_GLOBAL_SYMBOL_INDEX);
    symSec->set_link(strSec->get_index());
    symSec->set_addr_align(SYMTAB_ADDR_ALIGN);
    symSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_SYMTAB));

    ELFIO::symbol_section_accessor symbolWriter(elfWriter, symSec);

    symbolWriter.add_symbol(strWriter, "irtoc.cpp", 0, 0, ELFIO::STB_LOCAL, ELFIO::STT_FILE, 0, ELFIO::SHN_ABS);

    ELFIO::section *textSec = elfWriter.sections.add(".text");
    textSec->set_type(ELFIO::SHT_PROGBITS);
    // NOLINTNEXTLINE(hicpp-signed-bitwise)
    textSec->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR);
    textSec->set_addr_align(GetCodeAlignment(arch_));

    ELFIO::section *relSec = elfWriter.sections.add(".rela.text");
    relSec->set_type(ELFIO::SHT_RELA);
    relSec->set_info(textSec->get_index());
    relSec->set_link(symSec->get_index());
    relSec->set_addr_align(4U);  // CODECHECK-NOLINT(C_RULE_ID_MAGICNUMBER)
    relSec->set_entry_size(elfWriter.get_default_entry_size(ELFIO::SHT_RELA));
    ELFIO::relocation_section_accessor relWriter(elfWriter, relSec);
    /* Use a symbols map to avoid saving the same symbol multiple times */
    std::unordered_map<std::string, uint32_t> symbolsMap;
    auto addSymbol = [&symbolsMap, &symbolWriter, &strWriter](const char *name) {
        if (auto it = symbolsMap.find(name); it != symbolsMap.end()) {
            return it->second;
        }
        uint32_t index = symbolWriter.add_symbol(strWriter, name, 0, 0, ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, 0);
        symbolsMap.insert({name, index});
        return index;
    };
#ifdef PANDA_COMPILER_DEBUG_INFO
    auto dwarfBuilder {hasDebugInfo_ ? std::make_optional<DwarfBuilder>(arch_, &elfWriter) : std::nullopt};
#endif

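    // Code layout: each unit compiled by Irtoc itself is appended to .text, padded
    // to the target's code alignment, registered as a global STT_FUNC symbol, and
    // given relocation entries for the external functions it calls.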
    static constexpr size_t MAX_CODE_ALIGNMENT = 64;
    static constexpr std::array<uint8_t, MAX_CODE_ALIGNMENT> PADDING_DATA {0};
    CHECK_LE(GetCodeAlignment(GetArch()), MAX_CODE_ALIGNMENT);

    uint32_t codeAlignment = GetCodeAlignment(GetArch());
    ASSERT(codeAlignment != 0);
    size_t offset = 0;
    for (auto unit : units_) {
        if (unit->IsCompiledByLlvm()) {
            continue;
        }
        auto code = unit->GetCode();

        // Align the function start: pad with zero bytes up to the next multiple of
        // the code alignment (the padding is the alignment minus the remainder)
        if (auto rem = offset % codeAlignment; rem != 0) {
            size_t padding = codeAlignment - rem;
            textSec->append_data(reinterpret_cast<const char *>(PADDING_DATA.data()), padding);
            offset += padding;
        }
        auto symbol = symbolWriter.add_symbol(strWriter, unit->GetName(), offset, code.size(), ELFIO::STB_GLOBAL,
                                              ELFIO::STT_FUNC, 0, textSec->get_index());
        (void)symbol;
        textSec->append_data(reinterpret_cast<const char *>(code.data()), code.size());
        for (auto &rel : unit->GetRelocations()) {
            size_t relOffset = offset + rel.offset;
            auto sindex = addSymbol(unit->GetExternalFunction(rel.data));
            if (Is64BitsArch(arch_)) {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF64_R_INFO(sindex, rel.type)),
                                    rel.addend);
            } else {
                // NOLINTNEXTLINE(hicpp-signed-bitwise)
                relWriter.add_entry(relOffset, static_cast<ELFIO::Elf_Xword>(ELF32_R_INFO(sindex, rel.type)),
                                    rel.addend);
            }
        }
#ifdef PANDA_COMPILER_DEBUG_INFO
        ASSERT(!unit->GetGraph()->IsLineDebugInfoEnabled() || dwarfBuilder);
        if (dwarfBuilder && !dwarfBuilder->BuildGraph(unit, offset, symbol)) {
            return Unexpected("DwarfBuilder::BuildGraph failed!");
        }
#endif
        offset += code.size();
    }
#ifdef PANDA_COMPILER_DEBUG_INFO
    if (dwarfBuilder && !dwarfBuilder->Finalize(offset)) {
        return Unexpected("DwarfBuilder::Finalize failed!");
    }
#endif

    elfWriter.save(output.data());

    return 0;
}
}  // namespace panda::irtoc