• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "literal_data_accessor.h"
21 #include "mangling.h"
22 #include "utils/logger.h"
23 
24 #include <cstdint>
25 #include <iomanip>
26 #include <charconv>
27 
28 #include "get_language_specific_metadata.inc"
29 
30 namespace ark::disasm {
31 
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)32 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
33 {
34     auto file = panda_file::File::Open(filenameIn);
35     if (file == nullptr) {
36         LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
37     }
38 
39     Disassemble(file, quiet, skipStrings);
40 }
41 
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)42 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
43 {
44     SetFile(file);
45     DisassembleImpl(quiet, skipStrings);
46 }
47 
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)48 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
49 {
50     SetFile(file);
51     DisassembleImpl(quiet, skipStrings);
52 }
53 
DisassembleImpl(const bool quiet,const bool skipStrings)54 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
55 {
56     prog_ = pandasm::Program {};
57 
58     recordNameToId_.clear();
59     methodStaticNameToId_.clear();
60     methodInstanceNameToId_.clear();
61 
62     skipStrings_ = skipStrings;
63     quiet_ = quiet;
64 
65     progInfo_ = ProgInfo {};
66 
67     progAnn_ = ProgAnnotations {};
68 
69     GetLiteralArrays();
70     GetRecords();
71 
72     AddExternalFieldsToRecords();
73     GetLanguageSpecificMetadata();
74 }
75 
SetFile(std::unique_ptr<const panda_file::File> & file)76 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
77 {
78     fileHolder_.swap(file);
79     file_ = fileHolder_.get();
80 }
81 
SetFile(const panda_file::File & file)82 void Disassembler::SetFile(const panda_file::File &file)
83 {
84     fileHolder_.reset();
85     file_ = &file;
86 }
87 
SetProfile(std::string_view fname)88 void Disassembler::SetProfile(std::string_view fname)
89 {
90     std::ifstream stm(fname.data(), std::ios::binary);
91     if (!stm.is_open()) {
92         LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
93     }
94 
95     auto res = profiling::ReadProfile(stm, fileLanguage_);
96     if (!res) {
97         LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
98     }
99     profile_ = res.Value();
100 }
101 
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const102 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
103                               MethodInfo *methodInfo /* out */) const
104 {
105     const static size_t FORMAT_WIDTH = 20;
106     const static size_t INSTRUCTION_WIDTH = 2;
107 
108     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
109 
110     std::string methodName = mda.GetFullName();
111     auto prof = profiling::INVALID_PROFILE;
112     if (profile_ != profiling::INVALID_PROFILE) {
113         prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
114     }
115 
116     auto insSz = codeAccessor.GetCodeSize();
117     auto insArr = codeAccessor.GetInstructions();
118 
119     auto bcIns = BytecodeInstruction(insArr);
120     auto bcInsLast = bcIns.JumpTo(insSz);
121 
122     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
123         std::stringstream ss;
124 
125         uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
126         ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
127         ss << ", " << std::setfill('.');
128 
129         BytecodeInstruction::Format format = bcIns.GetFormat();
130 
131         auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
132         ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
133 
134         ss << "[";
135 
136         const uint8_t *pc = bcIns.GetAddress();
137         const size_t sz = bcIns.GetSize();
138 
139         for (size_t i = 0; i < sz; i++) {
140             ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
141                << static_cast<int>(pc[i]);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
142 
143             if (i != sz - 1) {
144                 ss << " ";
145             }
146         }
147 
148         ss << "]";
149 
150         if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
151             auto profId = bcIns.GetProfileId();
152             if (profId != -1) {
153                 ss << ", Profile: ";
154                 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
155             }
156         }
157 
158         methodInfo->instructionsInfo.push_back(ss.str());
159 
160         bcIns = bcIns.GetNext();
161     }
162 }
163 
CollectInfo()164 void Disassembler::CollectInfo()
165 {
166     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
167 
168     debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
169 
170     for (const auto &pair : recordNameToId_) {
171         GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
172     }
173 
174     for (const auto &pair : methodStaticNameToId_) {
175         GetMethodInfo(pair.second, &progInfo_.methodsStaticInfo[pair.first]);
176     }
177     for (const auto &pair : methodInstanceNameToId_) {
178         GetMethodInfo(pair.second, &progInfo_.methodsInstanceInfo[pair.first]);
179     }
180 
181     AddExternalFieldsInfoToRecords();
182 }
183 
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const184 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
185 {
186     if (os.bad()) {
187         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
188 
189         return;
190     }
191 
192     SerializeFilename(os);
193     SerializeLanguage(os);
194     SerializeLitArrays(os, addSeparators);
195     SerializeRecords(os, addSeparators, printInformation);
196     SerializeMethods(os, addSeparators, printInformation);
197 }
198 
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const199 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
200 {
201     headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
202 
203     if (!method.params.empty()) {
204         headerSs << method.params[0].type.GetPandasmName() << " a0";
205 
206         for (size_t i = 1; i < method.params.size(); i++) {
207             headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
208         }
209     }
210     headerSs << ")";
211 }
212 
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const213 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
214                                      const MethodInfo *&methodInfo) const
215 {
216     if (!method.catchBlocks.empty()) {
217         os << "\n";
218 
219         for (const auto &catchBlock : method.catchBlocks) {
220             Serialize(catchBlock, os);
221             os << "\n";
222         }
223     }
224 
225     if (printMethodInfo) {
226         ASSERT(methodInfo != nullptr);
227         SerializeLineNumberTable(methodInfo->lineNumberTable, os);
228         SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
229     }
230 
231     os << "}\n\n";
232 }
233 
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const234 size_t Disassembler::SerializeIfPrintMethodInfo(
235     const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
236     std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
237 {
238     size_t width = 0;
239     if (printMethodInfo) {
240         methodInfo = &methodInfoIt->second;
241 
242         for (const auto &i : method.ins) {
243             if (i.ToString().size() > width) {
244                 width = i.ToString().size();
245             }
246         }
247 
248         headerSs << " # " << methodInfo->methodInfo << "\n#   CODE:";
249     }
250 
251     headerSs << "\n";
252     return width;
253 }
254 
255 // CC-OFFNXT(huge_method) solid logic
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const256 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
257                              panda_file::LineNumberTable *lineTable) const
258 {
259     std::ostringstream headerSs;
260     SerializePrintStartInfo(method, headerSs);
261     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
262     const auto methodIter = progAnn_.methodAnnotations.find(signature);
263     if (methodIter != progAnn_.methodAnnotations.end()) {
264         Serialize(*method.metadata, methodIter->second, headerSs);
265     } else {
266         Serialize(*method.metadata, {}, headerSs);
267     }
268 
269     if (!method.HasImplementation()) {
270         headerSs << "\n\n";
271         os << headerSs.str();
272         return;
273     }
274 
275     headerSs << " {";
276 
277     const MethodInfo *methodInfo = nullptr;
278     auto &methodsInfo = method.IsStatic() ? progInfo_.methodsStaticInfo : progInfo_.methodsInstanceInfo;
279     auto methodInfoIt = methodsInfo.find(signature);
280     bool printMethodInfo = printInformation && methodInfoIt != methodsInfo.end();
281     size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
282 
283     auto headerSsStr = headerSs.str();
284     size_t lineNumber = static_cast<size_t>(std::count(headerSsStr.begin(), headerSsStr.end(), '\n')) + 1;
285 
286     os << headerSsStr;
287 
288     for (size_t i = 0; i < method.ins.size(); i++) {
289         std::ostringstream insSs;
290 
291         std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
292         if (method.ins[i].setLabel) {
293             insSs << ins.substr(0, ins.find(": ")) << ":\n";
294             ins.erase(0, ins.find(": ") + std::string(": ").length());
295         }
296 
297         insSs << "\t";
298         if (printMethodInfo) {
299             insSs << std::setw(width) << std::left;
300         }
301         insSs << ins;
302         if (printMethodInfo) {
303             ASSERT(methodInfo != nullptr);
304             insSs << " # " << methodInfo->instructionsInfo[i];
305         }
306         insSs << "\n";
307 
308         auto insSsStr = insSs.str();
309         lineNumber += static_cast<size_t>(std::count(insSsStr.begin(), insSsStr.end(), '\n'));
310 
311         if (lineTable != nullptr) {
312             lineTable->emplace_back(
313                 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
314         }
315 
316         os << insSsStr;
317     }
318 
319     SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
320 }
321 
IsSystemType(const std::string & typeName)322 inline bool Disassembler::IsSystemType(const std::string &typeName)
323 {
324     bool isArrayType = typeName.back() == ']';
325     bool isGlobal = typeName == "_GLOBAL";
326 
327     return isArrayType || isGlobal;
328 }
329 
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)330 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
331 {
332     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
333 
334     record.name = GetFullRecordName(recordId);
335 
336     LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
337 
338     GetMetaData(&record, recordId);
339 
340     if (!file_->IsExternal(recordId)) {
341         GetMethods(recordId);
342         GetFields(record, recordId);
343     }
344 }
345 
AddMethodToTables(const panda_file::File::EntityId & methodId)346 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
347 {
348     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
349     pandasm::Function newMethod("", fileLanguage_);
350     GetMethod(&newMethod, methodId);
351 
352     const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
353     auto isStatic = methodAccessor.IsStatic();
354     auto &functionTable = isStatic ? prog_.functionStaticTable : prog_.functionInstanceTable;
355     if (functionTable.find(signature) != functionTable.end()) {
356         return;
357     }
358 
359     if (isStatic) {
360         methodStaticNameToId_.emplace(signature, methodId);
361     } else {
362         methodInstanceNameToId_.emplace(signature, methodId);
363     }
364 
365     prog_.functionSynonyms[newMethod.name].push_back(signature);
366     functionTable.emplace(signature, std::move(newMethod));
367 }
368 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)369 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
370 {
371     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
372 
373     if (method == nullptr) {
374         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
375 
376         return;
377     }
378 
379     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
380 
381     method->name = GetFullMethodName(methodId);
382 
383     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
384 
385     GetParams(method, methodAccessor.GetProtoId());
386     GetMetaData(method, methodId);
387 
388     if (!method->HasImplementation()) {
389         return;
390     }
391 
392     if (methodAccessor.GetCodeId().has_value()) {
393         const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
394 
395         for (const auto &id : idList) {
396             AddMethodToTables(id);
397         }
398     } else {
399         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
400                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
401 
402         return;
403     }
404 }
405 
406 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const407 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
408                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
409 {
410     panda_file::File::EntityId id(std::get<uint32_t>(value));
411     auto sp = file_->GetSpanFromId(id);
412     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
413     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
414         for (size_t i = 0; i < len; i++) {
415             pandasm::LiteralArray::Literal lit;
416             lit.tag = tag;
417             lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
418             litArray->literals.push_back(lit);
419         }
420     } else {
421         for (size_t i = 0; i < len; i++) {
422             auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
423             pandasm::LiteralArray::Literal lit;
424             lit.tag = tag;
425             lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
426             litArray->literals.push_back(lit);
427         }
428     }
429 }
430 
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const431 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
432                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
433                                    const panda_file::LiteralTag &tag) const
434 {
435     pandasm::LiteralArray::Literal lit;
436     if (tag == panda_file::LiteralTag::TAGVALUE) {
437         return;
438     }
439     lit.tag = tag;
440     lit.value = ParseLiteralValue(value, tag);
441     litArray->literals.push_back(lit);
442 }
443 
ParseLiteralValue(const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const444 std::variant<bool, uint8_t, uint16_t, uint32_t, uint64_t, float, double, std::string> Disassembler::ParseLiteralValue(
445     const panda_file::LiteralDataAccessor::LiteralValue &value, const panda_file::LiteralTag &tag) const
446 {
447     switch (tag) {
448         case panda_file::LiteralTag::BOOL:
449             return std::get<bool>(value);
450         case panda_file::LiteralTag::ACCESSOR:
451         case panda_file::LiteralTag::NULLVALUE:
452             return std::get<uint8_t>(value);
453         case panda_file::LiteralTag::METHODAFFILIATE:
454             return std::get<uint16_t>(value);
455         case panda_file::LiteralTag::INTEGER:
456             return std::get<uint32_t>(value);
457         case panda_file::LiteralTag::BIGINT:
458             return std::get<uint64_t>(value);
459         case panda_file::LiteralTag::FLOAT:
460             return std::get<float>(value);
461         case panda_file::LiteralTag::DOUBLE:
462             return std::get<double>(value);
463         case panda_file::LiteralTag::STRING:
464         case panda_file::LiteralTag::METHOD:
465         case panda_file::LiteralTag::GENERATORMETHOD:
466             return ParseStringData(value);
467         case panda_file::LiteralTag::LITERALARRAY:
468             return ParseLiteralArrayData(value);
469         default:
470             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
471             UNREACHABLE();
472     }
473 }
474 
ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue & value) const475 std::string Disassembler::ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
476 {
477     auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
478     return StringDataToString(strData);
479 }
480 
ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue & value) const481 std::string Disassembler::ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
482 {
483     std::stringstream ss;
484     ss << "0x" << std::hex << std::get<uint32_t>(value);
485     return ss.str();
486 }
487 
GetLiteralArrayByOffset(pandasm::LiteralArray * litArray,panda_file::File::EntityId offset) const488 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *litArray, panda_file::File::EntityId offset) const
489 {
490     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
491     auto processLiteralValue = [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
492                                                 const panda_file::LiteralTag &tag) {
493         switch (tag) {
494             case panda_file::LiteralTag::ARRAY_U1: {
495                 FillLiteralArrayData<bool>(litArray, tag, value);
496                 break;
497             }
498             case panda_file::LiteralTag::ARRAY_I8:
499             case panda_file::LiteralTag::ARRAY_U8: {
500                 FillLiteralArrayData<uint8_t>(litArray, tag, value);
501                 break;
502             }
503             case panda_file::LiteralTag::ARRAY_I16:
504             case panda_file::LiteralTag::ARRAY_U16: {
505                 FillLiteralArrayData<uint16_t>(litArray, tag, value);
506                 break;
507             }
508             case panda_file::LiteralTag::ARRAY_I32:
509             case panda_file::LiteralTag::ARRAY_U32: {
510                 FillLiteralArrayData<uint32_t>(litArray, tag, value);
511                 break;
512             }
513             case panda_file::LiteralTag::ARRAY_I64:
514             case panda_file::LiteralTag::ARRAY_U64: {
515                 FillLiteralArrayData<uint64_t>(litArray, tag, value);
516                 break;
517             }
518             case panda_file::LiteralTag::ARRAY_F32: {
519                 FillLiteralArrayData<float>(litArray, tag, value);
520                 break;
521             }
522             case panda_file::LiteralTag::ARRAY_F64: {
523                 FillLiteralArrayData<double>(litArray, tag, value);
524                 break;
525             }
526             case panda_file::LiteralTag::ARRAY_STRING: {
527                 FillLiteralArrayData<uint32_t>(litArray, tag, value);
528                 break;
529             }
530             default: {
531                 FillLiteralData(litArray, value, tag);
532                 break;
533             }
534         }
535     };
536 
537     litArrayAccessor.EnumerateLiteralVals(offset, processLiteralValue);
538 }
539 
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)540 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
541 {
542     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
543 
544     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
545     GetLiteralArrayByOffset(litArray, litArrayAccessor.GetLiteralArrayId(index));
546 }
547 
GetLiteralArrays()548 void Disassembler::GetLiteralArrays()
549 {
550     const auto litArraysId = file_->GetLiteralArraysId();
551 
552     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
553                              << ")";
554 
555     panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
556     size_t numLitarrays = litArrayAccessor.GetLiteralNum();
557     for (size_t index = 0; index < numLitarrays; index++) {
558         ark::pandasm::LiteralArray litAr;
559         GetLiteralArray(&litAr, index);
560         prog_.literalarrayTable.emplace(std::to_string(index), litAr);
561     }
562 }
563 
GetRecords()564 void Disassembler::GetRecords()
565 {
566     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
567 
568     const auto classIdx = file_->GetClasses();
569 
570     for (size_t i = 0; i < classIdx.size(); i++) {
571         uint32_t classId = classIdx[i];
572         auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
573 
574         if (classId > file_->GetHeader()->fileSize) {
575             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
576                                      << classOff << "). binary file corrupted. record offset (0x" << classId
577                                      << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
578             break;
579         }
580 
581         const panda_file::File::EntityId recordId {classId};
582         auto language = GetRecordLanguage(recordId);
583         if (language != fileLanguage_) {
584             if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
585                 fileLanguage_ = language;
586             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
587                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
588                                          << std::hex << classOff << "). record's language  ("
589                                          << panda_file::LanguageToString(language)
590                                          << ")  differs from file's language ("
591                                          << panda_file::LanguageToString(fileLanguage_) << ")!";
592             }
593         }
594 
595         pandasm::Record record("", fileLanguage_);
596         GetRecord(record, recordId);
597 
598         if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
599             recordNameToId_.emplace(record.name, recordId);
600             prog_.recordTable.emplace(record.name, std::move(record));
601         }
602     }
603 }
604 
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)605 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
606 {
607     panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
608     field.name = StringDataToString(file_->GetStringData(fieldNameId));
609 
610     uint32_t fieldType = fieldAccessor.GetType();
611     field.type = FieldTypeToPandasmType(fieldType);
612 
613     GetMetaData(&field, fieldAccessor.GetFieldId());
614 }
615 
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)616 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
617 {
618     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
619 
620     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
621         pandasm::Field field(fileLanguage_);
622 
623         GetField(field, fieldAccessor);
624 
625         record.fieldList.push_back(std::move(field));
626     });
627 }
628 
AddExternalFieldsToRecords()629 void Disassembler::AddExternalFieldsToRecords()
630 {
631     for (auto &[recordName, record] : prog_.recordTable) {
632         auto iter = externalFieldTable_.find(recordName);
633         if (iter == externalFieldTable_.end() || iter->second.empty()) {
634             continue;
635         }
636         for (auto &fieldIter : iter->second) {
637             record.fieldList.push_back(std::move(fieldIter));
638         }
639         externalFieldTable_.erase(recordName);
640     }
641 }
642 
AddExternalFieldsInfoToRecords()643 void Disassembler::AddExternalFieldsInfoToRecords()
644 {
645     for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
646         auto iter = externalFieldsInfoTable_.find(recordName);
647         if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
648             continue;
649         }
650         for (auto &info : iter->second) {
651             recordInfo.fieldsInfo.push_back(std::move(info));
652         }
653         externalFieldsInfoTable_.erase(recordName);
654     }
655 }
656 
GetMethods(const panda_file::File::EntityId & recordId)657 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
658 {
659     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
660 
661     classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
662         AddMethodToTables(methodAccessor.GetMethodId());
663     });
664 }
665 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const666 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
667 {
668     /// frame size - 2^16 - 1
669     static const uint32_t MAX_ARG_NUM = 0xFFFF;
670 
671     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
672 
673     if (method == nullptr) {
674         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
675 
676         return;
677     }
678 
679     panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
680 
681     if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
682         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
683                                  << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
684                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
685 
686         return;
687     }
688 
689     size_t refIdx = 0;
690     method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
691 
692     for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
693         auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
694         method->params.emplace_back(argType, fileLanguage_);
695     }
696 }
697 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const698 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
699                                        panda_file::File::EntityId codeId) const
700 {
701     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
702 
703     if (method == nullptr) {
704         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
705         return LabelTable {};
706     }
707 
708     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
709 
710     const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
711     const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
712 
713     size_t tryIdx = 0;
714     LabelTable labelTable {};
715     codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
716         pandasm::Function::CatchBlock catchBlockPa {};
717         if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
718             return false;
719         }
720         size_t catchIdx = 0;
721         tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
722             auto classIdx = catchBlock.GetTypeIdx();
723             if (classIdx == panda_file::INVALID_INDEX) {
724                 catchBlockPa.exceptionRecord = "";
725             } else {
726                 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
727                 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
728             }
729             if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
730                 return false;
731             }
732 
733             method->catchBlocks.push_back(catchBlockPa);
734             catchBlockPa.catchBeginLabel = "";
735             catchBlockPa.catchEndLabel = "";
736             catchIdx++;
737 
738             return true;
739         });
740         tryIdx++;
741 
742         return true;
743     });
744 
745     return labelTable;
746 }
747 
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)748 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
749 {
750     size_t count = 0;
751 
752     while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
753         count++;
754         bcInsFirst = bcInsFirst.GetNext();
755         if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
756             return std::numeric_limits<size_t>::max();
757         }
758     }
759 
760     return count;
761 }
762 
763 // CC-OFFNXT(G.FUN.01) solid logic
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const764 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
765                                   const panda_file::CodeDataAccessor::TryBlock &tryBlock,
766                                   pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
767                                   size_t tryIdx) const
768 {
769     const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
770     const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
771 
772     const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
773     const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
774 
775     const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
776     const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
777     const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
778     const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
779 
780     if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
781         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
782                                  << tryBeginBcIns.GetAddress();
783         return false;
784     }
785 
786     auto itBegin = labelTable->find(tryBeginIdx);
787     if (itBegin == labelTable->end()) {
788         std::stringstream ss {};
789         ss << "try_begin_label_" << tryIdx;
790         catchBlockPa->tryBeginLabel = ss.str();
791         labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
792     } else {
793         catchBlockPa->tryBeginLabel = itBegin->second;
794     }
795 
796     if (!tryEndOffsetInRange || !tryEndOffsetValid) {
797         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
798                                  << tryEndBcIns.GetAddress();
799         return false;
800     }
801 
802     auto itEnd = labelTable->find(tryEndIdx);
803     if (itEnd == labelTable->end()) {
804         std::stringstream ss {};
805         ss << "try_end_label_" << tryIdx;
806         catchBlockPa->tryEndLabel = ss.str();
807         labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
808     } else {
809         catchBlockPa->tryEndLabel = itEnd->second;
810     }
811 
812     return true;
813 }
814 
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const815 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
816                                     const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
817                                     pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
818                                     size_t catchIdx) const
819 {
820     const auto handlerBeginOffset = catchBlock.GetHandlerPc();
821     const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
822 
823     const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
824     const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
825 
826     const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
827     const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
828 
829     const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
830     const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
831     const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
832     const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
833     const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
834 
835     if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
836         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
837                                  << handlerBeginBcIns.GetAddress();
838         return false;
839     }
840 
841     auto itBegin = labelTable->find(handlerBeginIdx);
842     if (itBegin == labelTable->end()) {
843         std::stringstream ss {};
844         ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
845         catchBlockPa->catchBeginLabel = ss.str();
846         labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
847     } else {
848         catchBlockPa->catchBeginLabel = itBegin->second;
849     }
850 
851     if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
852         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
853                                  << handlerEndBcIns.GetAddress();
854         return false;
855     }
856 
857     if (handlerEndPresent) {
858         auto itEnd = labelTable->find(handlerEndIdx);
859         if (itEnd == labelTable->end()) {
860             std::stringstream ss {};
861             ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
862             catchBlockPa->catchEndLabel = ss.str();
863             labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
864         } else {
865             catchBlockPa->catchEndLabel = itEnd->second;
866         }
867     }
868 
869     return true;
870 }
871 
872 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)873 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
874 {
875     if (shouldSet()) {
876         auto err = entity->metadata->SetAttribute(attribute);
877         if (err.has_value()) {
878             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
879         }
880     }
881 }
882 
883 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)884 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
885                                     const char *value)
886 {
887     if (shouldSet()) {
888         auto err = entity->metadata->SetAttributeValue(attribute, value);
889         if (err.has_value()) {
890             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
891         }
892     }
893 }
894 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const895 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
896 {
897     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
898 
899     if (method == nullptr) {
900         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
901 
902         return;
903     }
904 
905     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
906 
907     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
908 
909     if (!methodAccessor.IsStatic()) {
910         const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
911         auto thisType = pandasm::Type::FromDescriptor(className);
912 
913         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
914                                  << "\') is not static. emplacing self-argument of type " << thisType.GetName();
915 
916         method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
917     }
918     SetEntityAttribute(
919         method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
920 
921     SetEntityAttribute(
922         method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
923 
924     SetEntityAttribute(
925         method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
926 
927     SetEntityAttribute(
928         method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
929 
930     SetEntityAttribute(
931         method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
932 
933     SetEntityAttributeValue(
934         method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
935 
936     SetEntityAttributeValue(
937         method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
938 
939     SetEntityAttributeValue(
940         method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
941 
942     SetEntityAttribute(
943         method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
944 
945     std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
946     std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
947 
948     const bool isCtor = (methodNameRaw == ctorName);
949     const bool isCctor = (methodNameRaw == cctorName);
950 
951     if (isCtor) {
952         method->metadata->SetAttribute("ctor");
953         method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
954     } else if (isCctor) {
955         method->metadata->SetAttribute("cctor");
956         method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
957     }
958 }
959 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const960 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
961 {
962     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
963 
964     if (record == nullptr) {
965         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
966 
967         return;
968     }
969 
970     SetEntityAttribute(
971         record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
972 
973     auto external = file_->IsExternal(recordId);
974     if (!external) {
975         auto cda = panda_file::ClassDataAccessor {*file_, recordId};
976         SetEntityAttributeValue(
977             record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
978 
979         SetEntityAttributeValue(
980             record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
981 
982         SetEntityAttributeValue(
983             record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
984 
985         SetEntityAttribute(
986             record, [&cda]() { return cda.IsFinal(); }, "final");
987     }
988 }
989 
990 template <typename T, pandasm::Value::Type VALUE_TYPE>
SetMetadata(panda_file::FieldDataAccessor & accessor,pandasm::Field * field) const991 void Disassembler::SetMetadata(panda_file::FieldDataAccessor &accessor, pandasm::Field *field) const
992 {
993     std::optional<T> val = accessor.GetValue<T>();
994     if (val.has_value()) {
995         field->metadata->SetValue(pandasm::ScalarValue::Create<VALUE_TYPE>(val.value()));
996     }
997 }
998 
GetMetadataFieldValue(panda_file::FieldDataAccessor & fieldAccessor,pandasm::Field * field) const999 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &fieldAccessor, pandasm::Field *field) const
1000 {
1001     static const std::unordered_map<panda_file::Type::TypeId,
1002                                     std::function<void(panda_file::FieldDataAccessor &, pandasm::Field *)>>
1003         HANDLERS = {
1004             {panda_file::Type::TypeId::U1,
1005              [this](auto &accessor, auto *f) { SetMetadata<bool, pandasm::Value::Type::U1>(accessor, f); }},
1006             {panda_file::Type::TypeId::U8,
1007              [this](auto &accessor, auto *f) { SetMetadata<uint8_t, pandasm::Value::Type::U8>(accessor, f); }},
1008             {panda_file::Type::TypeId::U16,
1009              [this](auto &accessor, auto *f) { SetMetadata<uint16_t, pandasm::Value::Type::U16>(accessor, f); }},
1010             {panda_file::Type::TypeId::U32,
1011              [this](auto &accessor, auto *f) { SetMetadata<uint32_t, pandasm::Value::Type::U32>(accessor, f); }},
1012             {panda_file::Type::TypeId::F64,
1013              [this](auto &accessor, auto *f) { SetMetadata<double, pandasm::Value::Type::F64>(accessor, f); }},
1014             {panda_file::Type::TypeId::I8,
1015              [this](auto &accessor, auto *f) { SetMetadata<int8_t, pandasm::Value::Type::I8>(accessor, f); }},
1016             {panda_file::Type::TypeId::I16,
1017              [this](auto &accessor, auto *f) { SetMetadata<int16_t, pandasm::Value::Type::I16>(accessor, f); }},
1018             {panda_file::Type::TypeId::I32,
1019              [this](auto &accessor, auto *f) { SetMetadata<int32_t, pandasm::Value::Type::I32>(accessor, f); }},
1020             {panda_file::Type::TypeId::I64,
1021              [this](auto &accessor, auto *f) { SetMetadata<int64_t, pandasm::Value::Type::I64>(accessor, f); }},
1022         };
1023 
1024     auto it = HANDLERS.find(field->type.GetId());
1025     if (it != HANDLERS.end()) {
1026         it->second(fieldAccessor, field);
1027     } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE &&
1028                field->type.GetName() == "std/core/String") {
1029         std::optional<uint32_t> stringOffsetVal = fieldAccessor.GetValue<uint32_t>();
1030         if (stringOffsetVal.has_value()) {
1031             std::string_view val {reinterpret_cast<const char *>(
1032                 file_->GetStringData(panda_file::File::EntityId(stringOffsetVal.value())).data)};
1033             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1034         }
1035     } else if (field->type.GetRank() > 0) {
1036         std::optional<uint32_t> litarrayOffsetVal = fieldAccessor.GetValue<uint32_t>();
1037         if (litarrayOffsetVal.has_value()) {
1038             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1039                 std::string_view {std::to_string(litarrayOffsetVal.value())}));
1040         }
1041     }
1042 }
1043 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const1044 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
1045 {
1046     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
1047 
1048     if (field == nullptr) {
1049         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1050 
1051         return;
1052     }
1053 
1054     panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
1055 
1056     SetEntityAttribute(
1057         field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
1058 
1059     SetEntityAttribute(
1060         field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
1061 
1062     SetEntityAttributeValue(
1063         field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
1064 
1065     SetEntityAttributeValue(
1066         field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
1067 
1068     SetEntityAttributeValue(
1069         field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
1070 
1071     SetEntityAttribute(
1072         field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
1073     GetMetadataFieldValue(fieldAccessor, field);
1074 }
1075 
AnnotationTagToString(const char tag) const1076 std::string Disassembler::AnnotationTagToString(const char tag) const
1077 {
1078     static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
1079                                                                         {'2', "i8"},
1080                                                                         {'3', "u8"},
1081                                                                         {'4', "i16"},
1082                                                                         {'5', "u16"},
1083                                                                         {'6', "i32"},
1084                                                                         {'7', "u32"},
1085                                                                         {'8', "i64"},
1086                                                                         {'9', "u64"},
1087                                                                         {'A', "f32"},
1088                                                                         {'B', "f64"},
1089                                                                         {'C', "string"},
1090                                                                         {'D', "record"},
1091                                                                         {'E', "method"},
1092                                                                         {'F', "enum"},
1093                                                                         {'G', "annotation"},
1094                                                                         {'J', "method_handle"},
1095                                                                         {'H', "array"},
1096                                                                         {'K', "u1[]"},
1097                                                                         {'L', "i8[]"},
1098                                                                         {'M', "u8[]"},
1099                                                                         {'N', "i16[]"},
1100                                                                         {'O', "u16[]"},
1101                                                                         {'P', "i32[]"},
1102                                                                         {'Q', "u32[]"},
1103                                                                         {'R', "i64[]"},
1104                                                                         {'S', "u64[]"},
1105                                                                         {'T', "f32[]"},
1106                                                                         {'U', "f64[]"},
1107                                                                         {'V', "string[]"},
1108                                                                         {'W', "record[]"},
1109                                                                         {'X', "method[]"},
1110                                                                         {'Y', "enum[]"},
1111                                                                         {'Z', "annotation[]"},
1112                                                                         {'@', "method_handle[]"},
1113                                                                         {'*', "nullptr_string"}};
1114 
1115     return TAG_TO_STRING.at(tag);
1116 }
1117 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1118 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1119 {
1120     std::stringstream ss;
1121 
1122     if (type == "i8") {
1123         auto res = value.Get<int8_t>();
1124         ss << static_cast<int>(res);
1125     } else if (type == "u1" || type == "u8") {
1126         auto res = value.Get<uint8_t>();
1127         ss << static_cast<unsigned int>(res);
1128     } else if (type == "i16") {
1129         ss << value.Get<int16_t>();
1130     } else if (type == "u16") {
1131         ss << value.Get<uint16_t>();
1132     } else if (type == "i32") {
1133         ss << value.Get<int32_t>();
1134     } else if (type == "u32") {
1135         ss << value.Get<uint32_t>();
1136     } else if (type == "i64") {
1137         ss << value.Get<int64_t>();
1138     } else if (type == "u64") {
1139         ss << value.Get<uint64_t>();
1140     } else if (type == "f32") {
1141         ss << value.Get<float>();
1142     } else if (type == "f64") {
1143         ss << value.Get<double>();
1144     } else if (type == "string") {
1145         const auto id = value.Get<panda_file::File::EntityId>();
1146         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1147     } else if (type == "record") {
1148         const auto id = value.Get<panda_file::File::EntityId>();
1149         ss << GetFullRecordName(id);
1150     } else if (type == "method") {
1151         const auto id = value.Get<panda_file::File::EntityId>();
1152         AddMethodToTables(id);
1153         ss << GetMethodSignature(id);
1154     } else if (type == "enum") {
1155         const auto id = value.Get<panda_file::File::EntityId>();
1156         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1157         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1158            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1159     } else if (type == "annotation") {
1160         const auto id = value.Get<panda_file::File::EntityId>();
1161         ss << "id_" << id;
1162     } else if (type == "void") {
1163         return std::string();
1164     } else if (type == "method_handle") {
1165     } else if (type == "nullptr_string") {
1166         ss << static_cast<uint32_t>(0);
1167     }
1168 
1169     return ss.str();
1170 }
1171 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1172 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1173                                              const size_t idx)
1174 {
1175     std::stringstream ss;
1176 
1177     if (type == "i8") {
1178         auto res = value.Get<int8_t>(idx);
1179         ss << static_cast<int>(res);
1180     } else if (type == "u1" || type == "u8") {
1181         auto res = value.Get<uint8_t>(idx);
1182         ss << static_cast<unsigned int>(res);
1183     } else if (type == "i16") {
1184         ss << (value.Get<int16_t>(idx));
1185     } else if (type == "u16") {
1186         ss << (value.Get<uint16_t>(idx));
1187     } else if (type == "i32") {
1188         ss << (value.Get<int32_t>(idx));
1189     } else if (type == "u32") {
1190         ss << (value.Get<uint32_t>(idx));
1191     } else if (type == "i64") {
1192         ss << (value.Get<int64_t>(idx));
1193     } else if (type == "u64") {
1194         ss << (value.Get<uint64_t>(idx));
1195     } else if (type == "f32") {
1196         ss << value.Get<float>(idx);
1197     } else if (type == "f64") {
1198         ss << value.Get<double>(idx);
1199     } else if (type == "string") {
1200         const auto id = value.Get<panda_file::File::EntityId>(idx);
1201         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1202     } else if (type == "record") {
1203         const auto id = value.Get<panda_file::File::EntityId>(idx);
1204         ss << GetFullRecordName(id);
1205     } else if (type == "method") {
1206         const auto id = value.Get<panda_file::File::EntityId>(idx);
1207         AddMethodToTables(id);
1208         ss << GetMethodSignature(id);
1209     } else if (type == "enum") {
1210         const auto id = value.Get<panda_file::File::EntityId>(idx);
1211         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1212         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1213            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1214     } else if (type == "annotation") {
1215         const auto id = value.Get<panda_file::File::EntityId>(idx);
1216         ss << "id_" << id;
1217     } else if (type == "method_handle") {
1218     } else if (type == "nullptr_string") {
1219     }
1220 
1221     return ss.str();
1222 }
1223 
GetFullMethodName(const panda_file::File::EntityId & methodId) const1224 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1225 {
1226     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1227 
1228     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1229 
1230     std::string className = GetFullRecordName(methodAccessor.GetClassId());
1231     if (IsSystemType(className)) {
1232         className = "";
1233     } else {
1234         className += ".";
1235     }
1236 
1237     return className + methodNameRaw;
1238 }
1239 
GetMethodSignature(const panda_file::File::EntityId & methodId) const1240 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1241 {
1242     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1243 
1244     pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1245     GetParams(&method, methodAccessor.GetProtoId());
1246     GetMetaData(&method, methodId);
1247 
1248     auto res = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1249     return method.IsStatic() ? "<static> " + res : res;
1250 }
1251 
GetFullRecordName(const panda_file::File::EntityId & classId) const1252 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1253 {
1254     std::string name = StringDataToString(file_->GetStringData(classId));
1255 
1256     auto type = pandasm::Type::FromDescriptor(name);
1257     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1258 
1259     return type.GetPandasmName();
1260 }
1261 
1262 static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1263 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1264 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1265 {
1266     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1267        << ", type: 0x" << fieldAccessor.GetType();
1268 }
1269 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1270 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1271 {
1272     std::stringstream ss;
1273     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1274        << ", type: 0x" << fieldAccessor.GetType();
1275     return ss.str();
1276 }
1277 
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1278 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1279 {
1280     if (file_->IsExternal(recordId)) {
1281         return;
1282     }
1283 
1284     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1285     std::stringstream ss;
1286 
1287     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1288        << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1289        << std::dec << classAccessor.GetSize() << ")";
1290 
1291     recordInfo->recordInfo = ss.str();
1292     ss.str(std::string());
1293 
1294     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1295         GetFieldInfo(fieldAccessor, ss);
1296 
1297         recordInfo->fieldsInfo.push_back(ss.str());
1298 
1299         ss.str(std::string());
1300     });
1301 }
1302 
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1303 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1304 {
1305     panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1306     std::stringstream ss;
1307 
1308     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1309        << methodAccessor.GetMethodId();
1310 
1311     if (methodAccessor.GetCodeId().has_value()) {
1312         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1313            << methodAccessor.GetCodeId().value();
1314 
1315         GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1316     } else {
1317         ss << ", <no code>";
1318     }
1319 
1320     auto profileSize = methodAccessor.GetProfileSize();
1321     if (profileSize) {
1322         ss << ", profile size: " << profileSize.value();
1323     }
1324 
1325     methodInfo->methodInfo = ss.str();
1326 
1327     if (methodAccessor.GetCodeId()) {
1328         ASSERT(debugInfoExtractor_ != nullptr);
1329         methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1330         methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1331 
1332         // Add information about parameters into the table
1333         panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1334         auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1335         uint32_t codeSize = codeda.GetCodeSize();
1336         for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1337             panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1338             methodInfo->localVariableTable.emplace_back(argInfo);
1339         }
1340     }
1341 }
1342 
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1343 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1344 {
1345     if (litArray.literals.empty()) {
1346         return;
1347     }
1348 
1349     bool isConst = litArray.literals[0].IsArray();
1350 
1351     std::stringstream specifiers {};
1352 
1353     if (isConst) {
1354         specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1355     }
1356 
1357     os << ".array array_" << name << " " << specifiers.str() << "{";
1358 
1359     SerializeValues(litArray, isConst, os);
1360 
1361     os << "}\n";
1362 }
1363 
LiteralTagToString(const panda_file::LiteralTag & tag) const1364 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1365 {
1366     switch (tag) {
1367         case panda_file::LiteralTag::BOOL:
1368         case panda_file::LiteralTag::ARRAY_U1:
1369             return "u1";
1370         case panda_file::LiteralTag::ARRAY_U8:
1371             return "u8";
1372         case panda_file::LiteralTag::ARRAY_I8:
1373             return "i8";
1374         case panda_file::LiteralTag::ARRAY_U16:
1375             return "u16";
1376         case panda_file::LiteralTag::ARRAY_I16:
1377             return "i16";
1378         case panda_file::LiteralTag::ARRAY_U32:
1379             return "u32";
1380         case panda_file::LiteralTag::INTEGER:
1381         case panda_file::LiteralTag::ARRAY_I32:
1382             return "i32";
1383         case panda_file::LiteralTag::ARRAY_U64:
1384             return "u64";
1385         case panda_file::LiteralTag::BIGINT:
1386         case panda_file::LiteralTag::ARRAY_I64:
1387             return "i64";
1388         case panda_file::LiteralTag::FLOAT:
1389         case panda_file::LiteralTag::ARRAY_F32:
1390             return "f32";
1391         case panda_file::LiteralTag::DOUBLE:
1392         case panda_file::LiteralTag::ARRAY_F64:
1393             return "f64";
1394         case panda_file::LiteralTag::STRING:
1395         case panda_file::LiteralTag::ARRAY_STRING:
1396             return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1397         case panda_file::LiteralTag::ACCESSOR:
1398             return "accessor";
1399         case panda_file::LiteralTag::NULLVALUE:
1400             return "nullvalue";
1401         case panda_file::LiteralTag::METHODAFFILIATE:
1402             return "method_affiliate";
1403         case panda_file::LiteralTag::METHOD:
1404             return "method";
1405         case panda_file::LiteralTag::GENERATORMETHOD:
1406             return "generator_method";
1407         case panda_file::LiteralTag::LITERALARRAY:
1408             return "lit_offset";
1409         default:
1410             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1411             UNREACHABLE();
1412     }
1413 }
1414 
SerializeLiterals(const pandasm::LiteralArray::Literal & lit) const1415 std::string Disassembler::SerializeLiterals(const pandasm::LiteralArray::Literal &lit) const
1416 {
1417     std::stringstream res {};
1418     const auto &val = lit.value;
1419     switch (lit.tag) {
1420         case panda_file::LiteralTag::BOOL: {
1421             res << (std::get<bool>(val));
1422             break;
1423         }
1424         case panda_file::LiteralTag::INTEGER: {
1425             res << (bit_cast<int32_t>(std::get<uint32_t>(val)));
1426             break;
1427         }
1428         case panda_file::LiteralTag::DOUBLE: {
1429             res << (std::get<double>(val));
1430             break;
1431         }
1432         case panda_file::LiteralTag::STRING: {
1433             res << "\"" << (std::get<std::string>(val)) << "\"";
1434             break;
1435         }
1436         case panda_file::LiteralTag::METHOD:
1437         case panda_file::LiteralTag::GENERATORMETHOD: {
1438             res << (std::get<std::string>(val));
1439             break;
1440         }
1441         case panda_file::LiteralTag::NULLVALUE:
1442         case panda_file::LiteralTag::ACCESSOR: {
1443             res << (static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val))));
1444             break;
1445         }
1446         case panda_file::LiteralTag::METHODAFFILIATE: {
1447             res << (std::get<uint16_t>(val));
1448             break;
1449         }
1450         case panda_file::LiteralTag::LITERALARRAY: {
1451             res << (std::get<std::string>(val));
1452             break;
1453         }
1454         default:
1455             UNREACHABLE();
1456     }
1457     res << ", ";
1458     return res.str();
1459 }
1460 
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1461 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1462 {
1463     if (lit.IsBoolValue()) {
1464         std::stringstream res {};
1465         res << (std::get<bool>(lit.value));
1466         return res.str();
1467     }
1468 
1469     if (lit.IsByteValue()) {
1470         return LiteralIntegralValueToString<uint8_t>(lit);
1471     }
1472 
1473     if (lit.IsShortValue()) {
1474         return LiteralIntegralValueToString<uint16_t>(lit);
1475     }
1476 
1477     if (lit.IsIntegerValue()) {
1478         return LiteralIntegralValueToString<uint32_t>(lit);
1479     }
1480 
1481     if (lit.IsLongValue()) {
1482         return LiteralIntegralValueToString<uint64_t>(lit);
1483     }
1484 
1485     if (lit.IsDoubleValue()) {
1486         std::stringstream res {};
1487         res << std::get<double>(lit.value);
1488         return res.str();
1489     }
1490 
1491     if (lit.IsFloatValue()) {
1492         std::stringstream res {};
1493         res << std::get<float>(lit.value);
1494         return res.str();
1495     }
1496 
1497     if (lit.IsStringValue()) {
1498         std::stringstream res {};
1499         res << "\"" << std::get<std::string>(lit.value) << "\"";
1500         return res.str();
1501     }
1502 
1503     if (lit.IsLiteralArrayValue()) {
1504         return SerializeLiterals(lit);
1505     }
1506 
1507     UNREACHABLE();
1508 }
1509 
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1510 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1511 {
1512     std::string separator = (isConst) ? (" ") : ("\n");
1513 
1514     os << separator;
1515 
1516     if (isConst) {
1517         for (const auto &l : litArray.literals) {
1518             os << LiteralValueToString(l) << separator;
1519         }
1520     } else {
1521         for (const auto &l : litArray.literals) {
1522             os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1523         }
1524     }
1525 }
1526 
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1527 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1528 {
1529     if (IsSystemType(record.name)) {
1530         return;
1531     }
1532 
1533     os << ".record " << record.name;
1534 
1535     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1536     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1537     if (recordInTable) {
1538         Serialize(*record.metadata, recordIter->second.annList, os);
1539     } else {
1540         Serialize(*record.metadata, {}, os);
1541     }
1542 
1543     if (record.metadata->IsForeign() && record.fieldList.empty()) {
1544         os << "\n\n";
1545         return;
1546     }
1547 
1548     os << " {";
1549 
1550     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1551         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1552         SerializeFields(record, os, true);
1553     } else {
1554         os << "\n";
1555         SerializeFields(record, os, false);
1556     }
1557 
1558     os << "}\n\n";
1559 }
1560 
DumpLiteralArray(const pandasm::LiteralArray & literalArray,std::stringstream & ss) const1561 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literalArray, std::stringstream &ss) const
1562 {
1563     ss << "[";
1564     bool firstItem = true;
1565     for (const auto &item : literalArray.literals) {
1566         if (!firstItem) {
1567             ss << ", ";
1568         } else {
1569             firstItem = false;
1570         }
1571 
1572         switch (item.tag) {
1573             case panda_file::LiteralTag::INTEGER: {
1574                 ss << std::get<uint32_t>(item.value);  // CC-OFF(G.EXP.30-CPP) false positive
1575                 break;
1576             }
1577             case panda_file::LiteralTag::DOUBLE: {
1578                 ss << std::get<double>(item.value);
1579                 break;
1580             }
1581             case panda_file::LiteralTag::BOOL: {
1582                 ss << std::get<bool>(item.value);
1583                 break;
1584             }
1585             case panda_file::LiteralTag::STRING: {
1586                 ss << "\"" << std::get<std::string>(item.value) << "\"";
1587                 break;
1588             }
1589             case panda_file::LiteralTag::LITERALARRAY: {
1590                 std::string offsetStr = std::get<std::string>(item.value);
1591                 const int hexBase = 16;
1592                 const char *begin = offsetStr.data();
1593                 const char *end = &(*offsetStr.end());
1594                 uint32_t litArrayOffset = 0;
1595                 std::from_chars(begin, end, litArrayOffset, hexBase);
1596                 pandasm::LiteralArray litArray;
1597                 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1598                 DumpLiteralArray(litArray, ss);
1599                 break;
1600             }
1601             default: {
1602                 UNREACHABLE();
1603                 break;
1604             }
1605         }
1606     }
1607     ss << "]";
1608 }
1609 
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1610 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1611 {
1612     if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1613         ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1614     } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1615         ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1616     } else if (f.type.GetId() == panda_file::Type::TypeId::I8) {
1617         ss << " = 0x" << std::hex << static_cast<int32_t>(f.metadata->GetValue().value().GetValue<int8_t>());
1618     } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1619         ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1620     } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1621         ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1622     } else if (f.type.GetId() == panda_file::Type::TypeId::I32) {
1623         ss << " = " << f.metadata->GetValue().value().GetValue<int>();
1624     } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "std/core/String") {
1625         ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1626     } else if (f.type.GetRank() > 0) {
1627         uint32_t litArrayOffset = 0;
1628         auto value = f.metadata->GetValue().value().GetValue<std::string>();
1629         std::from_chars(value.data(), &(*value.end()), litArrayOffset);
1630         pandasm::LiteralArray litArray;
1631         GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1632         ss << " = ";
1633         DumpLiteralArray(litArray, ss);
1634     }
1635 }
1636 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1637 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1638 {
1639     constexpr size_t INFO_OFFSET = 80;
1640 
1641     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1642     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1643 
1644     const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1645 
1646     size_t fieldIdx = 0;
1647 
1648     std::stringstream ss;
1649     for (const auto &f : record.fieldList) {
1650         ss << "\t" << f.type.GetPandasmName() << " " << f.name;
1651         if (f.metadata->GetValue().has_value()) {
1652             SerializeFieldValue(f, ss);
1653         }
1654         if (recordInTable) {
1655             const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1656             if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1657                 Serialize(*f.metadata, fieldIter->second, ss);
1658             } else {
1659                 Serialize(*f.metadata, {}, ss);
1660             }
1661         } else {
1662             Serialize(*f.metadata, {}, ss);
1663         }
1664 
1665         if (printInformation) {
1666             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1667         } else {
1668             os << ss.str() << "\n";
1669         }
1670 
1671         ss.str(std::string());
1672         ss.clear();
1673 
1674         fieldIdx++;
1675     }
1676 }
1677 
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1678 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1679 {
1680     if (catchBlock.exceptionRecord.empty()) {
1681         os << ".catchall ";
1682     } else {
1683         os << ".catch " << catchBlock.exceptionRecord << ", ";
1684     }
1685 
1686     os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1687 
1688     if (!catchBlock.catchEndLabel.empty()) {
1689         os << ", " << catchBlock.catchEndLabel;
1690     }
1691 }
1692 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1693 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1694 {
1695     auto boolAttributes = meta.GetBoolAttributes();
1696     auto attributes = meta.GetAttributes();
1697     if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1698         return;
1699     }
1700 
1701     os << " <";
1702 
1703     size_t size = boolAttributes.size();
1704     size_t idx = 0;
1705     for (const auto &attr : boolAttributes) {
1706         os << attr;
1707         ++idx;
1708 
1709         if (!attributes.empty() || !annList.empty() || idx < size) {
1710             os << ", ";
1711         }
1712     }
1713 
1714     size = attributes.size();
1715     idx = 0;
1716     for (const auto &[key, values] : attributes) {
1717         for (size_t i = 0; i < values.size(); i++) {
1718             os << key << "=" << values[i];
1719 
1720             if (i < values.size() - 1) {
1721                 os << ", ";
1722             }
1723         }
1724 
1725         ++idx;
1726 
1727         if (!annList.empty() || idx < size) {
1728             os << ", ";
1729         }
1730     }
1731 
1732     size = annList.size();
1733     idx = 0;
1734     for (const auto &[key, value] : annList) {
1735         os << key << "=" << value;
1736 
1737         ++idx;
1738 
1739         if (idx < size) {
1740             os << ", ";
1741         }
1742     }
1743 
1744     os << ">";
1745 }
1746 
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1747 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1748 {
1749     if (lineNumberTable.empty()) {
1750         return;
1751     }
1752 
1753     os << "\n#   LINE_NUMBER_TABLE:\n";
1754     for (const auto &lineInfo : lineNumberTable) {
1755         os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1756     }
1757 }
1758 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1759 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1760                                                const pandasm::Function &method, std::ostream &os) const
1761 {
1762     if (localVariableTable.empty()) {
1763         return;
1764     }
1765 
1766     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1767     os << "#\t Start   End  Register           Name   Signature\n";
1768     const int startWidth = 5;
1769     const int endWidth = 4;
1770     const int regWidth = 8;
1771     const int nameWidth = 14;
1772     for (const auto &variableInfo : localVariableTable) {
1773         std::ostringstream regStream;
1774         regStream << variableInfo.regNumber << '(';
1775         if (variableInfo.regNumber < 0) {
1776             regStream << "acc";
1777         } else {
1778             uint32_t vreg = variableInfo.regNumber;
1779             uint32_t firstArgReg = method.GetTotalRegs();
1780             if (vreg < firstArgReg) {
1781                 regStream << 'v' << vreg;
1782             } else {
1783                 regStream << 'a' << vreg - firstArgReg;
1784             }
1785         }
1786         regStream << ')';
1787 
1788         os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << "  ";
1789         os << std::setw(endWidth) << std::right << variableInfo.endOffset << "  ";
1790         os << std::setw(regWidth) << std::right << regStream.str() << " ";
1791         os << std::setw(nameWidth) << std::right << variableInfo.name << "   " << variableInfo.type;
1792         if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1793             os << " (" << variableInfo.typeSignature << ")";
1794         }
1795         os << "\n";
1796     }
1797 }
1798 
SerializeLanguage(std::ostream & os) const1799 void Disassembler::SerializeLanguage(std::ostream &os) const
1800 {
1801     os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1802 }
1803 
SerializeFilename(std::ostream & os) const1804 void Disassembler::SerializeFilename(std::ostream &os) const
1805 {
1806     if (file_ == nullptr || file_->GetFilename().empty()) {
1807         return;
1808     }
1809 
1810     os << "# source binary: " << file_->GetFilename() << "\n\n";
1811 }
1812 
SerializeLitArrays(std::ostream & os,bool addSeparators) const1813 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1814 {
1815     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1816 
1817     if (prog_.literalarrayTable.empty()) {
1818         return;
1819     }
1820 
1821     if (addSeparators) {
1822         os << "# ====================\n"
1823               "# LITERALS\n\n";
1824     }
1825 
1826     for (const auto &pair : prog_.literalarrayTable) {
1827         Serialize(pair.first, pair.second, os);
1828     }
1829 
1830     os << "\n";
1831 }
1832 
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1833 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1834 {
1835     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1836 
1837     if (prog_.recordTable.empty()) {
1838         return;
1839     }
1840 
1841     if (addSeparators) {
1842         os << "# ====================\n"
1843               "# RECORDS\n\n";
1844     }
1845 
1846     for (const auto &r : prog_.recordTable) {
1847         Serialize(r.second, os, printInformation);
1848     }
1849 }
1850 
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1851 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1852 {
1853     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1854 
1855     if (prog_.functionInstanceTable.empty() && prog_.functionStaticTable.empty()) {
1856         return;
1857     }
1858 
1859     if (addSeparators) {
1860         os << "# ====================\n"
1861               "# METHODS\n\n";
1862     }
1863 
1864     for (const auto &m : prog_.functionStaticTable) {
1865         Serialize(m.second, os, printInformation);
1866     }
1867     for (const auto &m : prog_.functionInstanceTable) {
1868         Serialize(m.second, os, printInformation);
1869     }
1870 }
1871 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1872 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1873 {
1874     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1875 }
1876 
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1877 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1878 {
1879     std::stringstream name;
1880 
1881     if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1882         auto idx = bcIns.GetId().AsIndex();
1883         auto id = file_->ResolveClassIndex(methodId, idx);
1884         auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1885 
1886         name.str("");
1887         name << type.GetPandasmName();
1888     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID) ||
1889                bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_METHOD_ID)) {
1890         auto idx = bcIns.GetId().AsIndex();
1891         auto id = file_->ResolveMethodIndex(methodId, idx);
1892 
1893         name << GetMethodSignature(id);
1894     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1895         name << '\"';
1896 
1897         if (skipStrings_ || quiet_) {
1898             name << std::hex << "0x" << bcIns.GetId().AsFileId();
1899         } else {
1900             name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1901         }
1902 
1903         name << '\"';
1904     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID) ||
1905                bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_FIELD_ID)) {
1906         auto idx = bcIns.GetId().AsIndex();
1907         auto id = file_->ResolveFieldIndex(methodId, idx);
1908         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1909 
1910         auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1911         name << recordName << '.';
1912         name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1913     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1914         auto index = bcIns.GetId().AsIndex();
1915         name << "array_" << index;
1916     }
1917 
1918     return name.str();
1919 }
1920 
GetRecordLanguage(panda_file::File::EntityId classId) const1921 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1922 {
1923     if (file_->IsExternal(classId)) {
1924         return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1925     }
1926 
1927     panda_file::ClassDataAccessor cda(*file_, classId);
1928     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1929 }
1930 
1931 // CC-OFFNXT(G.FUN.01) solid logic
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1932 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1933                                 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1934                                 panda_file::File::EntityId codeId)
1935 {
1936     const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1937     const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1938     if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1939         size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1940         if (idx != std::numeric_limits<size_t>::max()) {
1941             if (labelTable->find(idx) == labelTable->end()) {
1942                 std::stringstream ss;
1943                 ss << "jump_label_" << labelTable->size();
1944                 (*labelTable)[idx] = ss.str();
1945             }
1946 
1947             paIns->imms.clear();
1948             paIns->ids.push_back(labelTable->at(idx));
1949         } else {
1950             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1951                                      << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1952                                      << ": invalid jump offset 0x" << jmpOffset
1953                                      << " - jumping in the middle of another instruction!";
1954         }
1955     } else {
1956         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1957                                  << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1958                                  << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1959     }
1960 }
1961 
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1962 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1963 {
1964     auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1965 
1966     pandasm::Field field(fileLanguage_);
1967     GetField(field, fieldAccessor);
1968     if (field.name.empty()) {
1969         return;
1970     }
1971 
1972     auto &fieldList = externalFieldTable_[recordName];
1973     auto retField = std::find_if(fieldList.begin(), fieldList.end(), [&field](pandasm::Field &fieldFromList) {
1974         return field.name == fieldFromList.name && field.IsStatic() == fieldFromList.IsStatic();
1975     });
1976     if (retField == fieldList.end()) {
1977         fieldList.emplace_back(std::move(field));
1978 
1979         externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1980     }
1981 }
1982 
1983 // CC-OFFNXT(huge_method) solid logic
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1984 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1985                                      panda_file::File::EntityId codeId)
1986 {
1987     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1988 
1989     const auto insArr = codeAccessor.GetInstructions();
1990 
1991     method->regsNum = codeAccessor.GetNumVregs();
1992 
1993     auto bcIns = BytecodeInstruction(insArr);
1994     auto from = bcIns.GetAddress();
1995     const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
1996 
1997     LabelTable labelTable = GetExceptions(method, methodId, codeId);
1998 
1999     IdList unknownExternalMethods {};
2000 
2001     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
2002         if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
2003             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
2004                                      << "). bytecode instructions sequence corrupted for method " << method->name
2005                                      << "! went out of bounds";
2006 
2007             break;
2008         }
2009 
2010         if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID) ||
2011             bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_FIELD_ID)) {
2012             auto idx = bcIns.GetId().AsIndex();
2013             auto id = file_->ResolveFieldIndex(methodId, idx);
2014             panda_file::FieldDataAccessor fieldAccessor(*file_, id);
2015 
2016             if (fieldAccessor.IsExternal()) {
2017                 CollectExternalFields(fieldAccessor);
2018             }
2019         }
2020 
2021         auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
2022         paIns.insDebug.boundLeft =
2023             bcIns.GetAddress() - from;  // It is used to produce a line table during method serialization
2024         if (paIns.IsJump()) {
2025             TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
2026         }
2027 
2028         // check if method id is unknown external method. if so, emplace it in table
2029         if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID) ||
2030             bcIns.HasFlag(BytecodeInstruction::Flags::STATIC_METHOD_ID)) {
2031             const auto argMethodIdx = bcIns.GetId().AsIndex();
2032             const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
2033 
2034             const auto argMethodSignature = GetMethodSignature(argMethodId);
2035             panda_file::MethodDataAccessor methodAccessor(*file_, argMethodId);
2036             const auto &functionTable =
2037                 methodAccessor.IsStatic() ? prog_.functionStaticTable : prog_.functionInstanceTable;
2038             const bool isPresent = functionTable.find(argMethodSignature) != functionTable.cend();
2039             const bool isExternal = file_->IsExternal(argMethodId);
2040             if (isExternal && !isPresent) {
2041                 unknownExternalMethods.push_back(argMethodId);
2042             }
2043         }
2044 
2045         method->ins.push_back(paIns);
2046         bcIns = bcIns.GetNext();
2047     }
2048 
2049     for (const auto &pair : labelTable) {
2050         method->ins[pair.first].label = pair.second;
2051         method->ins[pair.first].setLabel = true;
2052     }
2053 
2054     return unknownExternalMethods;
2055 }
2056 
2057 }  // namespace ark::disasm
2058